xref: /dragonfly/sys/bus/pci/pci.c (revision f116de0a)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include "pcib_if.h"
63 #include "pci_if.h"
64 
65 #ifdef __HAVE_ACPI
66 #include <contrib/dev/acpica/acpi.h>
67 #include "acpi_if.h"
68 #else
69 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
70 #endif
71 
72 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static const char	*pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_child_detached(device_t, device_t);
90 static void		pci_load_vendor_data(void);
91 static int		pci_describe_parse_line(char **ptr, int *vendor,
92 			    int *device, char **desc);
93 static char		*pci_describe_device(device_t dev);
94 static int		pci_modevent(module_t mod, int what, void *arg);
95 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
96 			    pcicfgregs *cfg);
97 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
98 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
99 			    int reg, uint32_t *data);
100 #if 0
101 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t data);
103 #endif
104 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
105 static void		pci_disable_msi(device_t dev);
106 static void		pci_enable_msi(device_t dev, uint64_t address,
107 			    uint16_t data);
108 static void		pci_setup_msix_vector(device_t dev, u_int index,
109 			    uint64_t address, uint32_t data);
110 static void		pci_mask_msix_vector(device_t dev, u_int index);
111 static void		pci_unmask_msix_vector(device_t dev, u_int index);
112 static void		pci_mask_msix_allvectors(device_t dev);
113 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
114 static int		pci_msi_blacklisted(void);
115 static void		pci_resume_msi(device_t dev);
116 static void		pci_resume_msix(device_t dev);
117 static int		pcie_slotimpl(const pcicfgregs *);
118 static void		pci_print_verbose_expr(const pcicfgregs *);
119 
120 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
121 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
122 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_subvendor(device_t, int, int,
126 			    pcicfgregs *);
127 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
129 
/*
 * Method table for the "pci" bus driver: device life-cycle hooks,
 * generic newbus resource plumbing, and the PCI-specific kobj
 * interface (config space access, power states, VPD, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
182 
/*
 * Register the "pci" driver class, attach it below pcib (PCI-PCI
 * bridge) devices, and route module events to pci_modevent().
 */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Contents/size of the vendor description database, once loaded. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
191 
192 
/*
 * Dispatch table mapping a PCI capability ID to the routine that
 * parses it during the initial config-space scan.  Terminated by
 * a NULL read_cap entry.
 */
static const struct pci_read_cap {
	int		cap;		/* PCIY_* capability ID */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
207 
/*
 * Per-device quirk record, matched against the combined 32-bit
 * device/vendor ID word read from config space.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* codes below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific argument (e.g. map reg offset) */
	int	arg2;	/* quirk-specific argument, currently unused */
};
216 
/*
 * Table of devices with known config-space or MSI quirks.
 * Terminated by an all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
251 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* MSI-X resource IDs are the MSI-X vector number biased by one. */
#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

/* Global list of all enumerated PCI functions. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped on every device list change */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
/* Set during capability scan once any PCIe / PCI-X device is seen. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/* Round-robin CPU id used when assigning MSI target CPUs. */
static int pci_msi_cpuid;
307 
308 /* Find a device_t by bus/slot/function in domain 0 */
309 
310 device_t
311 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
312 {
313 
314 	return (pci_find_dbsf(0, bus, slot, func));
315 }
316 
317 /* Find a device_t by domain/bus/slot/function */
318 
319 device_t
320 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
321 {
322 	struct pci_devinfo *dinfo;
323 
324 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
325 		if ((dinfo->cfg.domain == domain) &&
326 		    (dinfo->cfg.bus == bus) &&
327 		    (dinfo->cfg.slot == slot) &&
328 		    (dinfo->cfg.func == func)) {
329 			return (dinfo->cfg.dev);
330 		}
331 	}
332 
333 	return (NULL);
334 }
335 
336 /* Find a device_t by vendor/device ID */
337 
338 device_t
339 pci_find_device(uint16_t vendor, uint16_t device)
340 {
341 	struct pci_devinfo *dinfo;
342 
343 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
344 		if ((dinfo->cfg.vendor == vendor) &&
345 		    (dinfo->cfg.device == device)) {
346 			return (dinfo->cfg.dev);
347 		}
348 	}
349 
350 	return (NULL);
351 }
352 
353 device_t
354 pci_find_class(uint8_t class, uint8_t subclass)
355 {
356 	struct pci_devinfo *dinfo;
357 
358 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
359 		if (dinfo->cfg.baseclass == class &&
360 		    dinfo->cfg.subclass == subclass) {
361 			return (dinfo->cfg.dev);
362 		}
363 	}
364 
365 	return (NULL);
366 }
367 
368 /* return base address of memory or port map */
369 
370 static uint32_t
371 pci_mapbase(uint32_t mapreg)
372 {
373 
374 	if (PCI_BAR_MEM(mapreg))
375 		return (mapreg & PCIM_BAR_MEM_BASE);
376 	else
377 		return (mapreg & PCIM_BAR_IO_BASE);
378 }
379 
380 /* return map type of memory or port map */
381 
382 static const char *
383 pci_maptype(unsigned mapreg)
384 {
385 
386 	if (PCI_BAR_IO(mapreg))
387 		return ("I/O Port");
388 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
389 		return ("Prefetchable Memory");
390 	return ("Memory");
391 }
392 
393 /* return log2 of map size decoded for memory or port map */
394 
395 static int
396 pci_mapsize(uint32_t testval)
397 {
398 	int ln2size;
399 
400 	testval = pci_mapbase(testval);
401 	ln2size = 0;
402 	if (testval != 0) {
403 		while ((testval & 1) == 0)
404 		{
405 			ln2size++;
406 			testval >>= 1;
407 		}
408 	}
409 	return (ln2size);
410 }
411 
412 /* return log2 of address range supported by map register */
413 
414 static int
415 pci_maprange(unsigned mapreg)
416 {
417 	int ln2range = 0;
418 
419 	if (PCI_BAR_IO(mapreg))
420 		ln2range = 32;
421 	else
422 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
423 		case PCIM_BAR_MEM_32:
424 			ln2range = 32;
425 			break;
426 		case PCIM_BAR_MEM_1MB:
427 			ln2range = 20;
428 			break;
429 		case PCIM_BAR_MEM_64:
430 			ln2range = 64;
431 			break;
432 		}
433 	return (ln2range);
434 }
435 
436 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
437 
438 static void
439 pci_fixancient(pcicfgregs *cfg)
440 {
441 	if (cfg->hdrtype != 0)
442 		return;
443 
444 	/* PCI to PCI bridges use header type 1 */
445 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
446 		cfg->hdrtype = 1;
447 }
448 
/*
 * Extract header-type specific config data into cfg: subvendor and
 * subdevice IDs, the number of BARs, and (with COMPAT_OLDPCI) the
 * secondary bus number.  Header types: 0 = normal device, 1 =
 * PCI-PCI bridge, 2 = cardbus bridge.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		/* Type-1 headers carry no subvendor/subdevice registers. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
478 
/*
 * Read the standard configuration header of the function at
 * domain/bus/slot/function (d/b/s/f) into a freshly allocated
 * pci_devinfo of 'size' bytes, parse its capability list, and link
 * it onto the global device queue.  Returns the new entry, or NULL
 * if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones ID word means nothing decoded the access. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the pciio conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
550 
551 static int
552 pci_fixup_nextptr(int *nextptr0)
553 {
554 	int nextptr = *nextptr0;
555 
556 	/* "Next pointer" is only one byte */
557 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
558 
559 	if (nextptr & 0x3) {
560 		/*
561 		 * PCI local bus spec 3.0:
562 		 *
563 		 * "... The bottom two bits of all pointers are reserved
564 		 *  and must be implemented as 00b although software must
565 		 *  mask them to allow for future uses of these bits ..."
566 		 */
567 		if (bootverbose) {
568 			kprintf("Illegal PCI extended capability "
569 				"offset, fixup 0x%02x -> 0x%02x\n",
570 				nextptr, nextptr & ~0x3);
571 		}
572 		nextptr &= ~0x3;
573 	}
574 	*nextptr0 = nextptr;
575 
576 	if (nextptr < 0x40) {
577 		if (nextptr != 0) {
578 			kprintf("Illegal PCI extended capability "
579 				"offset 0x%02x", nextptr);
580 		}
581 		return 0;
582 	}
583 	return 1;
584 }
585 
/*
 * Parse a power management (PCIY_PMG) capability: record the PM
 * capability word and the config-space offsets of the status and
 * PMCSR registers in cfg->pp.  Only the first PM capability found
 * is recorded.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Already saw a PM capability on this function; keep the first. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
617 
/*
 * Parse a HyperTransport (PCIY_HT) capability (x86/x86_64 only;
 * a no-op elsewhere).  Records the location of an HT slave
 * capability and, for the MSI mapping capability, the MSI mapping
 * window address used to translate MSI interrupts.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	/* Everything below only applies to the MSI mapping capability. */
	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_X86_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
}
662 
/*
 * Parse an MSI (PCIY_MSI) capability: record its config-space
 * location, the message control word, and the number of messages
 * the device requests (2^MMC, from the multiple-message-capable
 * field of the control word).
 */
static void
pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msi *msi = &cfg->msi;

	msi->msi_location = ptr;
	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);

#undef REG
}
677 
/*
 * Parse an MSI-X (PCIY_MSIX) capability: record the control word,
 * the supported vector count, and the BAR/offset pairs locating
 * the vector table and the pending-bit array (PBA).  Initializes
 * the per-function list of allocated MSI-X vectors.
 */
static void
pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msix *msix = &cfg->msix;
	uint32_t val;

	msix->msix_location = ptr;
	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
	/* Table size field is encoded as N-1. */
	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;

	/* Low bits select the BAR; the rest is the offset within it. */
	val = REG(ptr + PCIR_MSIX_TABLE, 4);
	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;

	val = REG(ptr + PCIR_MSIX_PBA, 4);
	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;

	TAILQ_INIT(&msix->msix_vectors);

#undef REG
}
703 
/*
 * Record the config-space offset of the VPD (Vital Product Data)
 * capability for later access via pci_read_vpd_reg().
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
709 
/*
 * Parse a bridge subvendor (PCIY_SUBVENDOR) capability, which
 * supplies the subvendor/subdevice IDs that type-1 (PCI-PCI
 * bridge) headers lack.
 */
static void
pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	/* Should always be true. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
		uint32_t val;

		/* Low half is subvendor, high half is subdevice. */
		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
		cfg->subvendor = val & 0xffff;
		cfg->subdevice = val >> 16;
	}

#undef REG
}
727 
/*
 * Parse a PCI-X (PCIY_PCIX) capability: record its location and
 * note that the machine likely has a PCI-X chipset.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
743 
744 static int
745 pcie_slotimpl(const pcicfgregs *cfg)
746 {
747 	const struct pcicfg_expr *expr = &cfg->expr;
748 	uint16_t port_type;
749 
750 	/*
751 	 * - Slot implemented bit is meaningful iff current port is
752 	 *   root port or down stream port.
753 	 * - Testing for root port or down stream port is meanningful
754 	 *   iff PCI configure has type 1 header.
755 	 */
756 
757 	if (cfg->hdrtype != 1)
758 		return 0;
759 
760 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
761 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
762 		return 0;
763 
764 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
765 		return 0;
766 
767 	return 1;
768 }
769 
/*
 * Parse a PCI Express (PCIY_EXPRESS) capability: flag the chipset
 * as PCIe, record the capability location and capability word,
 * and, when the port implements a slot, its slot capabilities.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
796 
/*
 * Walk the config-space capability list of 'cfg' and dispatch each
 * entry to the matching parser in pci_read_caps[].  On x86 this
 * also enables the MSI mapping window of HyperTransport slaves.
 *
 * NOTE: the REG/WREG macros deliberately remain defined at the end
 * of this function; the VPD access routines below reuse them.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/* REG and WREG use carry through to next functions */
}
863 
864 /*
865  * PCI Vital Product Data
866  */
867 
868 #define	PCI_VPD_TIMEOUT		1000000
869 
/*
 * Read one 4-byte-aligned 32-bit word of VPD at offset 'reg'
 * through the VPD capability registers.  Busy-waits for the
 * completion flag (bit 15 of the address register) for up to
 * PCI_VPD_TIMEOUT iterations of DELAY(1).  Returns 0 on success,
 * ENXIO on timeout.  Uses the REG/WREG macros that remain defined
 * from pci_read_capabilities() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
888 
#if 0
/*
 * Write one 4-byte-aligned 32-bit word of VPD at offset 'reg'.
 * Sets the write flag (bit 15) and busy-waits for the hardware to
 * clear it.  Returns 0 on success, ENXIO on timeout.  Currently
 * compiled out; kept as the counterpart to pci_read_vpd_reg().
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
908 
909 #undef PCI_VPD_TIMEOUT
910 
/*
 * Cursor state for decoding a device's VPD image one byte at a
 * time on top of the 32-bit VPD read port.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes remaining in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
919 
920 static int
921 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
922 {
923 	uint32_t reg;
924 	uint8_t byte;
925 
926 	if (vrs->bytesinval == 0) {
927 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
928 			return (ENXIO);
929 		vrs->val = le32toh(reg);
930 		vrs->off += 4;
931 		byte = vrs->val & 0xff;
932 		vrs->bytesinval = 3;
933 	} else {
934 		vrs->val = vrs->val >> 8;
935 		byte = vrs->val & 0xff;
936 		vrs->bytesinval--;
937 	}
938 
939 	vrs->cksum += byte;
940 	*data = byte;
941 	return (0);
942 }
943 
944 int
945 pcie_slot_implemented(device_t dev)
946 {
947 	struct pci_devinfo *dinfo = device_get_ivars(dev);
948 
949 	return pcie_slotimpl(&dinfo->cfg);
950 }
951 
952 void
953 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
954 {
955 	uint8_t expr_ptr;
956 	uint16_t val;
957 
958 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
959 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
960 		panic("%s: invalid max read request size 0x%02x",
961 		      device_get_nameunit(dev), rqsize);
962 	}
963 
964 	expr_ptr = pci_get_pciecap_ptr(dev);
965 	if (!expr_ptr)
966 		panic("%s: not PCIe device", device_get_nameunit(dev));
967 
968 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
969 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
970 		if (bootverbose)
971 			device_printf(dev, "adjust device control 0x%04x", val);
972 
973 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
974 		val |= rqsize;
975 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
976 
977 		if (bootverbose)
978 			kprintf(" -> 0x%04x\n", val);
979 	}
980 }
981 
982 uint16_t
983 pcie_get_max_readrq(device_t dev)
984 {
985 	uint8_t expr_ptr;
986 	uint16_t val;
987 
988 	expr_ptr = pci_get_pciecap_ptr(dev);
989 	if (!expr_ptr)
990 		panic("%s: not PCIe device", device_get_nameunit(dev));
991 
992 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
993 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
994 }
995 
/*
 * Parse the device's Vital Product Data (VPD) and cache it in cfg->vpd:
 * the identifier string (vpd_ident), the read-only keyword array
 * (vpd_ros/vpd_rocnt) and the read/write keyword array (vpd_w/vpd_wcnt).
 *
 * A small state machine is run over the byte stream delivered by
 * vpd_nextbyte().  States:
 *   0  - expecting a resource tag (item name)
 *   1  - copying the Identifier String
 *   2  - expecting a VPD-R keyword header
 *   3  - copying a VPD-R keyword value (the "RV" keyword carries the
 *        checksum byte and is validated here)
 *   4  - skipping bytes of an unhandled item
 *   5  - expecting a VPD-W keyword header
 *   6  - copying a VPD-W keyword value
 *   -1 - normal termination or abort on invalid data
 *   -2 - I/O error reported by vpd_nextbyte()
 *
 * On checksum failure the read-only data is discarded; on I/O error all
 * partially built data is freed.  cfg->vpd.vpd_cached is set in every
 * case so the (possibly slow) parse is attempted at most once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/*
				 * Large resource data type: a 16-bit
				 * little-endian length follows the tag.
				 */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * Sanity limit: the remaining length may not
				 * extend past the maximum VPD address space
				 * (0x7f 32-bit words).
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/*
				 * Small resource data type: length is in
				 * the low 3 bits, name in bits 3-6.
				 */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the keyword array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			/* Header is: 2 keyword chars + 1 length byte. */
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store an empty string. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum; the running sum over all bytes read so
			 * far must be zero for the data to be trusted.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: shrink array to final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes of an item we don't interpret. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the keyword array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember config-space offset for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: shrink array to final size. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			/* Unreachable unless the state machine is corrupt. */
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid (possibly empty) so we never re-parse. */
	cfg->vpd.vpd_cached = 1;
/* NOTE(review): REG/WREG appear to be defined earlier in the file; these
 * #undefs end their scope here. */
#undef REG
#undef WREG
}
1275 
1276 int
1277 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1278 {
1279 	struct pci_devinfo *dinfo = device_get_ivars(child);
1280 	pcicfgregs *cfg = &dinfo->cfg;
1281 
1282 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1283 		pci_read_vpd(device_get_parent(dev), cfg);
1284 
1285 	*identptr = cfg->vpd.vpd_ident;
1286 
1287 	if (*identptr == NULL)
1288 		return (ENXIO);
1289 
1290 	return (0);
1291 }
1292 
1293 int
1294 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1295 	const char **vptr)
1296 {
1297 	struct pci_devinfo *dinfo = device_get_ivars(child);
1298 	pcicfgregs *cfg = &dinfo->cfg;
1299 	int i;
1300 
1301 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1302 		pci_read_vpd(device_get_parent(dev), cfg);
1303 
1304 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1305 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1306 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1307 			*vptr = cfg->vpd.vpd_ros[i].value;
1308 		}
1309 
1310 	if (i != cfg->vpd.vpd_rocnt)
1311 		return (0);
1312 
1313 	*vptr = NULL;
1314 	return (ENXIO);
1315 }
1316 
1317 /*
1318  * Return the offset in configuration space of the requested extended
1319  * capability entry or 0 if the specified capability was not found.
1320  */
1321 int
1322 pci_find_extcap_method(device_t dev, device_t child, int capability,
1323     int *capreg)
1324 {
1325 	struct pci_devinfo *dinfo = device_get_ivars(child);
1326 	pcicfgregs *cfg = &dinfo->cfg;
1327 	u_int32_t status;
1328 	u_int8_t ptr;
1329 
1330 	/*
1331 	 * Check the CAP_LIST bit of the PCI status register first.
1332 	 */
1333 	status = pci_read_config(child, PCIR_STATUS, 2);
1334 	if (!(status & PCIM_STATUS_CAPPRESENT))
1335 		return (ENXIO);
1336 
1337 	/*
1338 	 * Determine the start pointer of the capabilities list.
1339 	 */
1340 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1341 	case 0:
1342 	case 1:
1343 		ptr = PCIR_CAP_PTR;
1344 		break;
1345 	case 2:
1346 		ptr = PCIR_CAP_PTR_2;
1347 		break;
1348 	default:
1349 		/* XXX: panic? */
1350 		return (ENXIO);		/* no extended capabilities support */
1351 	}
1352 	ptr = pci_read_config(child, ptr, 1);
1353 
1354 	/*
1355 	 * Traverse the capabilities list.
1356 	 */
1357 	while (ptr != 0) {
1358 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1359 			if (capreg != NULL)
1360 				*capreg = ptr;
1361 			return (0);
1362 		}
1363 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1364 	}
1365 
1366 	return (ENOENT);
1367 }
1368 
1369 /*
1370  * Support for MSI-X message interrupts.
1371  */
1372 static void
1373 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1374     uint32_t data)
1375 {
1376 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1377 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1378 	uint32_t offset;
1379 
1380 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1381 	offset = msix->msix_table_offset + index * 16;
1382 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1383 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1384 	bus_write_4(msix->msix_table_res, offset + 8, data);
1385 
1386 	/* Enable MSI -> HT mapping. */
1387 	pci_ht_map_msi(dev, address);
1388 }
1389 
1390 static void
1391 pci_mask_msix_vector(device_t dev, u_int index)
1392 {
1393 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1394 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395 	uint32_t offset, val;
1396 
1397 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1398 	offset = msix->msix_table_offset + index * 16 + 12;
1399 	val = bus_read_4(msix->msix_table_res, offset);
1400 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1401 		val |= PCIM_MSIX_VCTRL_MASK;
1402 		bus_write_4(msix->msix_table_res, offset, val);
1403 	}
1404 }
1405 
1406 static void
1407 pci_unmask_msix_vector(device_t dev, u_int index)
1408 {
1409 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1410 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1411 	uint32_t offset, val;
1412 
1413 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1414 	offset = msix->msix_table_offset + index * 16 + 12;
1415 	val = bus_read_4(msix->msix_table_res, offset);
1416 	if (val & PCIM_MSIX_VCTRL_MASK) {
1417 		val &= ~PCIM_MSIX_VCTRL_MASK;
1418 		bus_write_4(msix->msix_table_res, offset, val);
1419 	}
1420 }
1421 
1422 int
1423 pci_pending_msix_vector(device_t dev, u_int index)
1424 {
1425 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1426 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1427 	uint32_t offset, bit;
1428 
1429 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1430 	    ("MSI-X is not setup yet"));
1431 
1432 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1433 	offset = msix->msix_pba_offset + (index / 32) * 4;
1434 	bit = 1 << index % 32;
1435 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1436 }
1437 
1438 /*
1439  * Restore MSI-X registers and table during resume.  If MSI-X is
1440  * enabled then walk the virtual table to restore the actual MSI-X
1441  * table.
1442  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;

	if (msix->msix_table_res != NULL) {
		const struct msix_vector *mv;

		/* Quiesce all vectors before reprogramming the table. */
		pci_mask_msix_allvectors(dev);

		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
			u_int vector;

			/* Vectors never set up have no saved addr/data. */
			if (mv->mv_address == 0)
				continue;

			/* Replay the saved message, then unmask it. */
			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
			pci_setup_msix_vector(dev, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(dev, vector);
		}
	}
	/*
	 * Restore the saved MSI-X control word.  NOTE(review): this write
	 * happens even when the table was never set up -- presumably the
	 * caller only invokes this when an MSI-X capability exists
	 * (msix_location != 0); verify at the call site.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1469 
1470 /*
1471  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1472  *
1473  * After this function returns, the MSI-X's rid will be saved in rid0.
1474  */
int
pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
    int *rid0, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_vector *mv;
	struct resource_list_entry *rle;
	int error, irq, rid;

	/* pci_setup_msix() must have been called first. */
	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
	KASSERT(vector < msix->msix_msgnum,
	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
		    vector, msix->msix_msgnum);
	}

	/* Set rid according to vector number */
	rid = PCI_MSIX_VEC2RID(vector);

	/* Vector has already been allocated */
	mv = pci_find_msix_vector(child, rid);
	if (mv != NULL)
		return EBUSY;

	/* Allocate a message. */
	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
	if (error)
		return error;
	/* Publish the IRQ as a SYS_RES_IRQ resource under 'rid'. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
	    irq, irq, 1, cpuid);

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
		    rle->start, cpuid);
	}

	/* Update counts of alloc'd messages. */
	msix->msix_alloc++;

	/* Track the vector so teardown/resume can find it by rid. */
	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
	mv->mv_rid = rid;
	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);

	*rid0 = rid;
	return 0;
}
1528 
/*
 * Release one previously allocated MSI-X vector identified by its
 * SYS_RES_IRQ rid.  The vector must already have been torn down (its
 * saved message address cleared) and its IRQ resource released by the
 * consumer.  Undoes everything pci_alloc_msix_vector_method() did.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
	KASSERT(rid > 0, ("invalid rid %d", rid));

	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d", rid));

	/* Capture IRQ/cpu before the entry is deleted below. */
	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	/* Drop our bookkeeping for this vector. */
	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1568 
1569 /*
1570  * Return the max supported MSI-X messages this device supports.
1571  * Basically, assuming the MD code can alloc messages, this function
1572  * should return the maximum value that pci_alloc_msix() can return.
1573  * Thus, it is subject to the tunables, etc.
1574  */
1575 int
1576 pci_msix_count_method(device_t dev, device_t child)
1577 {
1578 	struct pci_devinfo *dinfo = device_get_ivars(child);
1579 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1580 
1581 	if (pci_do_msix && msix->msix_location != 0)
1582 		return (msix->msix_msgnum);
1583 	return (0);
1584 }
1585 
/*
 * Prepare the device for MSI-X use: verify nothing conflicting is
 * allocated (legacy rid-0 IRQ, MSI), check the global blacklist and
 * tunables, require the BARs holding the vector table and the PBA to
 * be mapped and active, cache those resources, and mask every vector.
 * Returns 0 on success, ENXIO/ENODEV on the various failure cases.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table's BAR, 'rle' still refers to the
	 * table entry here, so pba_res aliases table_res.
	 */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	/* Start with every vector masked. */
	pci_mask_msix_allvectors(dev);

	return 0;
}
1642 
1643 void
1644 pci_teardown_msix(device_t dev)
1645 {
1646 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1647 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1648 
1649 	KASSERT(msix->msix_table_res != NULL &&
1650 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1651 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1652 	    ("MSI-X vector is still allocated"));
1653 
1654 	pci_mask_msix_allvectors(dev);
1655 
1656 	msix->msix_table_res = NULL;
1657 	msix->msix_pba_res = NULL;
1658 }
1659 
1660 void
1661 pci_enable_msix(device_t dev)
1662 {
1663 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1664 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1665 
1666 	KASSERT(msix->msix_table_res != NULL &&
1667 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1668 
1669 	/* Update control register to enable MSI-X. */
1670 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1671 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1672 	    msix->msix_ctrl, 2);
1673 }
1674 
1675 void
1676 pci_disable_msix(device_t dev)
1677 {
1678 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1679 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1680 
1681 	KASSERT(msix->msix_table_res != NULL &&
1682 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1683 
1684 	/* Disable MSI -> HT mapping. */
1685 	pci_ht_map_msi(dev, 0);
1686 
1687 	/* Update control register to disable MSI-X. */
1688 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1689 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1690 	    msix->msix_ctrl, 2);
1691 }
1692 
1693 static void
1694 pci_mask_msix_allvectors(device_t dev)
1695 {
1696 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1697 	u_int i;
1698 
1699 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1700 		pci_mask_msix_vector(dev, i);
1701 }
1702 
1703 static struct msix_vector *
1704 pci_find_msix_vector(device_t dev, int rid)
1705 {
1706 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1707 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1708 	struct msix_vector *mv;
1709 
1710 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1711 		if (mv->mv_rid == rid)
1712 			return mv;
1713 	}
1714 	return NULL;
1715 }
1716 
1717 /*
1718  * HyperTransport MSI mapping control
1719  */
1720 void
1721 pci_ht_map_msi(device_t dev, uint64_t addr)
1722 {
1723 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1724 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1725 
1726 	if (!ht->ht_msimap)
1727 		return;
1728 
1729 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1730 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1731 		/* Enable MSI -> HT mapping. */
1732 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1733 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1734 		    ht->ht_msictrl, 2);
1735 	}
1736 
1737 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1738 		/* Disable MSI -> HT mapping. */
1739 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1740 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1741 		    ht->ht_msictrl, 2);
1742 	}
1743 }
1744 
1745 /*
1746  * Support for MSI message signalled interrupts.
1747  */
/*
 * Program the MSI capability with the given message address/data and
 * enable MSI delivery.  The address and data registers are written
 * before the enable bit is set so the device never signals a stale
 * message.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: data register sits after the high dword. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1774 
1775 void
1776 pci_disable_msi(device_t dev)
1777 {
1778 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1779 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1780 
1781 	/* Disable MSI -> HT mapping. */
1782 	pci_ht_map_msi(dev, 0);
1783 
1784 	/* Disable MSI in the control register. */
1785 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1786 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1787 	    2);
1788 }
1789 
1790 /*
1791  * Restore MSI registers during resume.  If MSI is enabled then
1792  * restore the data and address registers in addition to the control
1793  * register.
1794  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Replay the saved message address/data before re-enabling. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			/* 64-bit layout shifts the data register. */
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Restore the saved control word last (carries the enable bit). */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1820 
1821 /*
1822  * Returns true if the specified device is blacklisted because MSI
1823  * doesn't work.
1824  */
1825 int
1826 pci_msi_device_blacklisted(device_t dev)
1827 {
1828 	struct pci_quirk *q;
1829 
1830 	if (!pci_honor_msi_blacklist)
1831 		return (0);
1832 
1833 	for (q = &pci_quirks[0]; q->devid; q++) {
1834 		if (q->devid == pci_get_devid(dev) &&
1835 		    q->type == PCI_QUIRK_DISABLE_MSI)
1836 			return (1);
1837 	}
1838 	return (0);
1839 }
1840 
1841 /*
1842  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1843  * we just check for blacklisted chipsets as represented by the
1844  * host-PCI bridge at device 0:0:0.  In the future, it may become
1845  * necessary to check other system attributes, such as the kenv values
1846  * that give the motherboard manufacturer and model number.
1847  */
1848 static int
1849 pci_msi_blacklisted(void)
1850 {
1851 	device_t dev;
1852 
1853 	if (!pci_honor_msi_blacklist)
1854 		return (0);
1855 
1856 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1857 	if (!(pcie_chipset || pcix_chipset))
1858 		return (1);
1859 
1860 	dev = pci_find_bsf(0, 0, 0);
1861 	if (dev != NULL)
1862 		return (pci_msi_device_blacklisted(dev));
1863 	return (0);
1864 }
1865 
1866 /*
1867  * Attempt to allocate count MSI messages on start_cpuid.
1868  *
1869  * If start_cpuid < 0, then the MSI messages' target CPU will be
1870  * selected automaticly.
1871  *
1872  * If the caller explicitly specified the MSI messages' target CPU,
1873  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1874  * messages on the specified CPU, if the allocation fails due to MD
1875  * does not have enough vectors (EMSGSIZE), then we will try next
1876  * available CPU, until the allocation fails on all CPUs.
1877  *
1878  * EMSGSIZE will be returned, if all available CPUs does not have
1879  * enough vectors for the requested amount of MSI messages.  Caller
1880  * should either reduce the amount of MSI messages to be requested,
1881  * or simply giving up using MSI.
1882  *
1883  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1884  * returned in 'rid' array, if the allocation succeeds.
1885  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
    int start_cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32], cpuid = 0;
	uint16_t ctrl;

	/* MSI allows 1-32 messages, always a power of two. */
	KASSERT(count != 0 && count <= 32 && powerof2(count),
	    ("invalid MSI count %d", count));
	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
	    !pci_do_msi)
		return (ENODEV);

	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
	    count, cfg->msi.msi_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate %d MSI vector%s (%d supported)\n",
		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
	}

	/* Auto-select a starting CPU by round-robin when not specified. */
	if (start_cpuid < 0)
		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;

	/*
	 * Try each CPU in turn; EMSGSIZE means that CPU lacks free
	 * vectors, any other error is fatal (see the function's big
	 * header comment above).
	 */
	error = EINVAL;
	for (i = 0; i < ncpus; ++i) {
		cpuid = (start_cpuid + i) % ncpus;

		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
		    cfg->msi.msi_msgnum, irqs, cpuid);
		if (error == 0)
			break;
		else if (error != EMSGSIZE)
			return error;
	}
	if (error)
		return error;

	/*
	 * We now have N messages mapped onto SYS_RES_IRQ resources in
	 * the irqs[] array, so add new resources starting at rid 1.
	 */
	for (i = 0; i < count; i++) {
		rid[i] = i + 1;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, cpuid);
	}

	if (bootverbose) {
		if (count == 1) {
			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
			    irqs[0], cpuid);
		} else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < count; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[count - 1]);
			kprintf(" for MSI on cpu%d\n", cpuid);
		}
	}

	/* Update control register with count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* Multiple Message Enable field encodes log2(count). */
	ctrl |= (ffs(count) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = count;
	cfg->msi.msi_handlers = 0;
	return (0);
}
2005 
2006 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/*
	 * Collect the IRQ numbers (rids 1..msi_alloc) and verify that
	 * every message targets the same CPU and none is still in use.
	 */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2059 
2060 /*
2061  * Return the max supported MSI messages this device supports.
2062  * Basically, assuming the MD code can alloc messages, this function
2063  * should return the maximum value that pci_alloc_msi() can return.
2064  * Thus, it is subject to the tunables, etc.
2065  */
2066 int
2067 pci_msi_count_method(device_t dev, device_t child)
2068 {
2069 	struct pci_devinfo *dinfo = device_get_ivars(child);
2070 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2071 
2072 	if (pci_do_msi && msi->msi_location != 0)
2073 		return (msi->msi_msgnum);
2074 	return (0);
2075 }
2076 
2077 /* kfree pcicfgregs structure and all depending data structures */
2078 
2079 int
2080 pci_freecfg(struct pci_devinfo *dinfo)
2081 {
2082 	struct devlist *devlist_head;
2083 	int i;
2084 
2085 	devlist_head = &pci_devq;
2086 
2087 	if (dinfo->cfg.vpd.vpd_reg) {
2088 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2089 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2090 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2091 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2092 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2093 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2094 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2095 	}
2096 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2097 	kfree(dinfo, M_DEVBUF);
2098 
2099 	/* increment the generation count */
2100 	pci_generation++;
2101 
2102 	/* we're losing one device */
2103 	pci_numdevs--;
2104 	return (0);
2105 }
2106 
2107 /*
2108  * PCI power manangement
2109  */
int
pci_set_powerstate_method(device_t dev, device_t child, int state)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	uint16_t status;
	int oldstate, highest, delay;

	/* Device has no power management capability. */
	if (cfg->pp.pp_cap == 0)
		return (EOPNOTSUPP);

	/*
	 * Optimize a no state change request away.  While it would be OK to
	 * write to the hardware in theory, some devices have shown odd
	 * behavior when going from D3 -> D3.
	 */
	oldstate = pci_get_powerstate(child);
	if (oldstate == state)
		return (0);

	/*
	 * The PCI power management specification states that after a state
	 * transition between PCI power states, system software must
	 * guarantee a minimal delay before the function accesses the device.
	 * Compute the worst case delay that we need to guarantee before we
	 * access the device.  Many devices will be responsive much more
	 * quickly than this delay, but there are some that don't respond
	 * instantly to state changes.  Transitions to/from D3 state require
	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
	 * is done below with DELAY rather than a sleeper function because
	 * this function can be called from contexts where we cannot sleep.
	 */
	highest = (oldstate > state) ? oldstate : state;
	if (highest == PCI_POWERSTATE_D3)
	    delay = 10000;
	else if (highest == PCI_POWERSTATE_D2)
	    delay = 200;
	else
	    delay = 0;
	/* Preserve the other PMCSR bits; only the state field changes. */
	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
	    & ~PCIM_PSTAT_DMASK;
	switch (state) {
	case PCI_POWERSTATE_D0:
		status |= PCIM_PSTAT_D0;
		break;
	case PCI_POWERSTATE_D1:
		/* D1 support is optional; the capability must advertise it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D1;
		break;
	case PCI_POWERSTATE_D2:
		/* D2 support is optional; the capability must advertise it. */
		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
			return (EOPNOTSUPP);
		status |= PCIM_PSTAT_D2;
		break;
	case PCI_POWERSTATE_D3:
		status |= PCIM_PSTAT_D3;
		break;
	default:
		return (EINVAL);
	}

	if (bootverbose)
		kprintf(
		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
		    dinfo->cfg.func, oldstate, state);

	/* Write the new state, then honor the worst-case settle time. */
	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
	if (delay)
		DELAY(delay);
	return (0);
}
2183 
2184 int
2185 pci_get_powerstate_method(device_t dev, device_t child)
2186 {
2187 	struct pci_devinfo *dinfo = device_get_ivars(child);
2188 	pcicfgregs *cfg = &dinfo->cfg;
2189 	uint16_t status;
2190 	int result;
2191 
2192 	if (cfg->pp.pp_cap != 0) {
2193 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2194 		switch (status & PCIM_PSTAT_DMASK) {
2195 		case PCIM_PSTAT_D0:
2196 			result = PCI_POWERSTATE_D0;
2197 			break;
2198 		case PCIM_PSTAT_D1:
2199 			result = PCI_POWERSTATE_D1;
2200 			break;
2201 		case PCIM_PSTAT_D2:
2202 			result = PCI_POWERSTATE_D2;
2203 			break;
2204 		case PCIM_PSTAT_D3:
2205 			result = PCI_POWERSTATE_D3;
2206 			break;
2207 		default:
2208 			result = PCI_POWERSTATE_UNKNOWN;
2209 			break;
2210 		}
2211 	} else {
2212 		/* No support, device is always at D0 */
2213 		result = PCI_POWERSTATE_D0;
2214 	}
2215 	return (result);
2216 }
2217 
2218 /*
2219  * Some convenience functions for PCI device drivers.
2220  */
2221 
2222 static __inline void
2223 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2224 {
2225 	uint16_t	command;
2226 
2227 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2228 	command |= bit;
2229 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2230 }
2231 
2232 static __inline void
2233 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2234 {
2235 	uint16_t	command;
2236 
2237 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2238 	command &= ~bit;
2239 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2240 }
2241 
2242 int
2243 pci_enable_busmaster_method(device_t dev, device_t child)
2244 {
2245 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2246 	return (0);
2247 }
2248 
2249 int
2250 pci_disable_busmaster_method(device_t dev, device_t child)
2251 {
2252 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2253 	return (0);
2254 }
2255 
2256 int
2257 pci_enable_io_method(device_t dev, device_t child, int space)
2258 {
2259 	uint16_t command;
2260 	uint16_t bit;
2261 	char *error;
2262 
2263 	bit = 0;
2264 	error = NULL;
2265 
2266 	switch(space) {
2267 	case SYS_RES_IOPORT:
2268 		bit = PCIM_CMD_PORTEN;
2269 		error = "port";
2270 		break;
2271 	case SYS_RES_MEMORY:
2272 		bit = PCIM_CMD_MEMEN;
2273 		error = "memory";
2274 		break;
2275 	default:
2276 		return (EINVAL);
2277 	}
2278 	pci_set_command_bit(dev, child, bit);
2279 	/* Some devices seem to need a brief stall here, what do to? */
2280 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2281 	if (command & bit)
2282 		return (0);
2283 	device_printf(child, "failed to enable %s mapping!\n", error);
2284 	return (ENXIO);
2285 }
2286 
2287 int
2288 pci_disable_io_method(device_t dev, device_t child, int space)
2289 {
2290 	uint16_t command;
2291 	uint16_t bit;
2292 	char *error;
2293 
2294 	bit = 0;
2295 	error = NULL;
2296 
2297 	switch(space) {
2298 	case SYS_RES_IOPORT:
2299 		bit = PCIM_CMD_PORTEN;
2300 		error = "port";
2301 		break;
2302 	case SYS_RES_MEMORY:
2303 		bit = PCIM_CMD_MEMEN;
2304 		error = "memory";
2305 		break;
2306 	default:
2307 		return (EINVAL);
2308 	}
2309 	pci_clear_command_bit(dev, child, bit);
2310 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2311 	if (command & bit) {
2312 		device_printf(child, "failed to disable %s mapping!\n", error);
2313 		return (ENXIO);
2314 	}
2315 	return (0);
2316 }
2317 
2318 /*
2319  * New style pci driver.  Parent device is either a pci-host-bridge or a
2320  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2321  */
2322 
/*
 * Dump the interesting parts of a device's config header to the console
 * when booting verbose.  Purely informational; no state is changed.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based; 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management capability: supported states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			/* MSI capability: message count and feature flags. */
			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* MSI-X capability: message count and which BAR(s)
			 * hold the vector table and pending-bit array. */
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2380 
/*
 * Verbose-boot helper: describe the PCI Express capability (version,
 * port type, slot capabilities) of a device, if it has one.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	/* Translate the port type field into a human-readable tag. */
	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	case PCIE_ROOT_END_POINT:
		port_name = "ROOTDEV";
		break;
	case PCIE_ROOT_EVT_COLL:
		port_name = "ROOTEVTC";
		break;
	default:
		port_name = NULL;
		break;
	}
	/*
	 * Root/downstream ports without an implemented slot are not
	 * interesting as ports; suppress the tag for those.
	 */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
	kprintf("\n");
}
2445 
2446 static int
2447 pci_porten(device_t pcib, int b, int s, int f)
2448 {
2449 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2450 		& PCIM_CMD_PORTEN) != 0;
2451 }
2452 
2453 static int
2454 pci_memen(device_t pcib, int b, int s, int f)
2455 {
2456 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2457 		& PCIM_CMD_MEMEN) != 0;
2458 }
2459 
2460 /*
2461  * Add a resource based on a pci map register. Return 1 if the map
2462  * register is a 32bit map register or 2 if it is a 64bit register.
2463  */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	uint16_t old_cmd;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);

	/*
	 * Disable memory decoding while we size the BAR: writing all-ones
	 * to the register below would otherwise briefly expose a bogus
	 * mapping.
	 */
	old_cmd = 0;
	if (PCI_BAR_MEM(map)) {
		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
		cmd = old_cmd & ~PCIM_CMD_MEMEN;
		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
	}

	/*
	 * Standard BAR sizing: write all-ones, read back the bits that
	 * stuck (they encode the size), then restore the original value.
	 */
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

        /* Restore memory access mode */
	if (PCI_BAR_MEM(map)) {
		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
	}

	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	/* A 64-bit BAR consumes two consecutive 32-bit registers. */
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			kprintf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			kprintf(", memory disabled\n");
		else
			kprintf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	/* Reject a base that does not fit in the rman's u_long range. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count, -1);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0, -1);
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list
		 * entry to force pci_alloc_resource() to allocate
		 * resources from the parent.
		 */
		resource_list_delete(rl, type, reg);
#ifdef PCI_BAR_CLEAR
		/* Clear the BAR */
		start = 0;
#else	/* !PCI_BAR_CLEAR */
		/*
		 * Don't clear BAR here.  Some BIOS lists HPET as a
		 * PCI function, clearing the BAR causes HPET timer
		 * stop ticking.
		 */
		if (bootverbose) {
			kprintf("pci:%d:%d:%d: resource reservation failed "
				"%#jx - %#jx\n", b, s, f,
				(intmax_t)start, (intmax_t)end);
		}
		return (barlen);
#endif	/* PCI_BAR_CLEAR */
	} else {
		start = rman_get_start(res);
	}
	/* Program the (possibly relocated) base back into the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2633 
2634 /*
2635  * For ATA devices we need to decide early what addressing mode to use.
2636  * Legacy demands that the primary and secondary ATA ports sits on the
2637  * same addresses that old ISA hardware did. This dictates that we use
2638  * those addresses and ignore the BAR's if we cannot set PCI native
2639  * addressing mode.
2640  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: use BAR 0/1. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Primary channel in compat mode: fixed ISA 0x1f0/0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: use BAR 2/3. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Secondary channel in compat mode: fixed ISA 0x170/0x376. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* BAR 4/5 (bus-master DMA etc.) are always handled normally. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2694 
/*
 * Determine the legacy INTx IRQ for a device and record it as rid 0 in
 * its resource list.  Precedence: user tunable, then bus routing (or
 * the existing intline register, depending on force_route).
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			/* The override must map to a real legacy intr. */
			if (machintr_legacy_intr_find(irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
				device_printf(dev,
				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
				    cfg->intpin + 'A' - 1, irq);
				irq = PCI_INVALID_IRQ;
			} else {
				BUS_CONFIG_INTR(bus, dev, irq,
				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
			}
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2758 
/*
 * Populate a device's resource list from its BARs, quirk table entries,
 * and its interrupt line.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2804 
/*
 * Scan every slot/function on the given bus and add a child device for
 * each function that responds with a sane header type.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	/* Callers may pass a larger, derived devinfo structure. */
	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots that return an unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX funcs. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2837 
/*
 * Attach a probed device to the bus: create the newbus child, snapshot
 * and restore its config space, then reserve its BAR/IRQ resources.
 * The save-before-restore order seeds the saved-register cache.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2852 
2853 static int
2854 pci_probe(device_t dev)
2855 {
2856 	device_set_desc(dev, "PCI bus");
2857 
2858 	/* Allow other subclasses to override this driver. */
2859 	return (-1000);
2860 }
2861 
2862 static int
2863 pci_attach(device_t dev)
2864 {
2865 	int busno, domain;
2866 
2867 	/*
2868 	 * Since there can be multiple independantly numbered PCI
2869 	 * busses on systems with multiple PCI domains, we can't use
2870 	 * the unit number to decide which bus we are probing. We ask
2871 	 * the parent pcib what our domain and bus numbers are.
2872 	 */
2873 	domain = pcib_get_domain(dev);
2874 	busno = pcib_get_bus(dev);
2875 	if (bootverbose)
2876 		device_printf(dev, "domain=%d, physical bus=%d\n",
2877 		    domain, busno);
2878 
2879 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2880 
2881 	return (bus_generic_attach(dev));
2882 }
2883 
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default sleep state. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
2931 
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2966 
2967 static void
2968 pci_load_vendor_data(void)
2969 {
2970 	caddr_t vendordata, info;
2971 
2972 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2973 		info = preload_search_info(vendordata, MODINFO_ADDR);
2974 		pci_vendordata = *(char **)info;
2975 		info = preload_search_info(vendordata, MODINFO_SIZE);
2976 		pci_vendordata_size = *(size_t *)info;
2977 		/* terminate the database */
2978 		pci_vendordata[pci_vendordata_size] = '\n';
2979 	}
2980 }
2981 
/*
 * Called when a new driver is registered with the bus: give it a chance
 * to identify new children, then re-probe every child that is still
 * unclaimed.  Config space is restored before the probe and saved
 * (powering the device back down) if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only re-probe children no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
3011 
3012 static void
3013 pci_child_detached(device_t parent __unused, device_t child)
3014 {
3015 	/* Turn child's power off */
3016 	pci_cfg_save(child, device_get_ivars(child), 1);
3017 }
3018 
/*
 * Bus method: install an interrupt handler for a child device.
 *
 * The handler is first installed via bus_generic_setup_intr().  For
 * direct children the delivery mechanism is then programmed: rid 0 is
 * the legacy INTx pin (enabled by clearing PCIM_CMD_INTxDIS), while a
 * non-zero rid refers to an MSI or MSI-X message whose address/data
 * pair is obtained from the parent bridge via PCIB_MAP_MSI() and
 * written to the device.  If mapping fails the just-installed handler
 * is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* Map the vectors once, on first handler setup. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * NB: the success path falls through to "bad" with
		 * error == 0, so the teardown below only runs after a
		 * failed PCIB_MAP_MSI() above.
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3109 
/*
 * Bus method: tear down an interrupt handler installed by
 * pci_setup_intr().
 *
 * For direct children the delivery mechanism is also wound down:
 * rid 0 masks the legacy INTx pin; a non-zero rid drops an MSI
 * handler reference (disabling MSI when the count reaches zero) or
 * masks and unmaps the corresponding MSI-X vector.  Returns 0 or an
 * errno value from the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	int rid, error;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);

		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			KASSERT(rid <= msi->msi_alloc,
			    ("MSI-X index too high"));
			KASSERT(msi->msi_handlers > 0,
			    ("MSI rid %d is not setup", rid));

			msi->msi_handlers--;
			if (msi->msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			struct msix_vector *mv;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address != 0,
			    ("MSI-X rid %d has not been setup", rid));

			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
			mv->mv_address = 0;
			mv->mv_data = 0;
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	/*
	 * The MSI/MSI-X state was already torn down above; a failure
	 * here would leave it inconsistent, hence the assertion.
	 */
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3170 
3171 int
3172 pci_print_child(device_t dev, device_t child)
3173 {
3174 	struct pci_devinfo *dinfo;
3175 	struct resource_list *rl;
3176 	int retval = 0;
3177 
3178 	dinfo = device_get_ivars(child);
3179 	rl = &dinfo->resources;
3180 
3181 	retval += bus_print_child_header(dev, child);
3182 
3183 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3184 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3185 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3186 	if (device_get_flags(dev))
3187 		retval += kprintf(" flags %#x", device_get_flags(dev));
3188 
3189 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3190 	    pci_get_function(child));
3191 
3192 	retval += bus_print_child_footer(dev, child);
3193 
3194 	return (retval);
3195 }
3196 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a generic description for devices not found in the vendor
 * database.  A subclass of -1 is the fallback entry for the whole
 * class; the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3288 
3289 void
3290 pci_probe_nomatch(device_t dev, device_t child)
3291 {
3292 	int	i;
3293 	char	*cp, *scp, *device;
3294 
3295 	/*
3296 	 * Look for a listing for this device in a loaded device database.
3297 	 */
3298 	if ((device = pci_describe_device(child)) != NULL) {
3299 		device_printf(dev, "<%s>", device);
3300 		kfree(device, M_DEVBUF);
3301 	} else {
3302 		/*
3303 		 * Scan the class/subclass descriptions for a general
3304 		 * description.
3305 		 */
3306 		cp = "unknown";
3307 		scp = NULL;
3308 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3309 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3310 				if (pci_nomatch_tab[i].subclass == -1) {
3311 					cp = pci_nomatch_tab[i].desc;
3312 				} else if (pci_nomatch_tab[i].subclass ==
3313 				    pci_get_subclass(child)) {
3314 					scp = pci_nomatch_tab[i].desc;
3315 				}
3316 			}
3317 		}
3318 		device_printf(dev, "<%s%s%s>",
3319 		    cp ? cp : "",
3320 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3321 		    scp ? scp : "");
3322 	}
3323 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3324 		pci_get_vendor(child), pci_get_device(child),
3325 		pci_get_slot(child), pci_get_function(child));
3326 	if (pci_get_intpin(child) > 0) {
3327 		int irq;
3328 
3329 		irq = pci_get_irq(child);
3330 		if (PCI_INTERRUPT_VALID(irq))
3331 			kprintf(" irq %d", irq);
3332 	}
3333 	kprintf("\n");
3334 
3335 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3336 }
3337 
3338 /*
3339  * Parse the PCI device database, if loaded, and return a pointer to a
3340  * description of the device.
3341  *
3342  * The database is flat text formatted as follows:
3343  *
3344  * Any line not in a valid format is ignored.
3345  * Lines are terminated with newline '\n' characters.
3346  *
3347  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3348  * the vendor name.
3349  *
3350  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3351  * - devices cannot be listed without a corresponding VENDOR line.
3352  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3353  * another TAB, then the device name.
3354  */
3355 
3356 /*
3357  * Assuming (ptr) points to the beginning of a line in the database,
3358  * return the vendor or device and description of the next entry.
3359  * The value of (vendor) or (device) inappropriate for the entry type
3360  * is set to -1.  Returns nonzero at the end of the database.
3361  *
3362  * Note that this is slightly unrobust in the face of corrupt data;
3363  * we attempt to safeguard against this by spamming the end of the
3364  * database with a newline when we initialise.
3365  */
3366 static int
3367 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3368 {
3369 	char	*cp = *ptr;
3370 	int	left;
3371 
3372 	*device = -1;
3373 	*vendor = -1;
3374 	**desc = '\0';
3375 	for (;;) {
3376 		left = pci_vendordata_size - (cp - pci_vendordata);
3377 		if (left <= 0) {
3378 			*ptr = cp;
3379 			return(1);
3380 		}
3381 
3382 		/* vendor entry? */
3383 		if (*cp != '\t' &&
3384 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3385 			break;
3386 		/* device entry? */
3387 		if (*cp == '\t' &&
3388 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3389 			break;
3390 
3391 		/* skip to next line */
3392 		while (*cp != '\n' && left > 0) {
3393 			cp++;
3394 			left--;
3395 		}
3396 		if (*cp == '\n') {
3397 			cp++;
3398 			left--;
3399 		}
3400 	}
3401 	/* skip to next line */
3402 	while (*cp != '\n' && left > 0) {
3403 		cp++;
3404 		left--;
3405 	}
3406 	if (*cp == '\n' && left > 0)
3407 		cp++;
3408 	*ptr = cp;
3409 	return(0);
3410 }
3411 
/*
 * Return a kmalloc()ed "vendor, device" description string for (dev)
 * from the loaded PCI vendor database, or NULL if the database is not
 * loaded, memory cannot be allocated, or the vendor is not listed.
 * The caller must kfree() the result with M_DEVBUF.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	/*
	 * Scan the device entries below the vendor line; stop at end
	 * of database or at the next vendor line (vendor != -1), in
	 * which case the device is unlisted and we fall back to
	 * printing its hex id below.
	 */
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* "+ 3" covers the ", " separator plus the terminating NUL. */
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3464 
/*
 * Bus method: read an instance variable of a PCI child device.
 * Each ivar maps directly onto a field of the cached config-space
 * copy in the child's pci_devinfo.  Returns 0 on success, EINVAL for
 * PCI_IVAR_ETHADDR (never valid for the generic PCI bus), or ENOENT
 * for unknown ivars.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3556 
3557 int
3558 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3559 {
3560 	struct pci_devinfo *dinfo;
3561 
3562 	dinfo = device_get_ivars(child);
3563 
3564 	switch (which) {
3565 	case PCI_IVAR_INTPIN:
3566 		dinfo->cfg.intpin = value;
3567 		return (0);
3568 	case PCI_IVAR_ETHADDR:
3569 	case PCI_IVAR_SUBVENDOR:
3570 	case PCI_IVAR_SUBDEVICE:
3571 	case PCI_IVAR_VENDOR:
3572 	case PCI_IVAR_DEVICE:
3573 	case PCI_IVAR_DEVID:
3574 	case PCI_IVAR_CLASS:
3575 	case PCI_IVAR_SUBCLASS:
3576 	case PCI_IVAR_PROGIF:
3577 	case PCI_IVAR_REVID:
3578 	case PCI_IVAR_IRQ:
3579 	case PCI_IVAR_DOMAIN:
3580 	case PCI_IVAR_BUS:
3581 	case PCI_IVAR_SLOT:
3582 	case PCI_IVAR_FUNCTION:
3583 		return (EINVAL);	/* disallow for now */
3584 
3585 	default:
3586 		return (ENOENT);
3587 	}
3588 }
#ifdef notyet
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

/*
 * NB: this whole region is compiled out (#ifdef notyet).  The "show
 * pciregs" ddb command walks the global pci_devq and prints one line
 * per device with its class/card/chip ids, revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
#endif
3640 
/*
 * Lazily size and allocate the resource backing a BAR.
 *
 * The BAR is probed by writing all-ones and reading back the size
 * mask (the standard PCI sizing protocol); the original BAR value is
 * restored immediately afterwards so a low-level console device
 * remains addressable.  The probed size/alignment override whatever
 * the driver asked for, the resource is allocated from the parent,
 * recorded in the child's resource list, and finally the allocated
 * address is programmed back into the BAR (both halves for a 64-bit
 * BAR).  Returns the resource or NULL on any failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/*
	 * Program the BAR: either the freshly allocated address, or
	 * (on the failure paths) the original value read above.
	 */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3739 
3740 
/*
 * Bus method: allocate a resource for a child device.
 *
 * For direct children this performs lazy allocation: legacy IRQ
 * routing on demand (refused once MSI/MSI-X messages are allocated),
 * I/O enable and BAR sizing via pci_alloc_map() for port/memory
 * ranges, and reuse (with optional activation) of resources already
 * recorded in the child's resource list.  Everything else falls
 * through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
3817 
/*
 * Bus method: delete a resource from a direct child's resource list.
 * Refuses (with a diagnostic) if the resource is still active or
 * owned by the child; otherwise releases it, removes the list entry,
 * clears the corresponding config register and forwards the deletion
 * to the parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3853 
3854 struct resource_list *
3855 pci_get_resource_list (device_t dev, device_t child)
3856 {
3857 	struct pci_devinfo *dinfo = device_get_ivars(child);
3858 
3859 	if (dinfo == NULL)
3860 		return (NULL);
3861 
3862 	return (&dinfo->resources);
3863 }
3864 
3865 uint32_t
3866 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3867 {
3868 	struct pci_devinfo *dinfo = device_get_ivars(child);
3869 	pcicfgregs *cfg = &dinfo->cfg;
3870 
3871 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3872 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3873 }
3874 
3875 void
3876 pci_write_config_method(device_t dev, device_t child, int reg,
3877     uint32_t val, int width)
3878 {
3879 	struct pci_devinfo *dinfo = device_get_ivars(child);
3880 	pcicfgregs *cfg = &dinfo->cfg;
3881 
3882 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3883 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3884 }
3885 
3886 int
3887 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3888     size_t buflen)
3889 {
3890 
3891 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3892 	    pci_get_function(child));
3893 	return (0);
3894 }
3895 
3896 int
3897 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3898     size_t buflen)
3899 {
3900 	struct pci_devinfo *dinfo;
3901 	pcicfgregs *cfg;
3902 
3903 	dinfo = device_get_ivars(child);
3904 	cfg = &dinfo->cfg;
3905 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3906 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3907 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3908 	    cfg->progif);
3909 	return (0);
3910 }
3911 
3912 int
3913 pci_assign_interrupt_method(device_t dev, device_t child)
3914 {
3915 	struct pci_devinfo *dinfo = device_get_ivars(child);
3916 	pcicfgregs *cfg = &dinfo->cfg;
3917 
3918 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3919 	    cfg->intpin));
3920 }
3921 
3922 static int
3923 pci_modevent(module_t mod, int what, void *arg)
3924 {
3925 	static struct cdev *pci_cdev;
3926 
3927 	switch (what) {
3928 	case MOD_LOAD:
3929 		STAILQ_INIT(&pci_devq);
3930 		pci_generation = 0;
3931 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3932 				    "pci");
3933 		pci_load_vendor_data();
3934 		break;
3935 
3936 	case MOD_UNLOAD:
3937 		destroy_dev(pci_cdev);
3938 		break;
3939 	}
3940 
3941 	return (0);
3942 }
3943 
/*
 * Restore a device's config space (BARs, command register, interrupt
 * line/pin, timers, etc.) from the copy cached by pci_cfg_save(),
 * powering the device back up to D0 first.  Any MSI/MSI-X
 * configuration is reprogrammed as well.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3989 
3990 void
3991 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3992 {
3993 	int i;
3994 	uint32_t cls;
3995 	int ps;
3996 
3997 	/*
3998 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3999 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4000 	 * which also require special treatment.  Other types are unknown, and
4001 	 * we err on the side of safety by ignoring them.  Powering down
4002 	 * bridges should not be undertaken lightly.
4003 	 */
4004 	if (dinfo->cfg.hdrtype != 0)
4005 		return;
4006 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4007 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4008 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4009 
4010 	/*
4011 	 * Some drivers apparently write to these registers w/o updating our
4012 	 * cached copy.  No harm happens if we update the copy, so do so here
4013 	 * so we can restore them.  The COMMAND register is modified by the
4014 	 * bus w/o updating the cache.  This should represent the normally
4015 	 * writable portion of the 'defined' part of type 0 headers.  In
4016 	 * theory we also need to save/restore the PCI capability structures
4017 	 * we know about, but apart from power we don't know any that are
4018 	 * writable.
4019 	 */
4020 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4021 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4022 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4023 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4024 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4025 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4026 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4027 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4028 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4029 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4030 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4031 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4032 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4033 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4034 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4035 
4036 	/*
4037 	 * don't set the state for display devices, base peripherals and
4038 	 * memory devices since bad things happen when they are powered down.
4039 	 * We should (a) have drivers that can easily detach and (b) use
4040 	 * generic drivers for these devices so that some device actually
4041 	 * attaches.  We need to make sure that when we implement (a) we don't
4042 	 * power the device down on a reattach.
4043 	 */
4044 	cls = pci_get_class(dev);
4045 	if (!setstate)
4046 		return;
4047 	switch (pci_do_power_nodriver)
4048 	{
4049 		case 0:		/* NO powerdown at all */
4050 			return;
4051 		case 1:		/* Conservative about what to power down */
4052 			if (cls == PCIC_STORAGE)
4053 				return;
4054 			/*FALLTHROUGH*/
4055 		case 2:		/* Agressive about what to power down */
4056 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4057 			    cls == PCIC_BASEPERIPH)
4058 				return;
4059 			/*FALLTHROUGH*/
4060 		case 3:		/* Power down everything */
4061 			break;
4062 	}
4063 	/*
4064 	 * PCI spec says we can only go into D3 state from D0 state.
4065 	 * Transition from D[12] into D0 before going to D3 state.
4066 	 */
4067 	ps = pci_get_powerstate(dev);
4068 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4069 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4070 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4071 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4072 }
4073 
4074 #ifdef COMPAT_OLDPCI
4075 
4076 /*
4077  * Locate the parent of a PCI device by scanning the PCI devlist
4078  * and return the entry for the parent.
4079  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4080  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4081  */
4082 pcicfgregs *
4083 pci_devlist_get_parent(pcicfgregs *cfg)
4084 {
4085 	struct devlist *devlist_head;
4086 	struct pci_devinfo *dinfo;
4087 	pcicfgregs *bridge_cfg;
4088 	int i;
4089 
4090 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4091 
4092 	/* If the device is on PCI bus 0, look for the host */
4093 	if (cfg->bus == 0) {
4094 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4095 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4096 			bridge_cfg = &dinfo->cfg;
4097 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4098 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4099 		    		&& bridge_cfg->bus == cfg->bus) {
4100 				return bridge_cfg;
4101 			}
4102 		}
4103 	}
4104 
4105 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4106 	if (cfg->bus > 0) {
4107 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4108 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4109 			bridge_cfg = &dinfo->cfg;
4110 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4111 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4112 				&& bridge_cfg->secondarybus == cfg->bus) {
4113 				return bridge_cfg;
4114 			}
4115 		}
4116 	}
4117 
4118 	return NULL;
4119 }
4120 
4121 #endif	/* COMPAT_OLDPCI */
4122 
4123 int
4124 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4125 {
4126 	int rid, type;
4127 	u_int flags;
4128 
4129 	rid = 0;
4130 	type = PCI_INTR_TYPE_LEGACY;
4131 	flags = RF_SHAREABLE | RF_ACTIVE;
4132 
4133 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4134 	if (msi_enable) {
4135 		int cpu;
4136 
4137 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4138 		if (cpu >= ncpus)
4139 			cpu = ncpus - 1;
4140 
4141 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4142 			flags &= ~RF_SHAREABLE;
4143 			type = PCI_INTR_TYPE_MSI;
4144 		}
4145 	}
4146 
4147 	*rid0 = rid;
4148 	*flags0 = flags;
4149 
4150 	return type;
4151 }
4152