xref: /dragonfly/sys/bus/pci/pci.c (revision a563ca70)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_bus.h"
32 #include "opt_acpi.h"
33 #include "opt_compat_oldpci.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 #include <sys/machintr.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 #include <sys/device.h>
55 
56 #include <sys/pciio.h>
57 #include <bus/pci/pcireg.h>
58 #include <bus/pci/pcivar.h>
59 #include <bus/pci/pci_private.h>
60 
61 #include "pcib_if.h"
62 #include "pci_if.h"
63 
64 #ifdef __HAVE_ACPI
65 #include <contrib/dev/acpica/acpi.h>
66 #include "acpi_if.h"
67 #else
68 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
69 #endif
70 
/* Legacy /dev/pci character device ops; see pci_modevent(). */
extern struct dev_ops pcic_ops;	/* XXX */

/* Parser callback invoked for each recognized capability entry. */
typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);

/* BAR decoding helpers. */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

/* Device enumeration, probing and resource assignment. */
static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_child_detached(device_t, device_t);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
/* Vital Product Data access. */
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
/* MSI/MSI-X management. */
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_enable_msix(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix(device_t dev, u_int index);
static void		pci_unmask_msix(device_t dev, u_int index);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pcie_slotimpl(const pcicfgregs *);
static void		pci_print_verbose_expr(const pcicfgregs *);

/* Per-capability parsers dispatched from pci_read_capabilities(). */
static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_subvendor(device_t, int, int,
			    pcicfgregs *);
static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
128 
/*
 * Method table for the pci bus driver: the standard device and bus
 * interface methods plus the PCI-specific interface (config space
 * access, power states, VPD and MSI/MSI-X management).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
181 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * In-memory copy of the vendor/device description data, populated by
 * pci_load_vendor_data() and consumed by pci_describe_device().
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;


/*
 * Capability dispatch table: maps a PCI capability ID to the routine
 * that records its registers in pcicfgregs.  Terminated by a NULL entry.
 */
static const struct pci_read_cap {
	int		cap;		/* PCIY_* capability ID */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
206 
/*
 * Table of devices that need special handling.  Entries are matched on
 * the combined device/vendor ID (vendor in the low 16 bits).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-specific (e.g. map register offset) */
	int	arg2;	/* quirk-specific, currently unused */
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
250 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of all enumerated PCI devices, with a generation count. */
struct devlist pci_devq;
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set when a PCIe/PCI-X capability is seen during capability parsing. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
301 
302 /* Find a device_t by bus/slot/function in domain 0 */
303 
304 device_t
305 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
306 {
307 
308 	return (pci_find_dbsf(0, bus, slot, func));
309 }
310 
311 /* Find a device_t by domain/bus/slot/function */
312 
313 device_t
314 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
315 {
316 	struct pci_devinfo *dinfo;
317 
318 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
319 		if ((dinfo->cfg.domain == domain) &&
320 		    (dinfo->cfg.bus == bus) &&
321 		    (dinfo->cfg.slot == slot) &&
322 		    (dinfo->cfg.func == func)) {
323 			return (dinfo->cfg.dev);
324 		}
325 	}
326 
327 	return (NULL);
328 }
329 
330 /* Find a device_t by vendor/device ID */
331 
332 device_t
333 pci_find_device(uint16_t vendor, uint16_t device)
334 {
335 	struct pci_devinfo *dinfo;
336 
337 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
338 		if ((dinfo->cfg.vendor == vendor) &&
339 		    (dinfo->cfg.device == device)) {
340 			return (dinfo->cfg.dev);
341 		}
342 	}
343 
344 	return (NULL);
345 }
346 
347 /* return base address of memory or port map */
348 
349 static uint32_t
350 pci_mapbase(uint32_t mapreg)
351 {
352 
353 	if (PCI_BAR_MEM(mapreg))
354 		return (mapreg & PCIM_BAR_MEM_BASE);
355 	else
356 		return (mapreg & PCIM_BAR_IO_BASE);
357 }
358 
359 /* return map type of memory or port map */
360 
361 static const char *
362 pci_maptype(unsigned mapreg)
363 {
364 
365 	if (PCI_BAR_IO(mapreg))
366 		return ("I/O Port");
367 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
368 		return ("Prefetchable Memory");
369 	return ("Memory");
370 }
371 
372 /* return log2 of map size decoded for memory or port map */
373 
374 static int
375 pci_mapsize(uint32_t testval)
376 {
377 	int ln2size;
378 
379 	testval = pci_mapbase(testval);
380 	ln2size = 0;
381 	if (testval != 0) {
382 		while ((testval & 1) == 0)
383 		{
384 			ln2size++;
385 			testval >>= 1;
386 		}
387 	}
388 	return (ln2size);
389 }
390 
391 /* return log2 of address range supported by map register */
392 
393 static int
394 pci_maprange(unsigned mapreg)
395 {
396 	int ln2range = 0;
397 
398 	if (PCI_BAR_IO(mapreg))
399 		ln2range = 32;
400 	else
401 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
402 		case PCIM_BAR_MEM_32:
403 			ln2range = 32;
404 			break;
405 		case PCIM_BAR_MEM_1MB:
406 			ln2range = 20;
407 			break;
408 		case PCIM_BAR_MEM_64:
409 			ln2range = 64;
410 			break;
411 		}
412 	return (ln2range);
413 }
414 
415 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
416 
417 static void
418 pci_fixancient(pcicfgregs *cfg)
419 {
420 	if (cfg->hdrtype != 0)
421 		return;
422 
423 	/* PCI to PCI bridges use header type 1 */
424 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
425 		cfg->hdrtype = 1;
426 }
427 
428 /* extract header type specific config data */
429 
430 static void
431 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
432 {
433 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
434 	switch (cfg->hdrtype) {
435 	case 0:
436 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
437 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
438 		cfg->nummaps	    = PCI_MAXMAPS_0;
439 		break;
440 	case 1:
441 		cfg->nummaps	    = PCI_MAXMAPS_1;
442 #ifdef COMPAT_OLDPCI
443 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
444 #endif
445 		break;
446 	case 2:
447 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
448 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
449 		cfg->nummaps	    = PCI_MAXMAPS_2;
450 #ifdef COMPAT_OLDPCI
451 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
452 #endif
453 		break;
454 	}
455 #undef REG
456 }
457 
/*
 * Read the configuration header of the function at domain/bus/slot/func
 * into a freshly allocated pci_devinfo (of caller-supplied 'size', which
 * may be larger to leave room for subclass data).  The new entry is
 * appended to the global pci_devq list and its pci_conf summary is
 * filled in.  Returns NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An absent function reads back as all-ones. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the key fields into the pciio-visible pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
529 
530 static int
531 pci_fixup_nextptr(int *nextptr0)
532 {
533 	int nextptr = *nextptr0;
534 
535 	/* "Next pointer" is only one byte */
536 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d\n", nextptr));
537 
538 	if (nextptr & 0x3) {
539 		/*
540 		 * PCI local bus spec 3.0:
541 		 *
542 		 * "... The bottom two bits of all pointers are reserved
543 		 *  and must be implemented as 00b although software must
544 		 *  mask them to allow for future uses of these bits ..."
545 		 */
546 		if (bootverbose) {
547 			kprintf("Illegal PCI extended capability "
548 				"offset, fixup 0x%02x -> 0x%02x\n",
549 				nextptr, nextptr & ~0x3);
550 		}
551 		nextptr &= ~0x3;
552 	}
553 	*nextptr0 = nextptr;
554 
555 	if (nextptr < 0x40) {
556 		if (nextptr != 0) {
557 			kprintf("Illegal PCI extended capability "
558 				"offset 0x%02x", nextptr);
559 		}
560 		return 0;
561 	}
562 	return 1;
563 }
564 
565 static void
566 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
567 {
568 #define REG(n, w)	\
569 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
570 
571 	struct pcicfg_pp *pp = &cfg->pp;
572 
573 	if (pp->pp_cap)
574 		return;
575 
576 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
577 	pp->pp_status = ptr + PCIR_POWER_STATUS;
578 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
579 
580 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
581 		/*
582 		 * XXX
583 		 * We should write to data_select and read back from
584 		 * data_scale to determine whether data register is
585 		 * implemented.
586 		 */
587 #ifdef foo
588 		pp->pp_data = ptr + PCIR_POWER_DATA;
589 #else
590 		pp->pp_data = 0;
591 #endif
592 	}
593 
594 #undef REG
595 }
596 
/*
 * Record a HyperTransport MSI-mapping capability in cfg->ht.
 * NOTE: the entire body is compiled out ("notyet"); the function is
 * currently a no-op kept as a dispatch-table placeholder.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#ifdef notyet
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_INTEL_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_INTEL_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
#endif	/* notyet */
}
640 
641 static void
642 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
643 {
644 #define REG(n, w)	\
645 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
646 
647 	struct pcicfg_msi *msi = &cfg->msi;
648 
649 	msi->msi_location = ptr;
650 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
651 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
652 
653 #undef REG
654 }
655 
656 static void
657 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
658 {
659 #define REG(n, w)	\
660 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
661 
662 	struct pcicfg_msix *msix = &cfg->msix;
663 	uint32_t val;
664 
665 	msix->msix_location = ptr;
666 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
667 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
668 
669 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
670 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
671 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
672 
673 	val = REG(ptr + PCIR_MSIX_PBA, 4);
674 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
675 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
676 
677 #undef REG
678 }
679 
/* Record the VPD capability offset; contents are parsed by pci_read_vpd(). */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
685 
686 static void
687 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
688 {
689 #define REG(n, w)	\
690 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
691 
692 	/* Should always be true. */
693 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
694 		uint32_t val;
695 
696 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
697 		cfg->subvendor = val & 0xffff;
698 		cfg->subdevice = val >> 16;
699 	}
700 
701 #undef REG
702 }
703 
704 static void
705 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
706 {
707 	/*
708 	 * Assume we have a PCI-X chipset if we have
709 	 * at least one PCI-PCI bridge with a PCI-X
710 	 * capability.  Note that some systems with
711 	 * PCI-express or HT chipsets might match on
712 	 * this check as well.
713 	 */
714 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
715 		pcix_chipset = 1;
716 
717 	cfg->pcix.pcix_ptr = ptr;
718 }
719 
720 static int
721 pcie_slotimpl(const pcicfgregs *cfg)
722 {
723 	const struct pcicfg_expr *expr = &cfg->expr;
724 	uint16_t port_type;
725 
726 	/*
727 	 * Only version 1 can be parsed currently
728 	 */
729 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
730 		return 0;
731 
732 	/*
733 	 * - Slot implemented bit is meaningful iff current port is
734 	 *   root port or down stream port.
735 	 * - Testing for root port or down stream port is meanningful
736 	 *   iff PCI configure has type 1 header.
737 	 */
738 
739 	if (cfg->hdrtype != 1)
740 		return 0;
741 
742 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
743 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
744 		return 0;
745 
746 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
747 		return 0;
748 
749 	return 1;
750 }
751 
752 static void
753 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
754 {
755 #define REG(n, w)	\
756 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
757 
758 	struct pcicfg_expr *expr = &cfg->expr;
759 
760 	/*
761 	 * Assume we have a PCI-express chipset if we have
762 	 * at least one PCI-express device.
763 	 */
764 	pcie_chipset = 1;
765 
766 	expr->expr_ptr = ptr;
767 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
768 
769 	/*
770 	 * Only version 1 can be parsed currently
771 	 */
772 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
773 		return;
774 
775 	/*
776 	 * Read slot capabilities.  Slot capabilities exists iff
777 	 * current port's slot is implemented
778 	 */
779 	if (pcie_slotimpl(cfg))
780 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
781 
782 #undef REG
783 }
784 
/*
 * Walk the device's capability list and dispatch each recognized
 * capability ID to its parser in pci_read_caps[].
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* Location of the head pointer depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.  pci_fixup_nextptr() masks reserved
	 * bits and terminates the walk on a zero/bogus pointer.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}
/*
 * NOTE: REG and WREG are deliberately NOT #undef'ed here; the VPD
 * access routines below reuse these definitions.
 */
}
833 
834 /*
835  * PCI Vital Product Data
836  */
837 
838 #define	PCI_VPD_TIMEOUT		1000000
839 
840 static int
841 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
842 {
843 	int count = PCI_VPD_TIMEOUT;
844 
845 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
846 
847 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
848 
849 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
850 		if (--count < 0)
851 			return (ENXIO);
852 		DELAY(1);	/* limit looping */
853 	}
854 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
855 
856 	return (0);
857 }
858 
#if 0
/*
 * Write one 32-bit word of VPD data at 'reg' (counterpart of
 * pci_read_vpd_reg(); completion is signalled by bit 15 of the address
 * register clearing).  Currently compiled out - no callers.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
878 
879 #undef PCI_VPD_TIMEOUT
880 
/* Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()). */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* last 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of consumed bytes */
};
889 
890 static int
891 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
892 {
893 	uint32_t reg;
894 	uint8_t byte;
895 
896 	if (vrs->bytesinval == 0) {
897 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
898 			return (ENXIO);
899 		vrs->val = le32toh(reg);
900 		vrs->off += 4;
901 		byte = vrs->val & 0xff;
902 		vrs->bytesinval = 3;
903 	} else {
904 		vrs->val = vrs->val >> 8;
905 		byte = vrs->val & 0xff;
906 		vrs->bytesinval--;
907 	}
908 
909 	vrs->cksum += byte;
910 	*data = byte;
911 	return (0);
912 }
913 
914 int
915 pcie_slot_implemented(device_t dev)
916 {
917 	struct pci_devinfo *dinfo = device_get_ivars(dev);
918 
919 	return pcie_slotimpl(&dinfo->cfg);
920 }
921 
922 void
923 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
924 {
925 	uint8_t expr_ptr;
926 	uint16_t val;
927 
928 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
929 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
930 		panic("%s: invalid max read request size 0x%02x\n",
931 		      device_get_nameunit(dev), rqsize);
932 	}
933 
934 	expr_ptr = pci_get_pciecap_ptr(dev);
935 	if (!expr_ptr)
936 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
937 
938 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
939 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
940 		if (bootverbose)
941 			device_printf(dev, "adjust device control 0x%04x", val);
942 
943 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
944 		val |= rqsize;
945 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
946 
947 		if (bootverbose)
948 			kprintf(" -> 0x%04x\n", val);
949 	}
950 }
951 
952 uint16_t
953 pcie_get_max_readrq(device_t dev)
954 {
955 	uint8_t expr_ptr;
956 	uint16_t val;
957 
958 	expr_ptr = pci_get_pciecap_ptr(dev);
959 	if (!expr_ptr)
960 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
961 
962 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
963 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
964 }
965 
/*
 * Read and parse the device's VPD (Vital Product Data) through bridge
 * 'pcib', caching the results in cfg->vpd: the identifier string
 * (vpd_ident), the read-only keyword array (vpd_ros/vpd_rocnt) and the
 * writable keyword array (vpd_w/vpd_wcnt).  Driven as a state machine:
 * state 0 parses resource tags, 1 the identifier string, 2/3 the VPD-R
 * header/value, 5/6 the VPD-W header/value.  state -1 ends parsing,
 * state -2 flags an I/O error.  On checksum failure the RO data is
 * discarded; on I/O error everything is discarded.  Always marks the
 * VPD as cached so the read is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;	/* -1 = not yet seen, 0 = bad, 1 = good */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD cannot exceed the 32KB config window. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				/* Start with 8 slots; grown on demand. */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array if all slots are in use. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length keyword: store empty string. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte makes the running
			 * checksum of all bytes up to here sum to zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Shrink the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip filler bytes. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this writable field starts in VPD. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1245 
1246 int
1247 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1248 {
1249 	struct pci_devinfo *dinfo = device_get_ivars(child);
1250 	pcicfgregs *cfg = &dinfo->cfg;
1251 
1252 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1253 		pci_read_vpd(device_get_parent(dev), cfg);
1254 
1255 	*identptr = cfg->vpd.vpd_ident;
1256 
1257 	if (*identptr == NULL)
1258 		return (ENXIO);
1259 
1260 	return (0);
1261 }
1262 
1263 int
1264 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1265 	const char **vptr)
1266 {
1267 	struct pci_devinfo *dinfo = device_get_ivars(child);
1268 	pcicfgregs *cfg = &dinfo->cfg;
1269 	int i;
1270 
1271 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1272 		pci_read_vpd(device_get_parent(dev), cfg);
1273 
1274 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1275 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1276 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1277 			*vptr = cfg->vpd.vpd_ros[i].value;
1278 		}
1279 
1280 	if (i != cfg->vpd.vpd_rocnt)
1281 		return (0);
1282 
1283 	*vptr = NULL;
1284 	return (ENXIO);
1285 }
1286 
1287 /*
1288  * Return the offset in configuration space of the requested extended
1289  * capability entry or 0 if the specified capability was not found.
1290  */
1291 int
1292 pci_find_extcap_method(device_t dev, device_t child, int capability,
1293     int *capreg)
1294 {
1295 	struct pci_devinfo *dinfo = device_get_ivars(child);
1296 	pcicfgregs *cfg = &dinfo->cfg;
1297 	u_int32_t status;
1298 	u_int8_t ptr;
1299 
1300 	/*
1301 	 * Check the CAP_LIST bit of the PCI status register first.
1302 	 */
1303 	status = pci_read_config(child, PCIR_STATUS, 2);
1304 	if (!(status & PCIM_STATUS_CAPPRESENT))
1305 		return (ENXIO);
1306 
1307 	/*
1308 	 * Determine the start pointer of the capabilities list.
1309 	 */
1310 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1311 	case 0:
1312 	case 1:
1313 		ptr = PCIR_CAP_PTR;
1314 		break;
1315 	case 2:
1316 		ptr = PCIR_CAP_PTR_2;
1317 		break;
1318 	default:
1319 		/* XXX: panic? */
1320 		return (ENXIO);		/* no extended capabilities support */
1321 	}
1322 	ptr = pci_read_config(child, ptr, 1);
1323 
1324 	/*
1325 	 * Traverse the capabilities list.
1326 	 */
1327 	while (ptr != 0) {
1328 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1329 			if (capreg != NULL)
1330 				*capreg = ptr;
1331 			return (0);
1332 		}
1333 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1334 	}
1335 
1336 	return (ENOENT);
1337 }
1338 
1339 /*
1340  * Support for MSI-X message interrupts.
1341  */
1342 void
1343 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1344 {
1345 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1346 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1347 	uint32_t offset;
1348 
1349 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1350 	offset = msix->msix_table_offset + index * 16;
1351 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1352 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1353 	bus_write_4(msix->msix_table_res, offset + 8, data);
1354 
1355 	/* Enable MSI -> HT mapping. */
1356 	pci_ht_map_msi(dev, address);
1357 }
1358 
1359 void
1360 pci_mask_msix(device_t dev, u_int index)
1361 {
1362 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1363 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1364 	uint32_t offset, val;
1365 
1366 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1367 	offset = msix->msix_table_offset + index * 16 + 12;
1368 	val = bus_read_4(msix->msix_table_res, offset);
1369 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1370 		val |= PCIM_MSIX_VCTRL_MASK;
1371 		bus_write_4(msix->msix_table_res, offset, val);
1372 	}
1373 }
1374 
1375 void
1376 pci_unmask_msix(device_t dev, u_int index)
1377 {
1378 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1379 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1380 	uint32_t offset, val;
1381 
1382 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1383 	offset = msix->msix_table_offset + index * 16 + 12;
1384 	val = bus_read_4(msix->msix_table_res, offset);
1385 	if (val & PCIM_MSIX_VCTRL_MASK) {
1386 		val &= ~PCIM_MSIX_VCTRL_MASK;
1387 		bus_write_4(msix->msix_table_res, offset, val);
1388 	}
1389 }
1390 
1391 int
1392 pci_pending_msix(device_t dev, u_int index)
1393 {
1394 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1395 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1396 	uint32_t offset, bit;
1397 
1398 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1399 	offset = msix->msix_pba_offset + (index / 32) * 4;
1400 	bit = 1 << index % 32;
1401 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1402 }
1403 
1404 /*
1405  * Restore MSI-X registers and table during resume.  If MSI-X is
1406  * enabled then walk the virtual table to restore the actual MSI-X
1407  * table.
1408  */
1409 static void
1410 pci_resume_msix(device_t dev)
1411 {
1412 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1413 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1414 	struct msix_table_entry *mte;
1415 	struct msix_vector *mv;
1416 	int i;
1417 
1418 	if (msix->msix_alloc > 0) {
1419 		/* First, mask all vectors. */
1420 		for (i = 0; i < msix->msix_msgnum; i++)
1421 			pci_mask_msix(dev, i);
1422 
1423 		/* Second, program any messages with at least one handler. */
1424 		for (i = 0; i < msix->msix_table_len; i++) {
1425 			mte = &msix->msix_table[i];
1426 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1427 				continue;
1428 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1429 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1430 			pci_unmask_msix(dev, i);
1431 		}
1432 	}
1433 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1434 	    msix->msix_ctrl, 2);
1435 }
1436 
1437 /*
1438  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1439  * returned in *count.  After this function returns, each message will be
1440  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1441  */
/*
 * Allocate up to *count MSI-X messages for 'child'.  On success the
 * actual number allocated is returned in *count, the virtual MSI-X
 * table and vector arrays are set up, MSI-X is enabled in the control
 * register, and the IRQs appear as SYS_RES_IRQ resources at rids 1..N.
 * Fails if messages are already allocated, MSI is blacklisted, the
 * capability is absent/disabled, or the table/PBA BARs are not mapped.
 */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, 'rle' is still the table's entry. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Ask the parent bridge for one IRQ per message, stop on failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1, -1);
	}
	actual = i;

	if (actual == 0) {
		if (bootverbose) {
			device_printf(child,
			    "could not allocate any MSI-X vectors\n");
		}
		return  (ENXIO);
	}

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irq);
			kprintf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = kmalloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = kmalloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	/* Map message i to vector i + 1 (vector numbers are 1-based). */
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1581 
1582 /*
1583  * By default, pci_alloc_msix() will assign the allocated IRQ
1584  * resources consecutively to the first N messages in the MSI-X table.
1585  * However, device drivers may want to use different layouts if they
1586  * either receive fewer messages than they asked for, or they wish to
1587  * populate the MSI-X table sparsely.  This method allows the driver
1588  * to specify what layout it wants.  It must be called after a
1589  * successful pci_alloc_msix() but before any of the associated
1590  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1591  *
1592  * The 'vectors' array contains 'count' message vectors.  The array
1593  * maps directly to the MSI-X table in that index 0 in the array
1594  * specifies the vector for the first message in the MSI-X table, etc.
1595  * The vector value in each array index can either be 0 to indicate
1596  * that no vector should be assigned to a message slot, or it can be a
1597  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1599  * vector (IRQ) to be used for the corresponding message.
1600  *
1601  * On successful return, each message with a non-zero vector will have
1602  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1603  * 1.  Additionally, if any of the IRQs allocated via the previous
1604  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1605  * will be kfreed back to the system automatically.
1606  *
1607  * For example, suppose a driver has a MSI-X table with 6 messages and
1608  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1609  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1610  * C.  After the call to pci_alloc_msix(), the device will be setup to
1611  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1613  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1614  * be kfreed back to the system.  This device will also have valid
1615  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1616  *
1617  * In any case, the SYS_RES_IRQ rid X will always map to the message
1618  * at MSI-X table index X - 1 and will only be valid if a vector is
1619  * assigned to that table entry.
1620  */
1621 int
1622 pci_remap_msix_method(device_t dev, device_t child, int count,
1623     const u_int *vectors)
1624 {
1625 	struct pci_devinfo *dinfo = device_get_ivars(child);
1626 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1627 	struct resource_list_entry *rle;
1628 	int i, irq, j, *used;
1629 
1630 	/*
1631 	 * Have to have at least one message in the table but the
1632 	 * table can't be bigger than the actual MSI-X table in the
1633 	 * device.
1634 	 */
1635 	if (count == 0 || count > msix->msix_msgnum)
1636 		return (EINVAL);
1637 
1638 	/* Sanity check the vectors. */
1639 	for (i = 0; i < count; i++)
1640 		if (vectors[i] > msix->msix_alloc)
1641 			return (EINVAL);
1642 
1643 	/*
1644 	 * Make sure there aren't any holes in the vectors to be used.
1645 	 * It's a big pain to support it, and it doesn't really make
1646 	 * sense anyway.  Also, at least one vector must be used.
1647 	 */
1648 	used = kmalloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1649 	    M_ZERO);
1650 	for (i = 0; i < count; i++)
1651 		if (vectors[i] != 0)
1652 			used[vectors[i] - 1] = 1;
1653 	for (i = 0; i < msix->msix_alloc - 1; i++)
1654 		if (used[i] == 0 && used[i + 1] == 1) {
1655 			kfree(used, M_DEVBUF);
1656 			return (EINVAL);
1657 		}
1658 	if (used[0] != 1) {
1659 		kfree(used, M_DEVBUF);
1660 		return (EINVAL);
1661 	}
1662 
1663 	/* Make sure none of the resources are allocated. */
1664 	for (i = 0; i < msix->msix_table_len; i++) {
1665 		if (msix->msix_table[i].mte_vector == 0)
1666 			continue;
1667 		if (msix->msix_table[i].mte_handlers > 0)
1668 			return (EBUSY);
1669 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1670 		KASSERT(rle != NULL, ("missing resource"));
1671 		if (rle->res != NULL)
1672 			return (EBUSY);
1673 	}
1674 
1675 	/* Free the existing resource list entries. */
1676 	for (i = 0; i < msix->msix_table_len; i++) {
1677 		if (msix->msix_table[i].mte_vector == 0)
1678 			continue;
1679 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1680 	}
1681 
1682 	/*
1683 	 * Build the new virtual table keeping track of which vectors are
1684 	 * used.
1685 	 */
1686 	kfree(msix->msix_table, M_DEVBUF);
1687 	msix->msix_table = kmalloc(sizeof(struct msix_table_entry) * count,
1688 	    M_DEVBUF, M_WAITOK | M_ZERO);
1689 	for (i = 0; i < count; i++)
1690 		msix->msix_table[i].mte_vector = vectors[i];
1691 	msix->msix_table_len = count;
1692 
1693 	/* Free any unused IRQs and resize the vectors array if necessary. */
1694 	j = msix->msix_alloc - 1;
1695 	if (used[j] == 0) {
1696 		struct msix_vector *vec;
1697 
1698 		while (used[j] == 0) {
1699 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1700 			    msix->msix_vectors[j].mv_irq);
1701 			j--;
1702 		}
1703 		vec = kmalloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1704 		    M_WAITOK);
1705 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1706 		    (j + 1));
1707 		kfree(msix->msix_vectors, M_DEVBUF);
1708 		msix->msix_vectors = vec;
1709 		msix->msix_alloc = j + 1;
1710 	}
1711 	kfree(used, M_DEVBUF);
1712 
1713 	/* Map the IRQs onto the rids. */
1714 	for (i = 0; i < count; i++) {
1715 		if (vectors[i] == 0)
1716 			continue;
1717 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1718 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1719 		    irq, 1, -1);
1720 	}
1721 
1722 	if (bootverbose) {
1723 		device_printf(child, "Remapped MSI-X IRQs as: ");
1724 		for (i = 0; i < count; i++) {
1725 			if (i != 0)
1726 				kprintf(", ");
1727 			if (vectors[i] == 0)
1728 				kprintf("---");
1729 			else
1730 				kprintf("%d",
1731 				    msix->msix_vectors[vectors[i]].mv_irq);
1732 		}
1733 		kprintf("\n");
1734 	}
1735 
1736 	return (0);
1737 }
1738 
/*
 * Release all MSI-X messages allocated to 'child': disable MSI-X in
 * the control register, delete the SYS_RES_IRQ resource list entries,
 * free the virtual table and vector array, and hand the IRQs back to
 * the parent bridge.  Fails with EBUSY if any message still has an
 * interrupt handler or an allocated resource.
 */
static int
pci_release_msix(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	int i;

	/* Do we have any messages to release? */
	if (msix->msix_alloc == 0)
		return (ENODEV);

	/* Make sure none of the resources are allocated. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		if (msix->msix_table[i].mte_handlers > 0)
			return (EBUSY);
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing resource"));
		if (rle->res != NULL)
			return (EBUSY);
	}

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);

	/* Free the resource list entries. */
	for (i = 0; i < msix->msix_table_len; i++) {
		if (msix->msix_table[i].mte_vector == 0)
			continue;
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
	}
	kfree(msix->msix_table, M_DEVBUF);
	msix->msix_table_len = 0;

	/* Release the IRQs. */
	for (i = 0; i < msix->msix_alloc; i++)
		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
		    msix->msix_vectors[i].mv_irq);
	kfree(msix->msix_vectors, M_DEVBUF);
	msix->msix_alloc = 0;
	return (0);
}
1785 
1786 /*
1787  * Return the max supported MSI-X messages this device supports.
1788  * Basically, assuming the MD code can alloc messages, this function
1789  * should return the maximum value that pci_alloc_msix() can return.
1790  * Thus, it is subject to the tunables, etc.
1791  */
1792 int
1793 pci_msix_count_method(device_t dev, device_t child)
1794 {
1795 	struct pci_devinfo *dinfo = device_get_ivars(child);
1796 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1797 
1798 	if (pci_do_msix && msix->msix_location != 0)
1799 		return (msix->msix_msgnum);
1800 	return (0);
1801 }
1802 
1803 /*
1804  * HyperTransport MSI mapping control
1805  */
1806 void
1807 pci_ht_map_msi(device_t dev, uint64_t addr)
1808 {
1809 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1810 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1811 
1812 	if (!ht->ht_msimap)
1813 		return;
1814 
1815 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1816 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1817 		/* Enable MSI -> HT mapping. */
1818 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1819 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1820 		    ht->ht_msictrl, 2);
1821 	}
1822 
1823 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1824 		/* Disable MSI -> HT mapping. */
1825 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1826 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1827 		    ht->ht_msictrl, 2);
1828 	}
1829 }
1830 
1831 /*
1832  * Support for MSI message signalled interrupts.
1833  */
1834 void
1835 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1836 {
1837 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1838 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1839 
1840 	/* Write data and address values. */
1841 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1842 	    address & 0xffffffff, 4);
1843 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1844 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1845 		    address >> 32, 4);
1846 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1847 		    data, 2);
1848 	} else
1849 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1850 		    2);
1851 
1852 	/* Enable MSI in the control register. */
1853 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1854 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1855 	    2);
1856 
1857 	/* Enable MSI -> HT mapping. */
1858 	pci_ht_map_msi(dev, address);
1859 }
1860 
1861 void
1862 pci_disable_msi(device_t dev)
1863 {
1864 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1865 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1866 
1867 	/* Disable MSI -> HT mapping. */
1868 	pci_ht_map_msi(dev, 0);
1869 
1870 	/* Disable MSI in the control register. */
1871 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1872 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1873 	    2);
1874 }
1875 
1876 /*
1877  * Restore MSI registers during resume.  If MSI is enabled then
1878  * restore the data and address registers in addition to the control
1879  * register.
1880  */
1881 static void
1882 pci_resume_msi(device_t dev)
1883 {
1884 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1885 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1886 	uint64_t address;
1887 	uint16_t data;
1888 
1889 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1890 		address = msi->msi_addr;
1891 		data = msi->msi_data;
1892 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1893 		    address & 0xffffffff, 4);
1894 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1895 			pci_write_config(dev, msi->msi_location +
1896 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1897 			pci_write_config(dev, msi->msi_location +
1898 			    PCIR_MSI_DATA_64BIT, data, 2);
1899 		} else
1900 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1901 			    data, 2);
1902 	}
1903 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1904 	    2);
1905 }
1906 
/*
 * Re-route a single MSI or MSI-X IRQ for 'dev': ask the parent bridge
 * for fresh address/data values for 'irq' and reprogram the device's
 * MSI registers or every MSI-X table slot using that IRQ.  Returns 0
 * on success, ENOENT when the IRQ does not belong to this device, or
 * the error from PCIB_MAP_MSI.
 */
int
pci_remap_msi_irq(device_t dev, u_int irq)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	device_t bus;
	uint64_t addr;
	uint32_t data;
	int error, i, j;

	bus = device_get_parent(dev);

	/*
	 * Handle MSI first.  We try to find this IRQ among our list
	 * of MSI IRQs.  If we find it, we request updated address and
	 * data registers and apply the results.
	 */
	if (cfg->msi.msi_alloc > 0) {

		/* If we don't have any active handlers, nothing to do. */
		if (cfg->msi.msi_handlers == 0)
			return (0);
		/* MSI IRQs live at rids 1..msi_alloc. */
		for (i = 0; i < cfg->msi.msi_alloc; i++) {
			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
			    i + 1);
			if (rle->start == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				/* Disable, update the cache, re-enable. */
				pci_disable_msi(dev);
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(dev, addr, data);
				return (0);
			}
		}
		return (ENOENT);
	}

	/*
	 * For MSI-X, we check to see if we have this IRQ.  If we do,
	 * we request the updated mapping info.  If that works, we go
	 * through all the slots that use this IRQ and update them.
	 */
	if (cfg->msix.msix_alloc > 0) {
		for (i = 0; i < cfg->msix.msix_alloc; i++) {
			mv = &cfg->msix.msix_vectors[i];
			if (mv->mv_irq == irq) {
				error = PCIB_MAP_MSI(device_get_parent(bus),
				    dev, irq, &addr, &data);
				if (error)
					return (error);
				mv->mv_address = addr;
				mv->mv_data = data;
				/*
				 * Reprogram every active table slot that
				 * maps to this vector (vector i + 1).
				 */
				for (j = 0; j < cfg->msix.msix_table_len; j++) {
					mte = &cfg->msix.msix_table[j];
					if (mte->mte_vector != i + 1)
						continue;
					if (mte->mte_handlers == 0)
						continue;
					/* Mask while rewriting the entry. */
					pci_mask_msix(dev, j);
					pci_enable_msix(dev, j, addr, data);
					pci_unmask_msix(dev, j);
				}
			}
		}
		return (ENOENT);
	}

	return (ENOENT);
}
1982 
1983 /*
1984  * Returns true if the specified device is blacklisted because MSI
1985  * doesn't work.
1986  */
1987 int
1988 pci_msi_device_blacklisted(device_t dev)
1989 {
1990 	struct pci_quirk *q;
1991 
1992 	if (!pci_honor_msi_blacklist)
1993 		return (0);
1994 
1995 	for (q = &pci_quirks[0]; q->devid; q++) {
1996 		if (q->devid == pci_get_devid(dev) &&
1997 		    q->type == PCI_QUIRK_DISABLE_MSI)
1998 			return (1);
1999 	}
2000 	return (0);
2001 }
2002 
2003 /*
2004  * Determine if MSI is blacklisted globally on this sytem.  Currently,
2005  * we just check for blacklisted chipsets as represented by the
2006  * host-PCI bridge at device 0:0:0.  In the future, it may become
2007  * necessary to check other system attributes, such as the kenv values
2008  * that give the motherboard manufacturer and model number.
2009  */
2010 static int
2011 pci_msi_blacklisted(void)
2012 {
2013 	device_t dev;
2014 
2015 	if (!pci_honor_msi_blacklist)
2016 		return (0);
2017 
2018 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2019 	if (!(pcie_chipset || pcix_chipset))
2020 		return (1);
2021 
2022 	dev = pci_find_bsf(0, 0, 0);
2023 	if (dev != NULL)
2024 		return (pci_msi_device_blacklisted(dev));
2025 	return (0);
2026 }
2027 
2028 /*
2029  * Attempt to allocate *count MSI messages.  The actual number allocated is
2030  * returned in *count.  After this function returns, each message will be
2031  * available to the driver as SYS_RES_IRQ resources starting at a rid 1.
2032  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Keep halving the request until the parent bridge can satisfy
	 * it; give up once even a single message is refused.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, -1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[actual - 1]);
			kprintf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* The MME field (bits 6:4) encodes log2 of the enabled message count. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2151 
2152 /* Release the MSI messages associated with this device. */
/* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/* Collect the IRQ numbers; bail if any rid is still in use. */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2200 
2201 /*
2202  * Return the max supported MSI messages this device supports.
2203  * Basically, assuming the MD code can alloc messages, this function
2204  * should return the maximum value that pci_alloc_msi() can return.
2205  * Thus, it is subject to the tunables, etc.
2206  */
2207 int
2208 pci_msi_count_method(device_t dev, device_t child)
2209 {
2210 	struct pci_devinfo *dinfo = device_get_ivars(child);
2211 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2212 
2213 	if (pci_do_msi && msi->msi_location != 0)
2214 		return (msi->msi_msgnum);
2215 	return (0);
2216 }
2217 
2218 /* kfree pcicfgregs structure and all depending data structures */
2219 
2220 int
2221 pci_freecfg(struct pci_devinfo *dinfo)
2222 {
2223 	struct devlist *devlist_head;
2224 	int i;
2225 
2226 	devlist_head = &pci_devq;
2227 
2228 	if (dinfo->cfg.vpd.vpd_reg) {
2229 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2230 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2231 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2232 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2233 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2234 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2235 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2236 	}
2237 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2238 	kfree(dinfo, M_DEVBUF);
2239 
2240 	/* increment the generation count */
2241 	pci_generation++;
2242 
2243 	/* we're losing one device */
2244 	pci_numdevs--;
2245 	return (0);
2246 }
2247 
/*
 * PCI power management
 */
2251 int
2252 pci_set_powerstate_method(device_t dev, device_t child, int state)
2253 {
2254 	struct pci_devinfo *dinfo = device_get_ivars(child);
2255 	pcicfgregs *cfg = &dinfo->cfg;
2256 	uint16_t status;
2257 	int result, oldstate, highest, delay;
2258 
2259 	if (cfg->pp.pp_cap == 0)
2260 		return (EOPNOTSUPP);
2261 
2262 	/*
2263 	 * Optimize a no state change request away.  While it would be OK to
2264 	 * write to the hardware in theory, some devices have shown odd
2265 	 * behavior when going from D3 -> D3.
2266 	 */
2267 	oldstate = pci_get_powerstate(child);
2268 	if (oldstate == state)
2269 		return (0);
2270 
2271 	/*
2272 	 * The PCI power management specification states that after a state
2273 	 * transition between PCI power states, system software must
2274 	 * guarantee a minimal delay before the function accesses the device.
2275 	 * Compute the worst case delay that we need to guarantee before we
2276 	 * access the device.  Many devices will be responsive much more
2277 	 * quickly than this delay, but there are some that don't respond
2278 	 * instantly to state changes.  Transitions to/from D3 state require
2279 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2280 	 * is done below with DELAY rather than a sleeper function because
2281 	 * this function can be called from contexts where we cannot sleep.
2282 	 */
2283 	highest = (oldstate > state) ? oldstate : state;
2284 	if (highest == PCI_POWERSTATE_D3)
2285 	    delay = 10000;
2286 	else if (highest == PCI_POWERSTATE_D2)
2287 	    delay = 200;
2288 	else
2289 	    delay = 0;
2290 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2291 	    & ~PCIM_PSTAT_DMASK;
2292 	result = 0;
2293 	switch (state) {
2294 	case PCI_POWERSTATE_D0:
2295 		status |= PCIM_PSTAT_D0;
2296 		break;
2297 	case PCI_POWERSTATE_D1:
2298 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2299 			return (EOPNOTSUPP);
2300 		status |= PCIM_PSTAT_D1;
2301 		break;
2302 	case PCI_POWERSTATE_D2:
2303 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2304 			return (EOPNOTSUPP);
2305 		status |= PCIM_PSTAT_D2;
2306 		break;
2307 	case PCI_POWERSTATE_D3:
2308 		status |= PCIM_PSTAT_D3;
2309 		break;
2310 	default:
2311 		return (EINVAL);
2312 	}
2313 
2314 	if (bootverbose)
2315 		kprintf(
2316 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2317 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2318 		    dinfo->cfg.func, oldstate, state);
2319 
2320 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2321 	if (delay)
2322 		DELAY(delay);
2323 	return (0);
2324 }
2325 
2326 int
2327 pci_get_powerstate_method(device_t dev, device_t child)
2328 {
2329 	struct pci_devinfo *dinfo = device_get_ivars(child);
2330 	pcicfgregs *cfg = &dinfo->cfg;
2331 	uint16_t status;
2332 	int result;
2333 
2334 	if (cfg->pp.pp_cap != 0) {
2335 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2336 		switch (status & PCIM_PSTAT_DMASK) {
2337 		case PCIM_PSTAT_D0:
2338 			result = PCI_POWERSTATE_D0;
2339 			break;
2340 		case PCIM_PSTAT_D1:
2341 			result = PCI_POWERSTATE_D1;
2342 			break;
2343 		case PCIM_PSTAT_D2:
2344 			result = PCI_POWERSTATE_D2;
2345 			break;
2346 		case PCIM_PSTAT_D3:
2347 			result = PCI_POWERSTATE_D3;
2348 			break;
2349 		default:
2350 			result = PCI_POWERSTATE_UNKNOWN;
2351 			break;
2352 		}
2353 	} else {
2354 		/* No support, device is always at D0 */
2355 		result = PCI_POWERSTATE_D0;
2356 	}
2357 	return (result);
2358 }
2359 
2360 /*
2361  * Some convenience functions for PCI device drivers.
2362  */
2363 
2364 static __inline void
2365 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2366 {
2367 	uint16_t	command;
2368 
2369 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2370 	command |= bit;
2371 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2372 }
2373 
2374 static __inline void
2375 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2376 {
2377 	uint16_t	command;
2378 
2379 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2380 	command &= ~bit;
2381 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2382 }
2383 
2384 int
2385 pci_enable_busmaster_method(device_t dev, device_t child)
2386 {
2387 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2388 	return (0);
2389 }
2390 
2391 int
2392 pci_disable_busmaster_method(device_t dev, device_t child)
2393 {
2394 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2395 	return (0);
2396 }
2397 
2398 int
2399 pci_enable_io_method(device_t dev, device_t child, int space)
2400 {
2401 	uint16_t command;
2402 	uint16_t bit;
2403 	char *error;
2404 
2405 	bit = 0;
2406 	error = NULL;
2407 
2408 	switch(space) {
2409 	case SYS_RES_IOPORT:
2410 		bit = PCIM_CMD_PORTEN;
2411 		error = "port";
2412 		break;
2413 	case SYS_RES_MEMORY:
2414 		bit = PCIM_CMD_MEMEN;
2415 		error = "memory";
2416 		break;
2417 	default:
2418 		return (EINVAL);
2419 	}
2420 	pci_set_command_bit(dev, child, bit);
2421 	/* Some devices seem to need a brief stall here, what do to? */
2422 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2423 	if (command & bit)
2424 		return (0);
2425 	device_printf(child, "failed to enable %s mapping!\n", error);
2426 	return (ENXIO);
2427 }
2428 
2429 int
2430 pci_disable_io_method(device_t dev, device_t child, int space)
2431 {
2432 	uint16_t command;
2433 	uint16_t bit;
2434 	char *error;
2435 
2436 	bit = 0;
2437 	error = NULL;
2438 
2439 	switch(space) {
2440 	case SYS_RES_IOPORT:
2441 		bit = PCIM_CMD_PORTEN;
2442 		error = "port";
2443 		break;
2444 	case SYS_RES_MEMORY:
2445 		bit = PCIM_CMD_MEMEN;
2446 		error = "memory";
2447 		break;
2448 	default:
2449 		return (EINVAL);
2450 	}
2451 	pci_clear_command_bit(dev, child, bit);
2452 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2453 	if (command & bit) {
2454 		device_printf(child, "failed to disable %s mapping!\n", error);
2455 		return (ENXIO);
2456 	}
2457 	return (0);
2458 }
2459 
2460 /*
2461  * New style pci driver.  Parent device is either a pci-host-bridge or a
2462  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2463  */
2464 
/*
 * Print the interesting parts of a device's config header when booting
 * verbose: IDs, location, class, command/status, timers, interrupt
 * routing, and the power management, MSI/MSI-X and PCI Express
 * capabilities.  No-op unless 'bootverbose' is set.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Current power state comes from the live register. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2522 
/*
 * Print the PCI Express capability of a device when booting verbose:
 * capability version, port type, and (when a slot is implemented) the
 * slot capabilities.  No-op without a PCIe capability.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
	/* Only version 1 capability layouts are decoded further. */
	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
		goto back;

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	default:
		port_name = NULL;
		break;
	}
	/* Suppress the name for ports that claim no implemented slot. */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
back:
	kprintf("\n");
}
2584 
2585 static int
2586 pci_porten(device_t pcib, int b, int s, int f)
2587 {
2588 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2589 		& PCIM_CMD_PORTEN) != 0;
2590 }
2591 
2592 static int
2593 pci_memen(device_t pcib, int b, int s, int f)
2594 {
2595 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2596 		& PCIM_CMD_MEMEN) != 0;
2597 }
2598 
2599 /*
2600  * Add a resource based on a pci map register. Return 1 if the map
2601  * register is a 32bit map register or 2 if it is a 64bit register.
2602  */
2603 static int
2604 pci_add_map(device_t pcib, device_t bus, device_t dev,
2605     int b, int s, int f, int reg, struct resource_list *rl, int force,
2606     int prefetch)
2607 {
2608 	uint32_t map;
2609 	pci_addr_t base;
2610 	pci_addr_t start, end, count;
2611 	uint8_t ln2size;
2612 	uint8_t ln2range;
2613 	uint32_t testval;
2614 	uint16_t cmd;
2615 	int type;
2616 	int barlen;
2617 	struct resource *res;
2618 
2619 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2620 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2621 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2622 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2623 
2624 	if (PCI_BAR_MEM(map)) {
2625 		type = SYS_RES_MEMORY;
2626 		if (map & PCIM_BAR_MEM_PREFETCH)
2627 			prefetch = 1;
2628 	} else
2629 		type = SYS_RES_IOPORT;
2630 	ln2size = pci_mapsize(testval);
2631 	ln2range = pci_maprange(testval);
2632 	base = pci_mapbase(map);
2633 	barlen = ln2range == 64 ? 2 : 1;
2634 
2635 	/*
2636 	 * For I/O registers, if bottom bit is set, and the next bit up
2637 	 * isn't clear, we know we have a BAR that doesn't conform to the
2638 	 * spec, so ignore it.  Also, sanity check the size of the data
2639 	 * areas to the type of memory involved.  Memory must be at least
2640 	 * 16 bytes in size, while I/O ranges must be at least 4.
2641 	 */
2642 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2643 		return (barlen);
2644 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2645 	    (type == SYS_RES_IOPORT && ln2size < 2))
2646 		return (barlen);
2647 
2648 	if (ln2range == 64)
2649 		/* Read the other half of a 64bit map register */
2650 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2651 	if (bootverbose) {
2652 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2653 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2654 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2655 			kprintf(", port disabled\n");
2656 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2657 			kprintf(", memory disabled\n");
2658 		else
2659 			kprintf(", enabled\n");
2660 	}
2661 
2662 	/*
2663 	 * If base is 0, then we have problems.  It is best to ignore
2664 	 * such entries for the moment.  These will be allocated later if
2665 	 * the driver specifically requests them.  However, some
2666 	 * removable busses look better when all resources are allocated,
2667 	 * so allow '0' to be overriden.
2668 	 *
2669 	 * Similarly treat maps whose values is the same as the test value
2670 	 * read back.  These maps have had all f's written to them by the
2671 	 * BIOS in an attempt to disable the resources.
2672 	 */
2673 	if (!force && (base == 0 || map == testval))
2674 		return (barlen);
2675 	if ((u_long)base != base) {
2676 		device_printf(bus,
2677 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2678 		    pci_get_domain(dev), b, s, f, reg);
2679 		return (barlen);
2680 	}
2681 
2682 	/*
2683 	 * This code theoretically does the right thing, but has
2684 	 * undesirable side effects in some cases where peripherals
2685 	 * respond oddly to having these bits enabled.  Let the user
2686 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2687 	 * default).
2688 	 */
2689 	if (pci_enable_io_modes) {
2690 		/* Turn on resources that have been left off by a lazy BIOS */
2691 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2692 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2693 			cmd |= PCIM_CMD_PORTEN;
2694 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2695 		}
2696 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2697 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2698 			cmd |= PCIM_CMD_MEMEN;
2699 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2700 		}
2701 	} else {
2702 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2703 			return (barlen);
2704 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2705 			return (barlen);
2706 	}
2707 
2708 	count = 1 << ln2size;
2709 	if (base == 0 || base == pci_mapbase(testval)) {
2710 		start = 0;	/* Let the parent decide. */
2711 		end = ~0ULL;
2712 	} else {
2713 		start = base;
2714 		end = base + (1 << ln2size) - 1;
2715 	}
2716 	resource_list_add(rl, type, reg, start, end, count, -1);
2717 
2718 	/*
2719 	 * Try to allocate the resource for this BAR from our parent
2720 	 * so that this resource range is already reserved.  The
2721 	 * driver for this device will later inherit this resource in
2722 	 * pci_alloc_resource().
2723 	 */
2724 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2725 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2726 	if (res == NULL) {
2727 		/*
2728 		 * If the allocation fails, delete the resource list
2729 		 * entry to force pci_alloc_resource() to allocate
2730 		 * resources from the parent.
2731 		 */
2732 		resource_list_delete(rl, type, reg);
2733 #ifdef PCI_BAR_CLEAR
2734 		/* Clear the BAR */
2735 		start = 0;
2736 #else	/* !PCI_BAR_CLEAR */
2737 		/*
2738 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2739 		 * PCI function, clearing the BAR causes HPET timer
2740 		 * stop ticking.
2741 		 */
2742 		if (bootverbose) {
2743 			kprintf("pci:%d:%d:%d: resource reservation failed "
2744 				"%#jx - %#jx\n", b, s, f,
2745 				(intmax_t)start, (intmax_t)end);
2746 		}
2747 		return (barlen);
2748 #endif	/* PCI_BAR_CLEAR */
2749 	} else {
2750 		start = rman_get_start(res);
2751 	}
2752 	pci_write_config(dev, reg, start, 4);
2753 	if (ln2range == 64)
2754 		pci_write_config(dev, reg + 4, start >> 32, 4);
2755 	return (barlen);
2756 }
2757 
2758 /*
2759  * For ATA devices we need to decide early what addressing mode to use.
2760  * Legacy demands that the primary and secondary ATA ports sits on the
2761  * same addresses that old ISA hardware did. This dictates that we use
2762  * those addresses and ignore the BAR's if we cannot set PCI native
2763  * addressing mode.
2764  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: use the BARs in native mode, otherwise claim
	 * the legacy ISA-compatible ports 0x1f0-0x1f7 and 0x3f6.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	/*
	 * Secondary channel: same policy, legacy ports 0x170-0x177 and
	 * 0x376 when not in native mode.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* BAR(4) is bus-master DMA; BAR(5) varies by controller. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2818 
/*
 * Figure out the IRQ for a device's INTx pin and register it as the
 * rid 0 SYS_RES_IRQ resource.  Precedence: user tunable, then (per
 * 'force_route') either the bus's routing method or the intline
 * register programmed by the firmware.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Out-of-range tunable values are treated as "not set". */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_intr_cpuid(irq));
}
2867 
/*
 * Populate a device's resource list from its BARs, quirk-declared
 * extra map registers, and its INTx interrupt (re-routed if the
 * firmware-supplied value looks usable).
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map returns the number of BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2913 
/*
 * Probe every slot/function on the given bus and add a child device
 * for each PCI function found.  'dinfo_size' lets subclassed busses
 * allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots whose header type is not a known layout. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2946 
/*
 * Attach a probed PCI function to the bus: create the newbus child,
 * hook up its ivars and resource list, snapshot then restore its
 * config space, and add its BAR/IRQ resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save first, then restore: normalizes state left by the firmware. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2961 
2962 static int
2963 pci_probe(device_t dev)
2964 {
2965 	device_set_desc(dev, "PCI bus");
2966 
2967 	/* Allow other subclasses to override this driver. */
2968 	return (-1000);
2969 }
2970 
2971 static int
2972 pci_attach(device_t dev)
2973 {
2974 	int busno, domain;
2975 
2976 	/*
2977 	 * Since there can be multiple independantly numbered PCI
2978 	 * busses on systems with multiple PCI domains, we can't use
2979 	 * the unit number to decide which bus we are probing. We ask
2980 	 * the parent pcib what our domain and bus numbers are.
2981 	 */
2982 	domain = pcib_get_domain(dev);
2983 	busno = pcib_get_bus(dev);
2984 	if (bootverbose)
2985 		device_printf(dev, "domain=%d, physical bus=%d\n",
2986 		    domain, busno);
2987 
2988 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2989 
2990 	return (bus_generic_attach(dev));
2991 }
2992 
/*
 * Suspend the PCI bus: save each child's config space, suspend the
 * children, then (when ACPI-driven power management is enabled) place
 * attached type 0 functions into the sleep state ACPI suggests,
 * defaulting to D3.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
3040 
/*
 * Bus resume method.  Power each child back to D0 (when ACPI power
 * management is enabled and the child is an attached type-0 function),
 * restore its saved config space, then resume the children via the
 * generic bus code.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3075 
3076 static void
3077 pci_load_vendor_data(void)
3078 {
3079 	caddr_t vendordata, info;
3080 
3081 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3082 		info = preload_search_info(vendordata, MODINFO_ADDR);
3083 		pci_vendordata = *(char **)info;
3084 		info = preload_search_info(vendordata, MODINFO_SIZE);
3085 		pci_vendordata_size = *(size_t *)info;
3086 		/* terminate the database */
3087 		pci_vendordata[pci_vendordata_size] = '\n';
3088 	}
3089 }
3090 
/*
 * Bus driver_added method: a new PCI driver was loaded, so give it a
 * chance to identify new children and re-probe existing children that
 * currently have no driver (state DS_NOTPRESENT).  Config space is
 * restored before the probe and saved again (with the device powered
 * down, setstate=1) if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only re-probe children that have no driver attached. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
3120 
/*
 * Bus child_detached method: once a driver detaches, save the child's
 * config space and power it down (pci_cfg_save with setstate=1).
 */
static void
pci_child_detached(device_t parent __unused, device_t child)
{
	/* Turn child's power off */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
3127 
/*
 * Bus setup_intr method.  Install the handler through the generic bus
 * code first; for direct children, then configure interrupt delivery:
 * INTx is unmasked for legacy interrupts, and -- when MSI support is
 * compiled in -- MSI/MSI-X vectors are mapped via the parent bridge and
 * enabled (with INTx masked) for non-zero interrupt rids.  If MSI
 * mapping fails, the freshly installed handler is torn down again and
 * the error is returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep, lwkt_serialize_t serializer)
{
#ifdef MSI
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int rid;
#endif
	int error;
	void *cookie;
	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* Unmask INTx by default (re-done below for rid 0 when MSI). */
	pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
#ifdef MSI
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* Map the MSI vector lazily, on first handler. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Must be MSI-X; rid N maps to table entry N-1. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the entry on its first handler. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
#endif
	*cookiep = cookie;
	return (0);
}
3222 
3223 int
3224 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3225     void *cookie)
3226 {
3227 #ifdef MSI
3228 	struct msix_table_entry *mte;
3229 	struct resource_list_entry *rle;
3230 	struct pci_devinfo *dinfo;
3231 	int rid;
3232 #endif
3233 	int error;
3234 
3235 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3236 		return (EINVAL);
3237 
3238 	/* If this isn't a direct child, just bail out */
3239 	if (device_get_parent(child) != dev)
3240 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3241 
3242 	pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3243 #ifdef MSI
3244 	rid = rman_get_rid(irq);
3245 	if (rid == 0) {
3246 		/* Mask INTx */
3247 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3248 	} else {
3249 		/*
3250 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3251 		 * decrement the appropriate handlers count and mask the
3252 		 * MSI-X message, or disable MSI messages if the count
3253 		 * drops to 0.
3254 		 */
3255 		dinfo = device_get_ivars(child);
3256 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3257 		if (rle->res != irq)
3258 			return (EINVAL);
3259 		if (dinfo->cfg.msi.msi_alloc > 0) {
3260 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3261 			    ("MSI-X index too high"));
3262 			if (dinfo->cfg.msi.msi_handlers == 0)
3263 				return (EINVAL);
3264 			dinfo->cfg.msi.msi_handlers--;
3265 			if (dinfo->cfg.msi.msi_handlers == 0)
3266 				pci_disable_msi(child);
3267 		} else {
3268 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3269 			    ("No MSI or MSI-X interrupts allocated"));
3270 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3271 			    ("MSI-X index too high"));
3272 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3273 			if (mte->mte_handlers == 0)
3274 				return (EINVAL);
3275 			mte->mte_handlers--;
3276 			if (mte->mte_handlers == 0)
3277 				pci_mask_msix(child, rid - 1);
3278 		}
3279 	}
3280 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3281 	if (rid > 0)
3282 		KASSERT(error == 0,
3283 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3284 #endif
3285 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3286 	return (error);
3287 }
3288 
/*
 * Bus print_child method: print one line describing the child and its
 * allocated port/memory/irq resources.  Returns the number of
 * characters printed, as required by the print_child contract.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	if (device_get_flags(dev))
		retval += kprintf(" flags %#x", device_get_flags(dev));

	retval += kprintf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3314 
/*
 * Class/subclass description table used by pci_probe_nomatch() to print
 * a generic description of devices no driver claimed.  An entry with
 * subclass -1 is the fallback description for the whole base class; the
 * table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3406 
/*
 * Bus probe_nomatch method: report a PCI device that no driver claimed.
 * Prefer a description from the loaded vendor database; otherwise fall
 * back to a generic class/subclass description from pci_nomatch_tab.
 * Finally save config space and power the device down (setstate=1).
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		/* pci_describe_device() transfers ownership; free it here. */
		kfree(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
		pci_get_vendor(child), pci_get_device(child),
		pci_get_slot(child), pci_get_function(child));
	if (pci_get_intpin(child) > 0) {
		int irq;

		irq = pci_get_irq(child);
		if (PCI_INTERRUPT_VALID(irq))
			kprintf(" irq %d", irq);
	}
	kprintf("\n");

	/* Power the unclaimed device down after saving its config space. */
	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
}
3455 
3456 /*
3457  * Parse the PCI device database, if loaded, and return a pointer to a
3458  * description of the device.
3459  *
3460  * The database is flat text formatted as follows:
3461  *
3462  * Any line not in a valid format is ignored.
3463  * Lines are terminated with newline '\n' characters.
3464  *
3465  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3466  * the vendor name.
3467  *
3468  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3469  * - devices cannot be listed without a corresponding VENDOR line.
3470  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3471  * another TAB, then the device name.
3472  */
3473 
3474 /*
3475  * Assuming (ptr) points to the beginning of a line in the database,
3476  * return the vendor or device and description of the next entry.
3477  * The value of (vendor) or (device) inappropriate for the entry type
3478  * is set to -1.  Returns nonzero at the end of the database.
3479  *
 * Note that this is not fully robust against corrupt data; we guard
 * against running off the end of the database by appending a newline
 * to it when it is loaded.
3483  */
/*
 * Parse one entry of the vendor database starting at *ptr.  On return,
 * *vendor or *device holds the parsed ID (the other is -1) and *desc
 * (caller-supplied buffer of at least 81 bytes, per the %80 scan width)
 * holds the description.  *ptr is advanced past the consumed line(s).
 * Returns nonzero when the end of the database is reached.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining before the end of the database. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		/*
		 * Step over the newline.  When left hits 0, cp points at
		 * the sentinel '\n' appended by pci_load_vendor_data().
		 */
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3529 
/*
 * Look the device up in the loaded vendor database and return a
 * kmalloc'd "vendor, device" description string, or NULL if the
 * database is absent, the vendor is unknown, or allocation fails.
 * The caller owns the returned string and must kfree(, M_DEVBUF) it.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers match the %80 scan width in the parser. */
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match, use numeric fallback. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: stop looking. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	/* +3: ", " separator plus the terminating NUL. */
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3582 
/*
 * Bus read_ivar method: return one PCI instance variable of the child,
 * read from its cached config registers.  Returns ENOENT for unknown
 * ivars and EINVAL for PCI_IVAR_ETHADDR (not supported here).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high 16 bits. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3674 
/*
 * Bus write_ivar method.  Only PCI_IVAR_INTPIN is writable; all other
 * recognized ivars are read-only (EINVAL), and unknown ones return
 * ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3707 #ifdef notyet
3708 #include "opt_ddb.h"
3709 #ifdef DDB
3710 #include <ddb/ddb.h>
3711 #include <sys/cons.h>
3712 
3713 /*
3714  * List resources based on pci map registers, used for within ddb
3715  */
3716 
/*
 * DDB "show pciregs" command: walk the global pci_devq list and print
 * one summary line per PCI device (selector, class, subsystem, chip ID,
 * revision, header type).  Currently compiled out (#ifdef notyet).
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unattached devices print as "none<counter>". */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3756 #endif /* DDB */
3757 #endif
3758 
/*
 * Lazily size and allocate the resource backing a BAR.  The BAR is
 * probed by writing all-ones and reading back the size mask, the
 * original value is restored (keeping e.g. the console device
 * addressable), the request is validated against the BAR's actual
 * type and size, and on success the resource is entered into the
 * child's resource list and the BAR is programmed with the allocated
 * base address.  Returns the resource, or NULL on any mismatch or
 * allocation failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit BARs carry the upper half in the next dword. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* Reject requests whose type disagrees with the BAR's type bit. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* Hardware requires the BAR to be naturally aligned to its size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the BAR (with either the old or newly allocated base). */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3857 
3858 
/*
 * Bus alloc_resource method.  For direct children this performs the
 * PCI-specific lazy work first: routing a legacy interrupt on demand,
 * enabling I/O/memory decoding for BAR rids, and sizing/allocating
 * unallocated BARs via pci_alloc_map().  Already-reserved resources
 * are returned directly (activated here if RF_ACTIVE is requested,
 * since the nexus path is short-circuited).  Everything else falls
 * through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
#ifdef MSI
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
#endif
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			/* BAR not yet sized/allocated: do it lazily now. */
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
3937 
/*
 * Bus delete_resource method.  For direct children, release the
 * resource (refusing if the child still owns it or it is active),
 * remove it from the child's resource list, clear the corresponding
 * config register, and propagate the deletion to our parent.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/* Refuse while the child still holds or uses it. */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3973 
3974 struct resource_list *
3975 pci_get_resource_list (device_t dev, device_t child)
3976 {
3977 	struct pci_devinfo *dinfo = device_get_ivars(child);
3978 
3979 	if (dinfo == NULL)
3980 		return (NULL);
3981 
3982 	return (&dinfo->resources);
3983 }
3984 
3985 uint32_t
3986 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3987 {
3988 	struct pci_devinfo *dinfo = device_get_ivars(child);
3989 	pcicfgregs *cfg = &dinfo->cfg;
3990 
3991 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3992 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3993 }
3994 
3995 void
3996 pci_write_config_method(device_t dev, device_t child, int reg,
3997     uint32_t val, int width)
3998 {
3999 	struct pci_devinfo *dinfo = device_get_ivars(child);
4000 	pcicfgregs *cfg = &dinfo->cfg;
4001 
4002 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4003 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4004 }
4005 
4006 int
4007 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4008     size_t buflen)
4009 {
4010 
4011 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4012 	    pci_get_function(child));
4013 	return (0);
4014 }
4015 
4016 int
4017 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4018     size_t buflen)
4019 {
4020 	struct pci_devinfo *dinfo;
4021 	pcicfgregs *cfg;
4022 
4023 	dinfo = device_get_ivars(child);
4024 	cfg = &dinfo->cfg;
4025 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4026 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4027 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4028 	    cfg->progif);
4029 	return (0);
4030 }
4031 
4032 int
4033 pci_assign_interrupt_method(device_t dev, device_t child)
4034 {
4035 	struct pci_devinfo *dinfo = device_get_ivars(child);
4036 	pcicfgregs *cfg = &dinfo->cfg;
4037 
4038 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4039 	    cfg->intpin));
4040 }
4041 
4042 static int
4043 pci_modevent(module_t mod, int what, void *arg)
4044 {
4045 	static struct cdev *pci_cdev;
4046 
4047 	switch (what) {
4048 	case MOD_LOAD:
4049 		STAILQ_INIT(&pci_devq);
4050 		pci_generation = 0;
4051 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4052 				    "pci");
4053 		pci_load_vendor_data();
4054 		break;
4055 
4056 	case MOD_UNLOAD:
4057 		destroy_dev(pci_cdev);
4058 		break;
4059 	}
4060 
4061 	return (0);
4062 }
4063 
/*
 * Restore a device's saved configuration registers after a power
 * transition.  The matching snapshot is taken by pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Replay the BARs and the writable type-0 header registers. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4109 
/*
 * Snapshot the writable portion of a type 0 configuration header into
 * the device's softc so pci_cfg_restore() can replay it later, and, when
 * 'setstate' is non-zero, power the device down according to the
 * pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/*
	 * Policy knob: 0 = never, 1 = conservative, 2 = aggressive,
	 * 3 = always.  NOTE(review): values outside 0-3 match no case and
	 * fall through to the powerdown code below -- confirm intended.
	 */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4193 
4194 #ifdef COMPAT_OLDPCI
4195 
4196 /*
4197  * Locate the parent of a PCI device by scanning the PCI devlist
4198  * and return the entry for the parent.
4199  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4200  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4201  */
4202 pcicfgregs *
4203 pci_devlist_get_parent(pcicfgregs *cfg)
4204 {
4205 	struct devlist *devlist_head;
4206 	struct pci_devinfo *dinfo;
4207 	pcicfgregs *bridge_cfg;
4208 	int i;
4209 
4210 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4211 
4212 	/* If the device is on PCI bus 0, look for the host */
4213 	if (cfg->bus == 0) {
4214 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4215 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4216 			bridge_cfg = &dinfo->cfg;
4217 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4218 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4219 		    		&& bridge_cfg->bus == cfg->bus) {
4220 				return bridge_cfg;
4221 			}
4222 		}
4223 	}
4224 
4225 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4226 	if (cfg->bus > 0) {
4227 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4228 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4229 			bridge_cfg = &dinfo->cfg;
4230 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4231 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4232 				&& bridge_cfg->secondarybus == cfg->bus) {
4233 				return bridge_cfg;
4234 			}
4235 		}
4236 	}
4237 
4238 	return NULL;
4239 }
4240 
4241 #endif	/* COMPAT_OLDPCI */
4242