xref: /dragonfly/sys/bus/pci/pci.c (revision 28c26f7e)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_bus.h"
32 #include "opt_acpi.h"
33 #include "opt_compat_oldpci.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 
47 #include <vm/vm.h>
48 #include <vm/pmap.h>
49 #include <vm/vm_extern.h>
50 
51 #include <sys/bus.h>
52 #include <sys/rman.h>
53 #include <sys/device.h>
54 
55 #include <sys/pciio.h>
56 #include <bus/pci/pcireg.h>
57 #include <bus/pci/pcivar.h>
58 #include <bus/pci/pci_private.h>
59 
60 #include "pcib_if.h"
61 #include "pci_if.h"
62 
63 #ifdef __HAVE_ACPI
64 #include <contrib/dev/acpica/acpi.h>
65 #include "acpi_if.h"
66 #else
67 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
68 #endif
69 
70 extern struct dev_ops pcic_ops;	/* XXX */
71 
72 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
73 
74 static uint32_t		pci_mapbase(unsigned mapreg);
75 static const char	*pci_maptype(unsigned mapreg);
76 static int		pci_mapsize(unsigned testval);
77 static int		pci_maprange(unsigned mapreg);
78 static void		pci_fixancient(pcicfgregs *cfg);
79 
80 static int		pci_porten(device_t pcib, int b, int s, int f);
81 static int		pci_memen(device_t pcib, int b, int s, int f);
82 static void		pci_assign_interrupt(device_t bus, device_t dev,
83 			    int force_route);
84 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
85 			    int b, int s, int f, int reg,
86 			    struct resource_list *rl, int force, int prefetch);
87 static int		pci_probe(device_t dev);
88 static int		pci_attach(device_t dev);
89 static void		pci_child_detached(device_t, device_t);
90 static void		pci_load_vendor_data(void);
91 static int		pci_describe_parse_line(char **ptr, int *vendor,
92 			    int *device, char **desc);
93 static char		*pci_describe_device(device_t dev);
94 static int		pci_modevent(module_t mod, int what, void *arg);
95 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
96 			    pcicfgregs *cfg);
97 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
98 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
99 			    int reg, uint32_t *data);
100 #if 0
101 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t data);
103 #endif
104 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
105 static void		pci_disable_msi(device_t dev);
106 static void		pci_enable_msi(device_t dev, uint64_t address,
107 			    uint16_t data);
108 static void		pci_enable_msix(device_t dev, u_int index,
109 			    uint64_t address, uint32_t data);
110 static void		pci_mask_msix(device_t dev, u_int index);
111 static void		pci_unmask_msix(device_t dev, u_int index);
112 static int		pci_msi_blacklisted(void);
113 static void		pci_resume_msi(device_t dev);
114 static void		pci_resume_msix(device_t dev);
115 static int		pcie_slotimpl(const pcicfgregs *);
116 static void		pci_print_verbose_expr(const pcicfgregs *);
117 
118 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
119 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
120 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
121 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
122 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_subvendor(device_t, int, int,
124 			    pcicfgregs *);
125 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
127 
/*
 * kobj method table wiring the "pci" bus driver into the device
 * framework: generic device_* lifecycle methods, bus_* methods for
 * resource/interrupt management of children, and the PCI-specific
 * pci_* interface (config space, power states, MSI/MSI-X, VPD).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 } /* required terminator */
};
180 
181 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
182 
183 static devclass_t pci_devclass;
184 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, 0);
185 MODULE_VERSION(pci, 1);
186 
187 static char	*pci_vendordata;
188 static size_t	pci_vendordata_size;
189 
190 
/*
 * Dispatch table mapping a PCI capability ID (PCIY_*) to the routine
 * that parses it into pcicfgregs.  Scanned linearly for each
 * capability found by pci_read_capabilities(); a NULL read_cap
 * terminates the table.
 */
static const struct pci_read_cap {
	int		cap;		/* capability ID (PCIY_*) */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
205 
/*
 * One entry in the quirk table below; matched against a device's
 * combined vendor/device ID word.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk-type specific (e.g. map register offset) */
	int	arg2;	/* quirk-type specific, currently unused */
};
214 
/*
 * Table of known-broken devices/chipsets.  Terminated by an
 * all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 } /* terminator */
};
249 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* Global list of every PCI function found during bus scan. */
struct devlist pci_devq;
/* Bumped whenever pci_devq changes, so readers can detect staleness. */
uint32_t pci_generation;
/* Number of entries currently on pci_devq. */
uint32_t pci_numdevs = 0;
/*
 * Latched to 1 once any PCIe / PCI-X capability is seen; consulted by
 * the MSI blacklist logic.
 */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
300 
301 /* Find a device_t by bus/slot/function in domain 0 */
302 
303 device_t
304 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
305 {
306 
307 	return (pci_find_dbsf(0, bus, slot, func));
308 }
309 
310 /* Find a device_t by domain/bus/slot/function */
311 
312 device_t
313 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
314 {
315 	struct pci_devinfo *dinfo;
316 
317 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
318 		if ((dinfo->cfg.domain == domain) &&
319 		    (dinfo->cfg.bus == bus) &&
320 		    (dinfo->cfg.slot == slot) &&
321 		    (dinfo->cfg.func == func)) {
322 			return (dinfo->cfg.dev);
323 		}
324 	}
325 
326 	return (NULL);
327 }
328 
329 /* Find a device_t by vendor/device ID */
330 
331 device_t
332 pci_find_device(uint16_t vendor, uint16_t device)
333 {
334 	struct pci_devinfo *dinfo;
335 
336 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
337 		if ((dinfo->cfg.vendor == vendor) &&
338 		    (dinfo->cfg.device == device)) {
339 			return (dinfo->cfg.dev);
340 		}
341 	}
342 
343 	return (NULL);
344 }
345 
346 /* return base address of memory or port map */
347 
348 static uint32_t
349 pci_mapbase(uint32_t mapreg)
350 {
351 
352 	if (PCI_BAR_MEM(mapreg))
353 		return (mapreg & PCIM_BAR_MEM_BASE);
354 	else
355 		return (mapreg & PCIM_BAR_IO_BASE);
356 }
357 
358 /* return map type of memory or port map */
359 
360 static const char *
361 pci_maptype(unsigned mapreg)
362 {
363 
364 	if (PCI_BAR_IO(mapreg))
365 		return ("I/O Port");
366 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
367 		return ("Prefetchable Memory");
368 	return ("Memory");
369 }
370 
371 /* return log2 of map size decoded for memory or port map */
372 
373 static int
374 pci_mapsize(uint32_t testval)
375 {
376 	int ln2size;
377 
378 	testval = pci_mapbase(testval);
379 	ln2size = 0;
380 	if (testval != 0) {
381 		while ((testval & 1) == 0)
382 		{
383 			ln2size++;
384 			testval >>= 1;
385 		}
386 	}
387 	return (ln2size);
388 }
389 
390 /* return log2 of address range supported by map register */
391 
392 static int
393 pci_maprange(unsigned mapreg)
394 {
395 	int ln2range = 0;
396 
397 	if (PCI_BAR_IO(mapreg))
398 		ln2range = 32;
399 	else
400 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
401 		case PCIM_BAR_MEM_32:
402 			ln2range = 32;
403 			break;
404 		case PCIM_BAR_MEM_1MB:
405 			ln2range = 20;
406 			break;
407 		case PCIM_BAR_MEM_64:
408 			ln2range = 64;
409 			break;
410 		}
411 	return (ln2range);
412 }
413 
414 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
415 
416 static void
417 pci_fixancient(pcicfgregs *cfg)
418 {
419 	if (cfg->hdrtype != 0)
420 		return;
421 
422 	/* PCI to PCI bridges use header type 1 */
423 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
424 		cfg->hdrtype = 1;
425 }
426 
427 /* extract header type specific config data */
428 
/*
 * Extract header-type specific config data into *cfg.  The layout of
 * the subvendor/subdevice registers and the number of BARs depends on
 * the header type (0 = normal device, 1 = PCI-PCI bridge,
 * 2 = cardbus bridge); other types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:
		/* Bridges have no subvendor registers at these offsets. */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
456 
457 /* read configuration header into pcicfgregs structure */
/*
 * Read the configuration header of domain/bus/slot/func into a freshly
 * allocated pci_devinfo of 'size' bytes (callers may over-allocate for
 * derived structures) and link it onto the global device list.
 *
 * Returns the new entry, or NULL if no device responds at this address
 * (vendor/device register reads back as all-ones).
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* All-ones means no device is decoding this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function bit out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		/* Header-type dependent fields, then the capability list. */
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config into the pciio(4)-visible pci_conf. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
528 
529 static int
530 pci_fixup_nextptr(int *nextptr0)
531 {
532 	int nextptr = *nextptr0;
533 
534 	/* "Next pointer" is only one byte */
535 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d\n", nextptr));
536 
537 	if (nextptr & 0x3) {
538 		/*
539 		 * PCI local bus spec 3.0:
540 		 *
541 		 * "... The bottom two bits of all pointers are reserved
542 		 *  and must be implemented as 00b although software must
543 		 *  mask them to allow for future uses of these bits ..."
544 		 */
545 		if (bootverbose) {
546 			kprintf("Illegal PCI extended capability "
547 				"offset, fixup 0x%02x -> 0x%02x\n",
548 				nextptr, nextptr & ~0x3);
549 		}
550 		nextptr &= ~0x3;
551 	}
552 	*nextptr0 = nextptr;
553 
554 	if (nextptr < 0x40) {
555 		if (nextptr != 0) {
556 			kprintf("Illegal PCI extended capability "
557 				"offset 0x%02x", nextptr);
558 		}
559 		return 0;
560 	}
561 	return 1;
562 }
563 
/*
 * Parse a power-management capability at config offset 'ptr' into
 * cfg->pp.  Only the first PM capability found is recorded (pp_cap
 * non-zero acts as the "already seen" flag).
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	/* Only record a data register if the capability is large enough. */
	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
595 
/*
 * Parse a HyperTransport capability; intended to record the MSI
 * mapping window of HT bridges.  The entire body is currently
 * compiled out ("notyet"), so this is a no-op on all platforms.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#ifdef notyet
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_INTEL_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_INTEL_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
#endif	/* notyet */
}
639 
/*
 * Parse an MSI capability at 'ptr': record its location, the control
 * word, and the message count advertised by the MMC field (a power
 * of two encoded in bits 3:1).
 */
static void
pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msi *msi = &cfg->msi;

	msi->msi_location = ptr;
	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
	/* MMC encodes log2 of the supported message count. */
	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);

#undef REG
}
654 
/*
 * Parse an MSI-X capability at 'ptr': record its location, table
 * size, and which BAR/offset hold the vector table and the pending
 * bit array (PBA).
 */
static void
pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msix *msix = &cfg->msix;
	uint32_t val;

	msix->msix_location = ptr;
	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
	/* Table-size field is N-1 encoded. */
	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;

	/* Low 3 bits select the BAR, the rest is the offset within it. */
	val = REG(ptr + PCIR_MSIX_TABLE, 4);
	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;

	val = REG(ptr + PCIR_MSIX_PBA, 4);
	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;

#undef REG
}
678 
/*
 * Record the config-space offset of the VPD capability; the actual
 * VPD contents are read lazily via pci_read_vpd().
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
684 
/*
 * Parse a PCI-PCI bridge subvendor capability: bridges carry their
 * subvendor/subdevice IDs here rather than in the type-0 header
 * registers.
 */
static void
pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	/* Should always be true. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
		uint32_t val;

		/* Low word is subvendor, high word is subdevice. */
		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
		cfg->subvendor = val & 0xffff;
		cfg->subdevice = val >> 16;
	}

#undef REG
}
702 
/*
 * Record a PCI-X capability, and use bridge-resident PCI-X
 * capabilities as a (rough) indication that the chipset itself is
 * PCI-X capable.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
718 
719 static int
720 pcie_slotimpl(const pcicfgregs *cfg)
721 {
722 	const struct pcicfg_expr *expr = &cfg->expr;
723 	uint16_t port_type;
724 
725 	/*
726 	 * Only version 1 can be parsed currently
727 	 */
728 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
729 		return 0;
730 
731 	/*
732 	 * - Slot implemented bit is meaningful iff current port is
733 	 *   root port or down stream port.
734 	 * - Testing for root port or down stream port is meanningful
735 	 *   iff PCI configure has type 1 header.
736 	 */
737 
738 	if (cfg->hdrtype != 1)
739 		return 0;
740 
741 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
742 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
743 		return 0;
744 
745 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
746 		return 0;
747 
748 	return 1;
749 }
750 
/*
 * Parse a PCI Express capability at 'ptr': latch the global "PCIe
 * chipset present" flag, record the capability location and word,
 * and read the slot capabilities when a slot is implemented.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Only version 1 can be parsed currently
	 */
	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
		return;

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
783 
/*
 * Walk the device's capability list and dispatch each entry to its
 * parser via the pci_read_caps table.  Does nothing when the status
 * register says no capability list is present, or when the header
 * type is not 0/1/2.
 *
 * NOTE: REG/WREG are deliberately NOT #undef'd here — the VPD access
 * routines below reuse them.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The head-of-list pointer lives at a header-type dependent offset. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}
/* REG and WREG use carry through to next functions */
}
832 
833 /*
834  * PCI Vital Product Data
835  */
836 
837 #define	PCI_VPD_TIMEOUT		1000000
838 
/*
 * Read one 32-bit word of VPD at byte offset 'reg' (must be 4-byte
 * aligned) through the VPD address/data register pair.  Writing the
 * address with bit 15 clear starts a read; the device sets bit 15
 * when the data register is valid.  Returns 0 on success or ENXIO
 * on timeout.  Uses the REG/WREG macros left defined by
 * pci_read_capabilities() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Poll for the completion flag (bit 15) with a bounded spin. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
857 
#if 0
/*
 * Write one 32-bit word of VPD at byte offset 'reg' (4-byte aligned).
 * Setting bit 15 of the address starts the write; the device clears
 * it on completion.  Returns 0 on success or ENXIO on timeout.
 * Currently unused, hence compiled out.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
877 
878 #undef PCI_VPD_TIMEOUT
879 
/*
 * Cursor state for the byte-at-a-time VPD reader: VPD is fetched in
 * 32-bit words and doled out one byte per vpd_nextbyte() call while
 * a running checksum is maintained.
 */
struct vpd_readstate {
	device_t	pcib;		/* parent bridge for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit window */
	int		bytesinval;	/* unread bytes remaining in val */
	int		off;		/* next word offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes read */
};
888 
889 static int
890 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
891 {
892 	uint32_t reg;
893 	uint8_t byte;
894 
895 	if (vrs->bytesinval == 0) {
896 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
897 			return (ENXIO);
898 		vrs->val = le32toh(reg);
899 		vrs->off += 4;
900 		byte = vrs->val & 0xff;
901 		vrs->bytesinval = 3;
902 	} else {
903 		vrs->val = vrs->val >> 8;
904 		byte = vrs->val & 0xff;
905 		vrs->bytesinval--;
906 	}
907 
908 	vrs->cksum += byte;
909 	*data = byte;
910 	return (0);
911 }
912 
913 int
914 pcie_slot_implemented(device_t dev)
915 {
916 	struct pci_devinfo *dinfo = device_get_ivars(dev);
917 
918 	return pcie_slotimpl(&dinfo->cfg);
919 }
920 
/*
 * Program a PCIe device's Max Read Request Size field in the device
 * control register.  'rqsize' is a PCIEM_DEVCTL_MAX_READRQ_* encoding;
 * stray bits outside the field are masked off first.  Panics if the
 * (masked) value exceeds the 4096-byte encoding or if the device has
 * no PCIe capability.  The register is only rewritten when the field
 * actually changes.
 */
void
pcie_set_max_readrq(device_t dev, uint16_t rqsize)
{
	uint8_t expr_ptr;
	uint16_t val;

	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
		panic("%s: invalid max read request size 0x%02x\n",
		      device_get_nameunit(dev), rqsize);
	}

	expr_ptr = pci_get_pciecap_ptr(dev);
	if (!expr_ptr)
		panic("%s: not PCIe device\n", device_get_nameunit(dev));

	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
		if (bootverbose)
			device_printf(dev, "adjust device control 0x%04x", val);

		/* Read-modify-write just the MAX_READRQ field. */
		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
		val |= rqsize;
		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);

		if (bootverbose)
			kprintf(" -> 0x%04x\n", val);
	}
}
950 
951 static void
952 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
953 {
954 	struct vpd_readstate vrs;
955 	int state;
956 	int name;
957 	int remain;
958 	int i;
959 	int alloc, off;		/* alloc/off for RO/W arrays */
960 	int cksumvalid;
961 	int dflen;
962 	uint8_t byte;
963 	uint8_t byte2;
964 
965 	/* init vpd reader */
966 	vrs.bytesinval = 0;
967 	vrs.off = 0;
968 	vrs.pcib = pcib;
969 	vrs.cfg = cfg;
970 	vrs.cksum = 0;
971 
972 	state = 0;
973 	name = remain = i = 0;	/* shut up stupid gcc */
974 	alloc = off = 0;	/* shut up stupid gcc */
975 	dflen = 0;		/* shut up stupid gcc */
976 	cksumvalid = -1;
977 	while (state >= 0) {
978 		if (vpd_nextbyte(&vrs, &byte)) {
979 			state = -2;
980 			break;
981 		}
982 #if 0
983 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
984 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
985 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
986 #endif
987 		switch (state) {
988 		case 0:		/* item name */
989 			if (byte & 0x80) {
990 				if (vpd_nextbyte(&vrs, &byte2)) {
991 					state = -2;
992 					break;
993 				}
994 				remain = byte2;
995 				if (vpd_nextbyte(&vrs, &byte2)) {
996 					state = -2;
997 					break;
998 				}
999 				remain |= byte2 << 8;
1000 				if (remain > (0x7f*4 - vrs.off)) {
1001 					state = -1;
1002 					kprintf(
1003 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1004 					    cfg->domain, cfg->bus, cfg->slot,
1005 					    cfg->func, remain);
1006 				}
1007 				name = byte & 0x7f;
1008 			} else {
1009 				remain = byte & 0x7;
1010 				name = (byte >> 3) & 0xf;
1011 			}
1012 			switch (name) {
1013 			case 0x2:	/* String */
1014 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1015 				    M_DEVBUF, M_WAITOK);
1016 				i = 0;
1017 				state = 1;
1018 				break;
1019 			case 0xf:	/* End */
1020 				state = -1;
1021 				break;
1022 			case 0x10:	/* VPD-R */
1023 				alloc = 8;
1024 				off = 0;
1025 				cfg->vpd.vpd_ros = kmalloc(alloc *
1026 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1027 				    M_WAITOK | M_ZERO);
1028 				state = 2;
1029 				break;
1030 			case 0x11:	/* VPD-W */
1031 				alloc = 8;
1032 				off = 0;
1033 				cfg->vpd.vpd_w = kmalloc(alloc *
1034 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1035 				    M_WAITOK | M_ZERO);
1036 				state = 5;
1037 				break;
1038 			default:	/* Invalid data, abort */
1039 				state = -1;
1040 				break;
1041 			}
1042 			break;
1043 
1044 		case 1:	/* Identifier String */
1045 			cfg->vpd.vpd_ident[i++] = byte;
1046 			remain--;
1047 			if (remain == 0)  {
1048 				cfg->vpd.vpd_ident[i] = '\0';
1049 				state = 0;
1050 			}
1051 			break;
1052 
1053 		case 2:	/* VPD-R Keyword Header */
1054 			if (off == alloc) {
1055 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1056 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1057 				    M_DEVBUF, M_WAITOK | M_ZERO);
1058 			}
1059 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1060 			if (vpd_nextbyte(&vrs, &byte2)) {
1061 				state = -2;
1062 				break;
1063 			}
1064 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1065 			if (vpd_nextbyte(&vrs, &byte2)) {
1066 				state = -2;
1067 				break;
1068 			}
1069 			dflen = byte2;
1070 			if (dflen == 0 &&
1071 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1072 			    2) == 0) {
1073 				/*
1074 				 * if this happens, we can't trust the rest
1075 				 * of the VPD.
1076 				 */
1077 				kprintf(
1078 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1079 				    cfg->domain, cfg->bus, cfg->slot,
1080 				    cfg->func, dflen);
1081 				cksumvalid = 0;
1082 				state = -1;
1083 				break;
1084 			} else if (dflen == 0) {
1085 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1086 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1087 				    M_DEVBUF, M_WAITOK);
1088 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1089 			} else
1090 				cfg->vpd.vpd_ros[off].value = kmalloc(
1091 				    (dflen + 1) *
1092 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1093 				    M_DEVBUF, M_WAITOK);
1094 			remain -= 3;
1095 			i = 0;
			/* keep in sync w/ state 3's transitions */
1097 			if (dflen == 0 && remain == 0)
1098 				state = 0;
1099 			else if (dflen == 0)
1100 				state = 2;
1101 			else
1102 				state = 3;
1103 			break;
1104 
1105 		case 3:	/* VPD-R Keyword Value */
1106 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1107 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1108 			    "RV", 2) == 0 && cksumvalid == -1) {
1109 				if (vrs.cksum == 0)
1110 					cksumvalid = 1;
1111 				else {
1112 					if (bootverbose)
1113 						kprintf(
1114 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1115 						    cfg->domain, cfg->bus,
1116 						    cfg->slot, cfg->func,
1117 						    vrs.cksum);
1118 					cksumvalid = 0;
1119 					state = -1;
1120 					break;
1121 				}
1122 			}
1123 			dflen--;
1124 			remain--;
			/* keep in sync w/ state 2's transitions */
1126 			if (dflen == 0)
1127 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1128 			if (dflen == 0 && remain == 0) {
1129 				cfg->vpd.vpd_rocnt = off;
1130 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1131 				    off * sizeof(*cfg->vpd.vpd_ros),
1132 				    M_DEVBUF, M_WAITOK | M_ZERO);
1133 				state = 0;
1134 			} else if (dflen == 0)
1135 				state = 2;
1136 			break;
1137 
1138 		case 4:
1139 			remain--;
1140 			if (remain == 0)
1141 				state = 0;
1142 			break;
1143 
1144 		case 5:	/* VPD-W Keyword Header */
1145 			if (off == alloc) {
1146 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1147 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1148 				    M_DEVBUF, M_WAITOK | M_ZERO);
1149 			}
1150 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1151 			if (vpd_nextbyte(&vrs, &byte2)) {
1152 				state = -2;
1153 				break;
1154 			}
1155 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1156 			if (vpd_nextbyte(&vrs, &byte2)) {
1157 				state = -2;
1158 				break;
1159 			}
1160 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1161 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1162 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1163 			    sizeof(*cfg->vpd.vpd_w[off].value),
1164 			    M_DEVBUF, M_WAITOK);
1165 			remain -= 3;
1166 			i = 0;
			/* keep in sync w/ state 6's transitions */
1168 			if (dflen == 0 && remain == 0)
1169 				state = 0;
1170 			else if (dflen == 0)
1171 				state = 5;
1172 			else
1173 				state = 6;
1174 			break;
1175 
1176 		case 6:	/* VPD-W Keyword Value */
1177 			cfg->vpd.vpd_w[off].value[i++] = byte;
1178 			dflen--;
1179 			remain--;
			/* keep in sync w/ state 5's transitions */
1181 			if (dflen == 0)
1182 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1183 			if (dflen == 0 && remain == 0) {
1184 				cfg->vpd.vpd_wcnt = off;
1185 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1186 				    off * sizeof(*cfg->vpd.vpd_w),
1187 				    M_DEVBUF, M_WAITOK | M_ZERO);
1188 				state = 0;
1189 			} else if (dflen == 0)
1190 				state = 5;
1191 			break;
1192 
1193 		default:
1194 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1195 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1196 			    state);
1197 			state = -1;
1198 			break;
1199 		}
1200 	}
1201 
1202 	if (cksumvalid == 0 || state < -1) {
1203 		/* read-only data bad, clean up */
1204 		if (cfg->vpd.vpd_ros != NULL) {
1205 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1206 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1207 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1208 			cfg->vpd.vpd_ros = NULL;
1209 		}
1210 	}
1211 	if (state < -1) {
1212 		/* I/O error, clean up */
1213 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1214 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1215 		if (cfg->vpd.vpd_ident != NULL) {
1216 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1217 			cfg->vpd.vpd_ident = NULL;
1218 		}
1219 		if (cfg->vpd.vpd_w != NULL) {
1220 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1221 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1222 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1223 			cfg->vpd.vpd_w = NULL;
1224 		}
1225 	}
1226 	cfg->vpd.vpd_cached = 1;
1227 #undef REG
1228 #undef WREG
1229 }
1230 
1231 int
1232 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1233 {
1234 	struct pci_devinfo *dinfo = device_get_ivars(child);
1235 	pcicfgregs *cfg = &dinfo->cfg;
1236 
1237 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1238 		pci_read_vpd(device_get_parent(dev), cfg);
1239 
1240 	*identptr = cfg->vpd.vpd_ident;
1241 
1242 	if (*identptr == NULL)
1243 		return (ENXIO);
1244 
1245 	return (0);
1246 }
1247 
1248 int
1249 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1250 	const char **vptr)
1251 {
1252 	struct pci_devinfo *dinfo = device_get_ivars(child);
1253 	pcicfgregs *cfg = &dinfo->cfg;
1254 	int i;
1255 
1256 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1257 		pci_read_vpd(device_get_parent(dev), cfg);
1258 
1259 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1260 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1261 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1262 			*vptr = cfg->vpd.vpd_ros[i].value;
1263 		}
1264 
1265 	if (i != cfg->vpd.vpd_rocnt)
1266 		return (0);
1267 
1268 	*vptr = NULL;
1269 	return (ENXIO);
1270 }
1271 
/*
 * Find the requested capability entry and return its offset in
 * configuration space via *capreg.  Returns 0 on success, or
 * ENXIO/ENOENT if the specified capability was not found.
 */
1276 int
1277 pci_find_extcap_method(device_t dev, device_t child, int capability,
1278     int *capreg)
1279 {
1280 	struct pci_devinfo *dinfo = device_get_ivars(child);
1281 	pcicfgregs *cfg = &dinfo->cfg;
1282 	u_int32_t status;
1283 	u_int8_t ptr;
1284 
1285 	/*
1286 	 * Check the CAP_LIST bit of the PCI status register first.
1287 	 */
1288 	status = pci_read_config(child, PCIR_STATUS, 2);
1289 	if (!(status & PCIM_STATUS_CAPPRESENT))
1290 		return (ENXIO);
1291 
1292 	/*
1293 	 * Determine the start pointer of the capabilities list.
1294 	 */
1295 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1296 	case 0:
1297 	case 1:
1298 		ptr = PCIR_CAP_PTR;
1299 		break;
1300 	case 2:
1301 		ptr = PCIR_CAP_PTR_2;
1302 		break;
1303 	default:
1304 		/* XXX: panic? */
1305 		return (ENXIO);		/* no extended capabilities support */
1306 	}
1307 	ptr = pci_read_config(child, ptr, 1);
1308 
1309 	/*
1310 	 * Traverse the capabilities list.
1311 	 */
1312 	while (ptr != 0) {
1313 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1314 			if (capreg != NULL)
1315 				*capreg = ptr;
1316 			return (0);
1317 		}
1318 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1319 	}
1320 
1321 	return (ENOENT);
1322 }
1323 
1324 /*
1325  * Support for MSI-X message interrupts.
1326  */
1327 void
1328 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1329 {
1330 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1331 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1332 	uint32_t offset;
1333 
1334 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1335 	offset = msix->msix_table_offset + index * 16;
1336 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1337 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1338 	bus_write_4(msix->msix_table_res, offset + 8, data);
1339 
1340 	/* Enable MSI -> HT mapping. */
1341 	pci_ht_map_msi(dev, address);
1342 }
1343 
1344 void
1345 pci_mask_msix(device_t dev, u_int index)
1346 {
1347 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1348 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1349 	uint32_t offset, val;
1350 
1351 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1352 	offset = msix->msix_table_offset + index * 16 + 12;
1353 	val = bus_read_4(msix->msix_table_res, offset);
1354 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1355 		val |= PCIM_MSIX_VCTRL_MASK;
1356 		bus_write_4(msix->msix_table_res, offset, val);
1357 	}
1358 }
1359 
1360 void
1361 pci_unmask_msix(device_t dev, u_int index)
1362 {
1363 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1364 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365 	uint32_t offset, val;
1366 
1367 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1368 	offset = msix->msix_table_offset + index * 16 + 12;
1369 	val = bus_read_4(msix->msix_table_res, offset);
1370 	if (val & PCIM_MSIX_VCTRL_MASK) {
1371 		val &= ~PCIM_MSIX_VCTRL_MASK;
1372 		bus_write_4(msix->msix_table_res, offset, val);
1373 	}
1374 }
1375 
1376 int
1377 pci_pending_msix(device_t dev, u_int index)
1378 {
1379 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1380 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1381 	uint32_t offset, bit;
1382 
1383 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1384 	offset = msix->msix_pba_offset + (index / 32) * 4;
1385 	bit = 1 << index % 32;
1386 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1387 }
1388 
1389 /*
1390  * Restore MSI-X registers and table during resume.  If MSI-X is
1391  * enabled then walk the virtual table to restore the actual MSI-X
1392  * table.
1393  */
1394 static void
1395 pci_resume_msix(device_t dev)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	struct msix_table_entry *mte;
1400 	struct msix_vector *mv;
1401 	int i;
1402 
1403 	if (msix->msix_alloc > 0) {
1404 		/* First, mask all vectors. */
1405 		for (i = 0; i < msix->msix_msgnum; i++)
1406 			pci_mask_msix(dev, i);
1407 
1408 		/* Second, program any messages with at least one handler. */
1409 		for (i = 0; i < msix->msix_table_len; i++) {
1410 			mte = &msix->msix_table[i];
1411 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1412 				continue;
1413 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1414 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1415 			pci_unmask_msix(dev, i);
1416 		}
1417 	}
1418 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1419 	    msix->msix_ctrl, 2);
1420 }
1421 
1422 /*
1423  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1424  * returned in *count.  After this function returns, each message will be
1425  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1426  */
1427 int
1428 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(child);
1431 	pcicfgregs *cfg = &dinfo->cfg;
1432 	struct resource_list_entry *rle;
1433 	int actual, error, i, irq, max;
1434 
1435 	/* Don't let count == 0 get us into trouble. */
1436 	if (*count == 0)
1437 		return (EINVAL);
1438 
1439 	/* If rid 0 is allocated, then fail. */
1440 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1441 	if (rle != NULL && rle->res != NULL)
1442 		return (ENXIO);
1443 
1444 	/* Already have allocated messages? */
1445 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1446 		return (ENXIO);
1447 
1448 	/* If MSI is blacklisted for this system, fail. */
1449 	if (pci_msi_blacklisted())
1450 		return (ENXIO);
1451 
1452 	/* MSI-X capability present? */
1453 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1454 		return (ENODEV);
1455 
1456 	/* Make sure the appropriate BARs are mapped. */
1457 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1458 	    cfg->msix.msix_table_bar);
1459 	if (rle == NULL || rle->res == NULL ||
1460 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1461 		return (ENXIO);
1462 	cfg->msix.msix_table_res = rle->res;
1463 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1464 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1465 		    cfg->msix.msix_pba_bar);
1466 		if (rle == NULL || rle->res == NULL ||
1467 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1468 			return (ENXIO);
1469 	}
1470 	cfg->msix.msix_pba_res = rle->res;
1471 
1472 	if (bootverbose)
1473 		device_printf(child,
1474 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1475 		    *count, cfg->msix.msix_msgnum);
1476 	max = min(*count, cfg->msix.msix_msgnum);
1477 	for (i = 0; i < max; i++) {
1478 		/* Allocate a message. */
1479 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1480 		if (error)
1481 			break;
1482 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1483 		    irq, 1);
1484 	}
1485 	actual = i;
1486 
1487 	if (bootverbose) {
1488 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1489 		if (actual == 1)
1490 			device_printf(child, "using IRQ %lu for MSI-X\n",
1491 			    rle->start);
1492 		else {
1493 			int run;
1494 
1495 			/*
1496 			 * Be fancy and try to print contiguous runs of
1497 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1498 			 * 'run' is true if we are in a range.
1499 			 */
1500 			device_printf(child, "using IRQs %lu", rle->start);
1501 			irq = rle->start;
1502 			run = 0;
1503 			for (i = 1; i < actual; i++) {
1504 				rle = resource_list_find(&dinfo->resources,
1505 				    SYS_RES_IRQ, i + 1);
1506 
1507 				/* Still in a run? */
1508 				if (rle->start == irq + 1) {
1509 					run = 1;
1510 					irq++;
1511 					continue;
1512 				}
1513 
1514 				/* Finish previous range. */
1515 				if (run) {
1516 					kprintf("-%d", irq);
1517 					run = 0;
1518 				}
1519 
1520 				/* Start new range. */
1521 				kprintf(",%lu", rle->start);
1522 				irq = rle->start;
1523 			}
1524 
1525 			/* Unfinished range? */
1526 			if (run)
1527 				kprintf("-%d", irq);
1528 			kprintf(" for MSI-X\n");
1529 		}
1530 	}
1531 
1532 	/* Mask all vectors. */
1533 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1534 		pci_mask_msix(child, i);
1535 
1536 	/* Allocate and initialize vector data and virtual table. */
1537 	cfg->msix.msix_vectors = kmalloc(sizeof(struct msix_vector) * actual,
1538 	    M_DEVBUF, M_WAITOK | M_ZERO);
1539 	cfg->msix.msix_table = kmalloc(sizeof(struct msix_table_entry) * actual,
1540 	    M_DEVBUF, M_WAITOK | M_ZERO);
1541 	for (i = 0; i < actual; i++) {
1542 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1543 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1544 		cfg->msix.msix_table[i].mte_vector = i + 1;
1545 	}
1546 
1547 	/* Update control register to enable MSI-X. */
1548 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1549 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1550 	    cfg->msix.msix_ctrl, 2);
1551 
1552 	/* Update counts of alloc'd messages. */
1553 	cfg->msix.msix_alloc = actual;
1554 	cfg->msix.msix_table_len = actual;
1555 	*count = actual;
1556 	return (0);
1557 }
1558 
1559 /*
1560  * By default, pci_alloc_msix() will assign the allocated IRQ
1561  * resources consecutively to the first N messages in the MSI-X table.
1562  * However, device drivers may want to use different layouts if they
1563  * either receive fewer messages than they asked for, or they wish to
1564  * populate the MSI-X table sparsely.  This method allows the driver
1565  * to specify what layout it wants.  It must be called after a
1566  * successful pci_alloc_msix() but before any of the associated
1567  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1568  *
1569  * The 'vectors' array contains 'count' message vectors.  The array
1570  * maps directly to the MSI-X table in that index 0 in the array
1571  * specifies the vector for the first message in the MSI-X table, etc.
1572  * The vector value in each array index can either be 0 to indicate
1573  * that no vector should be assigned to a message slot, or it can be a
1574  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1576  * vector (IRQ) to be used for the corresponding message.
1577  *
1578  * On successful return, each message with a non-zero vector will have
1579  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1580  * 1.  Additionally, if any of the IRQs allocated via the previous
1581  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1582  * will be kfreed back to the system automatically.
1583  *
1584  * For example, suppose a driver has a MSI-X table with 6 messages and
1585  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1586  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1587  * C.  After the call to pci_alloc_msix(), the device will be setup to
1588  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1590  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1591  * be kfreed back to the system.  This device will also have valid
1592  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1593  *
1594  * In any case, the SYS_RES_IRQ rid X will always map to the message
1595  * at MSI-X table index X - 1 and will only be valid if a vector is
1596  * assigned to that table entry.
1597  */
1598 int
1599 pci_remap_msix_method(device_t dev, device_t child, int count,
1600     const u_int *vectors)
1601 {
1602 	struct pci_devinfo *dinfo = device_get_ivars(child);
1603 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1604 	struct resource_list_entry *rle;
1605 	int i, irq, j, *used;
1606 
1607 	/*
1608 	 * Have to have at least one message in the table but the
1609 	 * table can't be bigger than the actual MSI-X table in the
1610 	 * device.
1611 	 */
1612 	if (count == 0 || count > msix->msix_msgnum)
1613 		return (EINVAL);
1614 
1615 	/* Sanity check the vectors. */
1616 	for (i = 0; i < count; i++)
1617 		if (vectors[i] > msix->msix_alloc)
1618 			return (EINVAL);
1619 
1620 	/*
1621 	 * Make sure there aren't any holes in the vectors to be used.
1622 	 * It's a big pain to support it, and it doesn't really make
1623 	 * sense anyway.  Also, at least one vector must be used.
1624 	 */
1625 	used = kmalloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1626 	    M_ZERO);
1627 	for (i = 0; i < count; i++)
1628 		if (vectors[i] != 0)
1629 			used[vectors[i] - 1] = 1;
1630 	for (i = 0; i < msix->msix_alloc - 1; i++)
1631 		if (used[i] == 0 && used[i + 1] == 1) {
1632 			kfree(used, M_DEVBUF);
1633 			return (EINVAL);
1634 		}
1635 	if (used[0] != 1) {
1636 		kfree(used, M_DEVBUF);
1637 		return (EINVAL);
1638 	}
1639 
1640 	/* Make sure none of the resources are allocated. */
1641 	for (i = 0; i < msix->msix_table_len; i++) {
1642 		if (msix->msix_table[i].mte_vector == 0)
1643 			continue;
1644 		if (msix->msix_table[i].mte_handlers > 0)
1645 			return (EBUSY);
1646 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1647 		KASSERT(rle != NULL, ("missing resource"));
1648 		if (rle->res != NULL)
1649 			return (EBUSY);
1650 	}
1651 
1652 	/* Free the existing resource list entries. */
1653 	for (i = 0; i < msix->msix_table_len; i++) {
1654 		if (msix->msix_table[i].mte_vector == 0)
1655 			continue;
1656 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1657 	}
1658 
1659 	/*
1660 	 * Build the new virtual table keeping track of which vectors are
1661 	 * used.
1662 	 */
1663 	kfree(msix->msix_table, M_DEVBUF);
1664 	msix->msix_table = kmalloc(sizeof(struct msix_table_entry) * count,
1665 	    M_DEVBUF, M_WAITOK | M_ZERO);
1666 	for (i = 0; i < count; i++)
1667 		msix->msix_table[i].mte_vector = vectors[i];
1668 	msix->msix_table_len = count;
1669 
1670 	/* Free any unused IRQs and resize the vectors array if necessary. */
1671 	j = msix->msix_alloc - 1;
1672 	if (used[j] == 0) {
1673 		struct msix_vector *vec;
1674 
1675 		while (used[j] == 0) {
1676 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1677 			    msix->msix_vectors[j].mv_irq);
1678 			j--;
1679 		}
1680 		vec = kmalloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1681 		    M_WAITOK);
1682 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1683 		    (j + 1));
1684 		kfree(msix->msix_vectors, M_DEVBUF);
1685 		msix->msix_vectors = vec;
1686 		msix->msix_alloc = j + 1;
1687 	}
1688 	kfree(used, M_DEVBUF);
1689 
1690 	/* Map the IRQs onto the rids. */
1691 	for (i = 0; i < count; i++) {
1692 		if (vectors[i] == 0)
1693 			continue;
1694 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1695 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1696 		    irq, 1);
1697 	}
1698 
1699 	if (bootverbose) {
1700 		device_printf(child, "Remapped MSI-X IRQs as: ");
1701 		for (i = 0; i < count; i++) {
1702 			if (i != 0)
1703 				kprintf(", ");
1704 			if (vectors[i] == 0)
1705 				kprintf("---");
1706 			else
1707 				kprintf("%d",
1708 				    msix->msix_vectors[vectors[i]].mv_irq);
1709 		}
1710 		kprintf("\n");
1711 	}
1712 
1713 	return (0);
1714 }
1715 
1716 static int
1717 pci_release_msix(device_t dev, device_t child)
1718 {
1719 	struct pci_devinfo *dinfo = device_get_ivars(child);
1720 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1721 	struct resource_list_entry *rle;
1722 	int i;
1723 
1724 	/* Do we have any messages to release? */
1725 	if (msix->msix_alloc == 0)
1726 		return (ENODEV);
1727 
1728 	/* Make sure none of the resources are allocated. */
1729 	for (i = 0; i < msix->msix_table_len; i++) {
1730 		if (msix->msix_table[i].mte_vector == 0)
1731 			continue;
1732 		if (msix->msix_table[i].mte_handlers > 0)
1733 			return (EBUSY);
1734 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1735 		KASSERT(rle != NULL, ("missing resource"));
1736 		if (rle->res != NULL)
1737 			return (EBUSY);
1738 	}
1739 
1740 	/* Update control register to disable MSI-X. */
1741 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1742 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1743 	    msix->msix_ctrl, 2);
1744 
1745 	/* Free the resource list entries. */
1746 	for (i = 0; i < msix->msix_table_len; i++) {
1747 		if (msix->msix_table[i].mte_vector == 0)
1748 			continue;
1749 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1750 	}
1751 	kfree(msix->msix_table, M_DEVBUF);
1752 	msix->msix_table_len = 0;
1753 
1754 	/* Release the IRQs. */
1755 	for (i = 0; i < msix->msix_alloc; i++)
1756 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1757 		    msix->msix_vectors[i].mv_irq);
1758 	kfree(msix->msix_vectors, M_DEVBUF);
1759 	msix->msix_alloc = 0;
1760 	return (0);
1761 }
1762 
1763 /*
1764  * Return the max supported MSI-X messages this device supports.
1765  * Basically, assuming the MD code can alloc messages, this function
1766  * should return the maximum value that pci_alloc_msix() can return.
1767  * Thus, it is subject to the tunables, etc.
1768  */
1769 int
1770 pci_msix_count_method(device_t dev, device_t child)
1771 {
1772 	struct pci_devinfo *dinfo = device_get_ivars(child);
1773 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1774 
1775 	if (pci_do_msix && msix->msix_location != 0)
1776 		return (msix->msix_msgnum);
1777 	return (0);
1778 }
1779 
1780 /*
1781  * HyperTransport MSI mapping control
1782  */
1783 void
1784 pci_ht_map_msi(device_t dev, uint64_t addr)
1785 {
1786 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1787 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1788 
1789 	if (!ht->ht_msimap)
1790 		return;
1791 
1792 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1793 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1794 		/* Enable MSI -> HT mapping. */
1795 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1796 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1797 		    ht->ht_msictrl, 2);
1798 	}
1799 
1800 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1801 		/* Disable MSI -> HT mapping. */
1802 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1803 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1804 		    ht->ht_msictrl, 2);
1805 	}
1806 }
1807 
1808 /*
1809  * Support for MSI message signalled interrupts.
1810  */
1811 void
1812 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1813 {
1814 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1815 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1816 
1817 	/* Write data and address values. */
1818 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1819 	    address & 0xffffffff, 4);
1820 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1821 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1822 		    address >> 32, 4);
1823 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1824 		    data, 2);
1825 	} else
1826 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1827 		    2);
1828 
1829 	/* Enable MSI in the control register. */
1830 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1831 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1832 	    2);
1833 
1834 	/* Enable MSI -> HT mapping. */
1835 	pci_ht_map_msi(dev, address);
1836 }
1837 
1838 void
1839 pci_disable_msi(device_t dev)
1840 {
1841 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1842 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1843 
1844 	/* Disable MSI -> HT mapping. */
1845 	pci_ht_map_msi(dev, 0);
1846 
1847 	/* Disable MSI in the control register. */
1848 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1849 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1850 	    2);
1851 }
1852 
1853 /*
1854  * Restore MSI registers during resume.  If MSI is enabled then
1855  * restore the data and address registers in addition to the control
1856  * register.
1857  */
1858 static void
1859 pci_resume_msi(device_t dev)
1860 {
1861 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1862 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1863 	uint64_t address;
1864 	uint16_t data;
1865 
1866 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1867 		address = msi->msi_addr;
1868 		data = msi->msi_data;
1869 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1870 		    address & 0xffffffff, 4);
1871 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1872 			pci_write_config(dev, msi->msi_location +
1873 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1874 			pci_write_config(dev, msi->msi_location +
1875 			    PCIR_MSI_DATA_64BIT, data, 2);
1876 		} else
1877 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1878 			    data, 2);
1879 	}
1880 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1881 	    2);
1882 }
1883 
1884 int
1885 pci_remap_msi_irq(device_t dev, u_int irq)
1886 {
1887 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1888 	pcicfgregs *cfg = &dinfo->cfg;
1889 	struct resource_list_entry *rle;
1890 	struct msix_table_entry *mte;
1891 	struct msix_vector *mv;
1892 	device_t bus;
1893 	uint64_t addr;
1894 	uint32_t data;
1895 	int error, i, j;
1896 
1897 	bus = device_get_parent(dev);
1898 
1899 	/*
1900 	 * Handle MSI first.  We try to find this IRQ among our list
1901 	 * of MSI IRQs.  If we find it, we request updated address and
1902 	 * data registers and apply the results.
1903 	 */
1904 	if (cfg->msi.msi_alloc > 0) {
1905 
1906 		/* If we don't have any active handlers, nothing to do. */
1907 		if (cfg->msi.msi_handlers == 0)
1908 			return (0);
1909 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1910 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1911 			    i + 1);
1912 			if (rle->start == irq) {
1913 				error = PCIB_MAP_MSI(device_get_parent(bus),
1914 				    dev, irq, &addr, &data);
1915 				if (error)
1916 					return (error);
1917 				pci_disable_msi(dev);
1918 				dinfo->cfg.msi.msi_addr = addr;
1919 				dinfo->cfg.msi.msi_data = data;
1920 				pci_enable_msi(dev, addr, data);
1921 				return (0);
1922 			}
1923 		}
1924 		return (ENOENT);
1925 	}
1926 
1927 	/*
1928 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1929 	 * we request the updated mapping info.  If that works, we go
1930 	 * through all the slots that use this IRQ and update them.
1931 	 */
1932 	if (cfg->msix.msix_alloc > 0) {
1933 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1934 			mv = &cfg->msix.msix_vectors[i];
1935 			if (mv->mv_irq == irq) {
1936 				error = PCIB_MAP_MSI(device_get_parent(bus),
1937 				    dev, irq, &addr, &data);
1938 				if (error)
1939 					return (error);
1940 				mv->mv_address = addr;
1941 				mv->mv_data = data;
1942 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1943 					mte = &cfg->msix.msix_table[j];
1944 					if (mte->mte_vector != i + 1)
1945 						continue;
1946 					if (mte->mte_handlers == 0)
1947 						continue;
1948 					pci_mask_msix(dev, j);
1949 					pci_enable_msix(dev, j, addr, data);
1950 					pci_unmask_msix(dev, j);
1951 				}
1952 			}
1953 		}
1954 		return (ENOENT);
1955 	}
1956 
1957 	return (ENOENT);
1958 }
1959 
1960 /*
1961  * Returns true if the specified device is blacklisted because MSI
1962  * doesn't work.
1963  */
1964 int
1965 pci_msi_device_blacklisted(device_t dev)
1966 {
1967 	struct pci_quirk *q;
1968 
1969 	if (!pci_honor_msi_blacklist)
1970 		return (0);
1971 
1972 	for (q = &pci_quirks[0]; q->devid; q++) {
1973 		if (q->devid == pci_get_devid(dev) &&
1974 		    q->type == PCI_QUIRK_DISABLE_MSI)
1975 			return (1);
1976 	}
1977 	return (0);
1978 }
1979 
1980 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
1982  * we just check for blacklisted chipsets as represented by the
1983  * host-PCI bridge at device 0:0:0.  In the future, it may become
1984  * necessary to check other system attributes, such as the kenv values
1985  * that give the motherboard manufacturer and model number.
1986  */
1987 static int
1988 pci_msi_blacklisted(void)
1989 {
1990 	device_t dev;
1991 
1992 	if (!pci_honor_msi_blacklist)
1993 		return (0);
1994 
1995 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1996 	if (!(pcie_chipset || pcix_chipset))
1997 		return (1);
1998 
1999 	dev = pci_find_bsf(0, 0, 0);
2000 	if (dev != NULL)
2001 		return (pci_msi_device_blacklisted(dev));
2002 	return (0);
2003 }
2004 
2005 /*
2006  * Attempt to allocate *count MSI messages.  The actual number allocated is
2007  * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2009  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Keep halving the request until the parent bridge can satisfy
	 * it; give up once even a single message cannot be allocated.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[actual - 1]);
			kprintf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* ffs(actual) - 1 == log2(actual), the encoding the MME field wants. */
	ctrl |= (ffs(actual) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2128 
2129 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* Collect the IRQ numbers to hand back to the bridge below. */
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2177 
2178 /*
2179  * Return the max supported MSI messages this device supports.
2180  * Basically, assuming the MD code can alloc messages, this function
2181  * should return the maximum value that pci_alloc_msi() can return.
2182  * Thus, it is subject to the tunables, etc.
2183  */
2184 int
2185 pci_msi_count_method(device_t dev, device_t child)
2186 {
2187 	struct pci_devinfo *dinfo = device_get_ivars(child);
2188 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2189 
2190 	if (pci_do_msi && msi->msi_location != 0)
2191 		return (msi->msi_msgnum);
2192 	return (0);
2193 }
2194 
2195 /* kfree pcicfgregs structure and all depending data structures */
2196 
2197 int
2198 pci_freecfg(struct pci_devinfo *dinfo)
2199 {
2200 	struct devlist *devlist_head;
2201 	int i;
2202 
2203 	devlist_head = &pci_devq;
2204 
2205 	if (dinfo->cfg.vpd.vpd_reg) {
2206 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2207 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2208 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2209 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2210 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2211 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2212 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2213 	}
2214 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2215 	kfree(dinfo, M_DEVBUF);
2216 
2217 	/* increment the generation count */
2218 	pci_generation++;
2219 
2220 	/* we're losing one device */
2221 	pci_numdevs--;
2222 	return (0);
2223 }
2224 
2225 /*
2226  * PCI power manangement
2227  */
2228 int
2229 pci_set_powerstate_method(device_t dev, device_t child, int state)
2230 {
2231 	struct pci_devinfo *dinfo = device_get_ivars(child);
2232 	pcicfgregs *cfg = &dinfo->cfg;
2233 	uint16_t status;
2234 	int result, oldstate, highest, delay;
2235 
2236 	if (cfg->pp.pp_cap == 0)
2237 		return (EOPNOTSUPP);
2238 
2239 	/*
2240 	 * Optimize a no state change request away.  While it would be OK to
2241 	 * write to the hardware in theory, some devices have shown odd
2242 	 * behavior when going from D3 -> D3.
2243 	 */
2244 	oldstate = pci_get_powerstate(child);
2245 	if (oldstate == state)
2246 		return (0);
2247 
2248 	/*
2249 	 * The PCI power management specification states that after a state
2250 	 * transition between PCI power states, system software must
2251 	 * guarantee a minimal delay before the function accesses the device.
2252 	 * Compute the worst case delay that we need to guarantee before we
2253 	 * access the device.  Many devices will be responsive much more
2254 	 * quickly than this delay, but there are some that don't respond
2255 	 * instantly to state changes.  Transitions to/from D3 state require
2256 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2257 	 * is done below with DELAY rather than a sleeper function because
2258 	 * this function can be called from contexts where we cannot sleep.
2259 	 */
2260 	highest = (oldstate > state) ? oldstate : state;
2261 	if (highest == PCI_POWERSTATE_D3)
2262 	    delay = 10000;
2263 	else if (highest == PCI_POWERSTATE_D2)
2264 	    delay = 200;
2265 	else
2266 	    delay = 0;
2267 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2268 	    & ~PCIM_PSTAT_DMASK;
2269 	result = 0;
2270 	switch (state) {
2271 	case PCI_POWERSTATE_D0:
2272 		status |= PCIM_PSTAT_D0;
2273 		break;
2274 	case PCI_POWERSTATE_D1:
2275 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2276 			return (EOPNOTSUPP);
2277 		status |= PCIM_PSTAT_D1;
2278 		break;
2279 	case PCI_POWERSTATE_D2:
2280 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2281 			return (EOPNOTSUPP);
2282 		status |= PCIM_PSTAT_D2;
2283 		break;
2284 	case PCI_POWERSTATE_D3:
2285 		status |= PCIM_PSTAT_D3;
2286 		break;
2287 	default:
2288 		return (EINVAL);
2289 	}
2290 
2291 	if (bootverbose)
2292 		kprintf(
2293 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2294 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2295 		    dinfo->cfg.func, oldstate, state);
2296 
2297 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2298 	if (delay)
2299 		DELAY(delay);
2300 	return (0);
2301 }
2302 
2303 int
2304 pci_get_powerstate_method(device_t dev, device_t child)
2305 {
2306 	struct pci_devinfo *dinfo = device_get_ivars(child);
2307 	pcicfgregs *cfg = &dinfo->cfg;
2308 	uint16_t status;
2309 	int result;
2310 
2311 	if (cfg->pp.pp_cap != 0) {
2312 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2313 		switch (status & PCIM_PSTAT_DMASK) {
2314 		case PCIM_PSTAT_D0:
2315 			result = PCI_POWERSTATE_D0;
2316 			break;
2317 		case PCIM_PSTAT_D1:
2318 			result = PCI_POWERSTATE_D1;
2319 			break;
2320 		case PCIM_PSTAT_D2:
2321 			result = PCI_POWERSTATE_D2;
2322 			break;
2323 		case PCIM_PSTAT_D3:
2324 			result = PCI_POWERSTATE_D3;
2325 			break;
2326 		default:
2327 			result = PCI_POWERSTATE_UNKNOWN;
2328 			break;
2329 		}
2330 	} else {
2331 		/* No support, device is always at D0 */
2332 		result = PCI_POWERSTATE_D0;
2333 	}
2334 	return (result);
2335 }
2336 
2337 /*
2338  * Some convenience functions for PCI device drivers.
2339  */
2340 
2341 static __inline void
2342 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2343 {
2344 	uint16_t	command;
2345 
2346 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2347 	command |= bit;
2348 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2349 }
2350 
2351 static __inline void
2352 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2353 {
2354 	uint16_t	command;
2355 
2356 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2357 	command &= ~bit;
2358 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2359 }
2360 
/* Set the bus-master enable bit in the child's command register. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2367 
/* Clear the bus-master enable bit in the child's command register. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2374 
2375 int
2376 pci_enable_io_method(device_t dev, device_t child, int space)
2377 {
2378 	uint16_t command;
2379 	uint16_t bit;
2380 	char *error;
2381 
2382 	bit = 0;
2383 	error = NULL;
2384 
2385 	switch(space) {
2386 	case SYS_RES_IOPORT:
2387 		bit = PCIM_CMD_PORTEN;
2388 		error = "port";
2389 		break;
2390 	case SYS_RES_MEMORY:
2391 		bit = PCIM_CMD_MEMEN;
2392 		error = "memory";
2393 		break;
2394 	default:
2395 		return (EINVAL);
2396 	}
2397 	pci_set_command_bit(dev, child, bit);
2398 	/* Some devices seem to need a brief stall here, what do to? */
2399 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2400 	if (command & bit)
2401 		return (0);
2402 	device_printf(child, "failed to enable %s mapping!\n", error);
2403 	return (ENXIO);
2404 }
2405 
2406 int
2407 pci_disable_io_method(device_t dev, device_t child, int space)
2408 {
2409 	uint16_t command;
2410 	uint16_t bit;
2411 	char *error;
2412 
2413 	bit = 0;
2414 	error = NULL;
2415 
2416 	switch(space) {
2417 	case SYS_RES_IOPORT:
2418 		bit = PCIM_CMD_PORTEN;
2419 		error = "port";
2420 		break;
2421 	case SYS_RES_MEMORY:
2422 		bit = PCIM_CMD_MEMEN;
2423 		error = "memory";
2424 		break;
2425 	default:
2426 		return (EINVAL);
2427 	}
2428 	pci_clear_command_bit(dev, child, bit);
2429 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2430 	if (command & bit) {
2431 		device_printf(child, "failed to disable %s mapping!\n", error);
2432 		return (ENXIO);
2433 	}
2434 	return (0);
2435 }
2436 
2437 /*
2438  * New style pci driver.  Parent device is either a pci-host-bridge or a
2439  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2440  */
2441 
/*
 * Print (bootverbose only) a summary of a freshly probed device's config
 * header: IDs, location, class, command/status, timing parameters,
 * interrupt routing, and PM/MSI/MSI-X capabilities.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power-management capability, if present. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability, if present. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability, if present. */
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2499 
/*
 * Print (bootverbose only) the PCI Express capability details: spec
 * version, port type and, when a slot is implemented, its capabilities.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
	/* Only version 1 capability layouts are decoded further. */
	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
		goto back;

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	default:
		port_name = NULL;
		break;
	}
	/* Root/downstream ports that implement no slot get no label. */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
back:
	kprintf("\n");
}
2561 
2562 static int
2563 pci_porten(device_t pcib, int b, int s, int f)
2564 {
2565 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2566 		& PCIM_CMD_PORTEN) != 0;
2567 }
2568 
2569 static int
2570 pci_memen(device_t pcib, int b, int s, int f)
2571 {
2572 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2573 		& PCIM_CMD_MEMEN) != 0;
2574 }
2575 
2576 /*
2577  * Add a resource based on a pci map register. Return 1 if the map
2578  * register is a 32bit map register or 2 if it is a 64bit register.
2579  */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;		/* original BAR contents */
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;	/* log2 of the decoded size */
	uint8_t ln2range;	/* address width: 32 or 64 */
	uint32_t testval;	/* readback after writing all 1s */
	uint16_t cmd;
	int type;
	int barlen;		/* config dwords used by this BAR: 1 or 2 */
	struct resource *res;

	/*
	 * Size the BAR: save the original value, write all 1s, read the
	 * size mask back, then restore the original value.
	 */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			kprintf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			kprintf(", memory disabled\n");
		else
			kprintf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	/* Skip BARs whose decoded base doesn't fit in this kernel's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0);
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list
		 * entry to force pci_alloc_resource() to allocate
		 * resources from the parent.
		 */
		resource_list_delete(rl, type, reg);
#ifdef PCI_BAR_CLEAR
		/* Clear the BAR */
		start = 0;
#else	/* !PCI_BAR_CLEAR */
		/*
		 * Don't clear BAR here.  Some BIOS lists HPET as a
		 * PCI function, clearing the BAR causes HPET timer
		 * stop ticking.
		 */
		if (bootverbose) {
			kprintf("pci:%d:%d:%d: resource reservation failed "
				"%#jx - %#jx\n", b, s, f,
				(intmax_t)start, (intmax_t)end);
		}
		return (barlen);
#endif	/* PCI_BAR_CLEAR */
	} else {
		start = rman_get_start(res);
	}
	/* Write the (possibly newly assigned) base back to the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2734 
2735 /*
2736  * For ATA devices we need to decide early what addressing mode to use.
2737  * Legacy demands that the primary and secondary ATA ports sits on the
2738  * same addresses that old ISA hardware did. This dictates that we use
2739  * those addresses and ignore the BAR's if we cannot set PCI native
2740  * addressing mode.
2741  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: BARs in native mode, ISA-compat ports otherwise. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Legacy primary channel at the traditional 0x1f0/0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0);
	}
	/* Secondary channel: same decision, legacy ports 0x170/0x376. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0);
	}
	/* BAR(4)/BAR(5) are always mapped normally, regardless of mode. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2795 
/*
 * Work out which IRQ serves the device's INTx pin and record it as the
 * rid 0 SYS_RES_IRQ resource.  Priority: user tunable, then routing by
 * the parent bus, then the existing intline register value.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Tunable values outside 1-254 are treated as invalid. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1);
}
2843 
/*
 * Add SYS_RES resources for every BAR of the given device (with special
 * handling for legacy-mode ATA controllers), plus any quirked extra map
 * registers, and try to route its INTx interrupt if one is configured.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns the dwords the BAR consumed (1 or 2). */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2889 
/*
 * Scan every slot/function on the given bus and add a child device for
 * each function that responds to config-space reads.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for reading config space of the slot/function being probed. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots reporting an invalid/unknown header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices may populate functions 1..7 too. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2922 
/*
 * Create the newbus child for a probed PCI function, attach its devinfo
 * as ivars, set up its resource list and add its BAR/IRQ resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Snapshot the config registers, then immediately re-apply them. */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2937 
/* Probe method for the generic PCI bus driver: always matches. */
static int
pci_probe(device_t dev)
{
	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2946 
/* Attach method: discover our bus number and enumerate all children. */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2968 
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 choice. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	/* devlist was allocated by device_get_children(); release it. */
	kfree(devlist, M_TEMP);
	return (0);
}
3016 
/*
 * Bus resume method.
 *
 * For each child: when ACPI power management is active, notify ACPI
 * of the transition and return attached type-0 devices to D0, then
 * restore the configuration space that pci_suspend() saved.  Note
 * the config-space restore runs for every child, whether or not the
 * power state was touched.  Finishes by resuming the child drivers
 * via bus_generic_resume().
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3051 
3052 static void
3053 pci_load_vendor_data(void)
3054 {
3055 	caddr_t vendordata, info;
3056 
3057 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3058 		info = preload_search_info(vendordata, MODINFO_ADDR);
3059 		pci_vendordata = *(char **)info;
3060 		info = preload_search_info(vendordata, MODINFO_SIZE);
3061 		pci_vendordata_size = *(size_t *)info;
3062 		/* terminate the database */
3063 		pci_vendordata[pci_vendordata_size] = '\n';
3064 	}
3065 }
3066 
3067 void
3068 pci_driver_added(device_t dev, driver_t *driver)
3069 {
3070 	int numdevs;
3071 	device_t *devlist;
3072 	device_t child;
3073 	struct pci_devinfo *dinfo;
3074 	int i;
3075 
3076 	if (bootverbose)
3077 		device_printf(dev, "driver added\n");
3078 	DEVICE_IDENTIFY(driver, dev);
3079 	device_get_children(dev, &devlist, &numdevs);
3080 	for (i = 0; i < numdevs; i++) {
3081 		child = devlist[i];
3082 		if (device_get_state(child) != DS_NOTPRESENT)
3083 			continue;
3084 		dinfo = device_get_ivars(child);
3085 		pci_print_verbose(dinfo);
3086 		if (bootverbose)
3087 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3088 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3089 			    dinfo->cfg.func);
3090 		pci_cfg_restore(child, dinfo);
3091 		if (device_probe_and_attach(child) != 0)
3092 			pci_cfg_save(child, dinfo, 1);
3093 	}
3094 	kfree(devlist, M_TEMP);
3095 }
3096 
3097 static void
3098 pci_child_detached(device_t parent __unused, device_t child)
3099 {
3100 	/* Turn child's power off */
3101 	pci_cfg_save(child, device_get_ivars(child), 1);
3102 }
3103 
/*
 * Bus setup_intr method.
 *
 * Installs the handler via bus_generic_setup_intr() first.  For
 * direct children, when MSI support is compiled in and the resource
 * rid is non-zero, the corresponding MSI or MSI-X message is mapped
 * through the parent pcib (PCIB_MAP_MSI), enabled on first use, and
 * its per-vector handler count bumped; INTx is then masked.  On a
 * mapping failure the freshly installed handler is torn down and the
 * error returned.  Returns 0 on success or an errno value.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep, lwkt_serialize_t serializer)
{
#ifdef MSI
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int rid;
#endif
	int error;
	void *cookie;
	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/*
	 * Enable INTx.  NOTE(review): with MSI compiled in and a
	 * non-zero rid this is immediately superseded by the INTxDIS
	 * set further down -- presumably harmless; confirm intent.
	 */
	pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
#ifdef MSI
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* First handler for MSI: map and enable the message. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			/* Otherwise this must be an MSI-X vector. */
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* Program and unmask the table entry on first use. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Undo the generic setup if the MSI mapping failed. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
#endif
	*cookiep = cookie;
	return (0);
}
3198 
3199 int
3200 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3201     void *cookie)
3202 {
3203 #ifdef MSI
3204 	struct msix_table_entry *mte;
3205 	struct resource_list_entry *rle;
3206 	struct pci_devinfo *dinfo;
3207 	int rid;
3208 #endif
3209 	int error;
3210 
3211 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3212 		return (EINVAL);
3213 
3214 	/* If this isn't a direct child, just bail out */
3215 	if (device_get_parent(child) != dev)
3216 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3217 
3218 	pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3219 #ifdef MSI
3220 	rid = rman_get_rid(irq);
3221 	if (rid == 0) {
3222 		/* Mask INTx */
3223 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3224 	} else {
3225 		/*
3226 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3227 		 * decrement the appropriate handlers count and mask the
3228 		 * MSI-X message, or disable MSI messages if the count
3229 		 * drops to 0.
3230 		 */
3231 		dinfo = device_get_ivars(child);
3232 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3233 		if (rle->res != irq)
3234 			return (EINVAL);
3235 		if (dinfo->cfg.msi.msi_alloc > 0) {
3236 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3237 			    ("MSI-X index too high"));
3238 			if (dinfo->cfg.msi.msi_handlers == 0)
3239 				return (EINVAL);
3240 			dinfo->cfg.msi.msi_handlers--;
3241 			if (dinfo->cfg.msi.msi_handlers == 0)
3242 				pci_disable_msi(child);
3243 		} else {
3244 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3245 			    ("No MSI or MSI-X interrupts allocated"));
3246 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3247 			    ("MSI-X index too high"));
3248 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3249 			if (mte->mte_handlers == 0)
3250 				return (EINVAL);
3251 			mte->mte_handlers--;
3252 			if (mte->mte_handlers == 0)
3253 				pci_mask_msix(child, rid - 1);
3254 		}
3255 	}
3256 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3257 	if (rid > 0)
3258 		KASSERT(error == 0,
3259 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3260 #endif
3261 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3262 	return (error);
3263 }
3264 
3265 int
3266 pci_print_child(device_t dev, device_t child)
3267 {
3268 	struct pci_devinfo *dinfo;
3269 	struct resource_list *rl;
3270 	int retval = 0;
3271 
3272 	dinfo = device_get_ivars(child);
3273 	rl = &dinfo->resources;
3274 
3275 	retval += bus_print_child_header(dev, child);
3276 
3277 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3278 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3279 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3280 	if (device_get_flags(dev))
3281 		retval += kprintf(" flags %#x", device_get_flags(dev));
3282 
3283 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3284 	    pci_get_function(child));
3285 
3286 	retval += bus_print_child_footer(dev, child);
3287 
3288 	return (retval);
3289 }
3290 
/*
 * Class/subclass code to human-readable description table, used by
 * pci_probe_nomatch() to label devices that no driver claimed.  An
 * entry with subclass == -1 supplies the generic description for the
 * whole class; a later entry with a matching subclass refines it.
 * The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3382 
3383 void
3384 pci_probe_nomatch(device_t dev, device_t child)
3385 {
3386 	int	i;
3387 	char	*cp, *scp, *device;
3388 
3389 	/*
3390 	 * Look for a listing for this device in a loaded device database.
3391 	 */
3392 	if ((device = pci_describe_device(child)) != NULL) {
3393 		device_printf(dev, "<%s>", device);
3394 		kfree(device, M_DEVBUF);
3395 	} else {
3396 		/*
3397 		 * Scan the class/subclass descriptions for a general
3398 		 * description.
3399 		 */
3400 		cp = "unknown";
3401 		scp = NULL;
3402 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3403 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3404 				if (pci_nomatch_tab[i].subclass == -1) {
3405 					cp = pci_nomatch_tab[i].desc;
3406 				} else if (pci_nomatch_tab[i].subclass ==
3407 				    pci_get_subclass(child)) {
3408 					scp = pci_nomatch_tab[i].desc;
3409 				}
3410 			}
3411 		}
3412 		device_printf(dev, "<%s%s%s>",
3413 		    cp ? cp : "",
3414 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3415 		    scp ? scp : "");
3416 	}
3417 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3418 		pci_get_vendor(child), pci_get_device(child),
3419 		pci_get_slot(child), pci_get_function(child));
3420 	if (pci_get_intpin(child) > 0) {
3421 		int irq;
3422 
3423 		irq = pci_get_irq(child);
3424 		if (PCI_INTERRUPT_VALID(irq))
3425 			kprintf(" irq %d", irq);
3426 	}
3427 	kprintf("\n");
3428 
3429 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3430 }
3431 
3432 /*
3433  * Parse the PCI device database, if loaded, and return a pointer to a
3434  * description of the device.
3435  *
3436  * The database is flat text formatted as follows:
3437  *
3438  * Any line not in a valid format is ignored.
3439  * Lines are terminated with newline '\n' characters.
3440  *
3441  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3442  * the vendor name.
3443  *
3444  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3445  * - devices cannot be listed without a corresponding VENDOR line.
3446  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3447  * another TAB, then the device name.
3448  */
3449 
3450 /*
3451  * Assuming (ptr) points to the beginning of a line in the database,
3452  * return the vendor or device and description of the next entry.
3453  * The value of (vendor) or (device) inappropriate for the entry type
3454  * is set to -1.  Returns nonzero at the end of the database.
3455  *
 * Note that this parser is not fully robust in the face of corrupt
 * data; we guard against the worst of it by appending a newline to
 * the end of the database when we initialise it.
3459  */
/*
 * Parse one entry of the vendor database.
 *
 * On entry *ptr points at the start of a line.  Scans forward until
 * a well-formed vendor line (no leading TAB) or device line (leading
 * TAB) is found, filling in *vendor or *device respectively (the
 * other is left at -1) and copying up to 80 chars of description
 * into *desc.  *ptr is advanced past the consumed entry.  Returns 1
 * when the end of the database is reached, 0 otherwise.
 *
 * NOTE(review): the first skip loop below reads *cp after the while
 * exits; if left reached 0 there, that read is one past the buffer.
 * This presumably relies on pci_load_vendor_data() appending a
 * trailing '\n' -- confirm before changing either function.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining in the database from cp onwards. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3505 
/*
 * Look up a device in the loaded vendor database and return a
 * kmalloc'd "vendor, device" description string, or NULL if there is
 * no database, the vendor is not listed, or allocation fails.  The
 * caller owns the returned string and must kfree(..., M_DEVBUF) it.
 * If the vendor is found but the device is not, the device part is
 * rendered as its hex ID instead.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database before a vendor match: give up. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* A new vendor entry ends this vendor's device list. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device match: fall back to printing the raw device ID. */
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3558 
/*
 * Bus read_ivar method: fetch a PCI instance variable for a child.
 * Nearly all values come from the cached configuration-space
 * snapshot in dinfo->cfg.  Returns 0 on success, ENOENT for an
 * unknown ivar, and EINVAL for PCI_IVAR_ETHADDR (PCI itself carries
 * no ethernet address; the result is set to NULL for the generic
 * accessor's benefit).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor ID, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3650 
/*
 * Bus write_ivar method: set a PCI instance variable for a child.
 * Only the interrupt pin is currently writable.  The identification
 * ivars are read-only and yield EINVAL; unknown ivars yield ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3683 #ifdef notyet
3684 #include "opt_ddb.h"
3685 #ifdef DDB
3686 #include <ddb/ddb.h>
3687 #include <sys/cons.h>
3688 
3689 /*
3690  * List resources based on pci map registers, used for within ddb
3691  */
3692 
/*
 * DDB "show pciregs" command (currently disabled under #ifdef notyet).
 * Walks the global pci_devq list and prints one summary line per
 * device: driver name/unit (or a running "none" count for unclaimed
 * devices), domain:bus:slot:func selector, combined class code,
 * subsystem and device IDs, revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3732 #endif /* DDB */
3733 #endif
3734 
/*
 * Lazily allocate a resource backing a PCI BAR.
 *
 * Sizes the BAR by the classic probe (write all-ones, read back),
 * validates that the requested resource type matches what the BAR
 * claims to decode, overrides the caller's size/alignment with the
 * BAR's real decode size, allocates the range from the parent, and
 * records it on the child's resource list.  On every exit path the
 * BAR is (re)written: with its original value on failure, or with
 * the start of the newly allocated range on success.  Returns the
 * resource or NULL.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit BARs consume two registers; pick up the high half. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	/* Program the BAR with the start of the allocated range. */
	map = rman_get_start(res);
out:;
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3833 
3834 
/*
 * Bus alloc_resource method.
 *
 * For direct children this performs the PCI-specific lazy work
 * before falling through to the generic resource list allocator:
 * routing a legacy interrupt on first IRQ allocation (and, with MSI
 * compiled in, refusing rid-0 IRQs once MSI/MSI-X messages exist),
 * enabling I/O or memory decoding for BAR-backed rids, sizing and
 * allocating unseen BARs via pci_alloc_map(), and handing back an
 * already-reserved entry (activating it on demand).  Non-direct
 * children go straight to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
		   u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
#ifdef MSI
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
#endif
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags));
}
3913 
/*
 * Bus delete_resource method.
 *
 * For direct children: release the backing resource (refusing, with
 * a diagnostic, if the child still owns or has activated it), drop
 * the resource-list entry, clear the corresponding config register,
 * and propagate the deletion to the parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/* Refuse while the resource is still in use. */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3949 
3950 struct resource_list *
3951 pci_get_resource_list (device_t dev, device_t child)
3952 {
3953 	struct pci_devinfo *dinfo = device_get_ivars(child);
3954 
3955 	if (dinfo == NULL)
3956 		return (NULL);
3957 
3958 	return (&dinfo->resources);
3959 }
3960 
3961 uint32_t
3962 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3963 {
3964 	struct pci_devinfo *dinfo = device_get_ivars(child);
3965 	pcicfgregs *cfg = &dinfo->cfg;
3966 
3967 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3968 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3969 }
3970 
3971 void
3972 pci_write_config_method(device_t dev, device_t child, int reg,
3973     uint32_t val, int width)
3974 {
3975 	struct pci_devinfo *dinfo = device_get_ivars(child);
3976 	pcicfgregs *cfg = &dinfo->cfg;
3977 
3978 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3979 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3980 }
3981 
3982 int
3983 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3984     size_t buflen)
3985 {
3986 
3987 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3988 	    pci_get_function(child));
3989 	return (0);
3990 }
3991 
3992 int
3993 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3994     size_t buflen)
3995 {
3996 	struct pci_devinfo *dinfo;
3997 	pcicfgregs *cfg;
3998 
3999 	dinfo = device_get_ivars(child);
4000 	cfg = &dinfo->cfg;
4001 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4002 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4003 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4004 	    cfg->progif);
4005 	return (0);
4006 }
4007 
4008 int
4009 pci_assign_interrupt_method(device_t dev, device_t child)
4010 {
4011 	struct pci_devinfo *dinfo = device_get_ivars(child);
4012 	pcicfgregs *cfg = &dinfo->cfg;
4013 
4014 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4015 	    cfg->intpin));
4016 }
4017 
/*
 * Module event handler.  On load, initialize the global PCI device
 * list, create the /dev/pci control device and load the vendor data
 * table; on unload, destroy the control device.
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Created on MOD_LOAD; static so MOD_UNLOAD can find it again. */
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
				    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4039 
/*
 * Restore a device's config-space registers from the copy saved by
 * pci_cfg_save(), typically after a suspend/resume or power-state
 * transition.  Power is raised to D0 first, since BARs and other
 * registers may reset when leaving D3.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Replay the saved BARs, then the rest of the writable header. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4085 
/*
 * Snapshot the writable portion of a device's type 0 config header
 * into dinfo so pci_cfg_restore() can replay it later.  If 'setstate'
 * is non-zero, additionally power the device down to D3 subject to
 * the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4169 
4170 #ifdef COMPAT_OLDPCI
4171 
4172 /*
4173  * Locate the parent of a PCI device by scanning the PCI devlist
4174  * and return the entry for the parent.
4175  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4176  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4177  */
4178 pcicfgregs *
4179 pci_devlist_get_parent(pcicfgregs *cfg)
4180 {
4181 	struct devlist *devlist_head;
4182 	struct pci_devinfo *dinfo;
4183 	pcicfgregs *bridge_cfg;
4184 	int i;
4185 
4186 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4187 
4188 	/* If the device is on PCI bus 0, look for the host */
4189 	if (cfg->bus == 0) {
4190 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4191 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4192 			bridge_cfg = &dinfo->cfg;
4193 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4194 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4195 		    		&& bridge_cfg->bus == cfg->bus) {
4196 				return bridge_cfg;
4197 			}
4198 		}
4199 	}
4200 
4201 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4202 	if (cfg->bus > 0) {
4203 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4204 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4205 			bridge_cfg = &dinfo->cfg;
4206 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4207 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4208 				&& bridge_cfg->secondarybus == cfg->bus) {
4209 				return bridge_cfg;
4210 			}
4211 		}
4212 	}
4213 
4214 	return NULL;
4215 }
4216 
4217 #endif	/* COMPAT_OLDPCI */
4218