xref: /dragonfly/sys/bus/pci/pci.c (revision a32bc35d)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include "pcib_if.h"
63 #include "pci_if.h"
64 
65 #ifdef __HAVE_ACPI
66 #include <contrib/dev/acpica/acpi.h>
67 #include "acpi_if.h"
68 #else
69 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
70 #endif
71 
72 extern struct dev_ops pcic_ops;	/* XXX */
73 
74 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
75 
76 static uint32_t		pci_mapbase(unsigned mapreg);
77 static const char	*pci_maptype(unsigned mapreg);
78 static int		pci_mapsize(unsigned testval);
79 static int		pci_maprange(unsigned mapreg);
80 static void		pci_fixancient(pcicfgregs *cfg);
81 
82 static int		pci_porten(device_t pcib, int b, int s, int f);
83 static int		pci_memen(device_t pcib, int b, int s, int f);
84 static void		pci_assign_interrupt(device_t bus, device_t dev,
85 			    int force_route);
86 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
87 			    int b, int s, int f, int reg,
88 			    struct resource_list *rl, int force, int prefetch);
89 static int		pci_probe(device_t dev);
90 static int		pci_attach(device_t dev);
91 static void		pci_child_detached(device_t, device_t);
92 static void		pci_load_vendor_data(void);
93 static int		pci_describe_parse_line(char **ptr, int *vendor,
94 			    int *device, char **desc);
95 static char		*pci_describe_device(device_t dev);
96 static int		pci_modevent(module_t mod, int what, void *arg);
97 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
98 			    pcicfgregs *cfg);
99 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
100 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t *data);
102 #if 0
103 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t data);
105 #endif
106 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
107 static void		pci_disable_msi(device_t dev);
108 static void		pci_enable_msi(device_t dev, uint64_t address,
109 			    uint16_t data);
110 static void		pci_setup_msix_vector(device_t dev, u_int index,
111 			    uint64_t address, uint32_t data);
112 static void		pci_mask_msix_vector(device_t dev, u_int index);
113 static void		pci_unmask_msix_vector(device_t dev, u_int index);
114 static void		pci_mask_msix_allvectors(device_t dev);
115 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pcie_slotimpl(const pcicfgregs *);
120 static void		pci_print_verbose_expr(const pcicfgregs *);
121 
122 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_subvendor(device_t, int, int,
128 			    pcicfgregs *);
129 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
131 
/*
 * Method dispatch table for the "pci" bus driver.  Three groups:
 * newbus device lifecycle hooks, generic bus/resource management,
 * and the PCI-specific kobj interface (config-space access, power
 * states, VPD, interrupt routing, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* terminator */
};
184 
185 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
186 
187 static devclass_t pci_devclass;
188 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
189 MODULE_VERSION(pci, 1);
190 
191 static char	*pci_vendordata;
192 static size_t	pci_vendordata_size;
193 
194 
/*
 * Capability parsing dispatch table: maps a PCI capability ID
 * (PCIY_*) to the routine that decodes it during the capability-list
 * walk in pci_read_capabilities().
 */
static const struct pci_read_cap {
	int		cap;		/* capability ID (PCIY_*) */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
209 
/*
 * Table of per-device quirks, keyed by the combined device/vendor ID
 * word (device in the high 16 bits, vendor in the low 16 bits, as read
 * from PCIR_DEVVENDOR).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* meaning depends on 'type' (map reg offset, ...) */
	int	arg2;	/* meaning depends on 'type' */
};

struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
253 
254 /* map register information */
255 #define	PCI_MAPMEM	0x01	/* memory map */
256 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
257 #define	PCI_MAPPORT	0x04	/* port map */
258 
259 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
260 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
261 
262 struct devlist pci_devq;
263 uint32_t pci_generation;
264 uint32_t pci_numdevs = 0;
265 static int pcie_chipset, pcix_chipset;
266 
267 /* sysctl vars */
268 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
269 
270 static int pci_enable_io_modes = 1;
271 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
272 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
273     &pci_enable_io_modes, 1,
274     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
275 enable these bits correctly.  We'd like to do this all the time, but there\n\
276 are some peripherals that this causes problems with.");
277 
278 static int pci_do_power_nodriver = 0;
279 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
280 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
281     &pci_do_power_nodriver, 0,
282   "Place a function into D3 state when no driver attaches to it.  0 means\n\
283 disable.  1 means conservatively place devices into D3 state.  2 means\n\
284 aggressively place devices into D3 state.  3 means put absolutely everything\n\
285 in D3 state.");
286 
287 static int pci_do_power_resume = 1;
288 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
289 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
290     &pci_do_power_resume, 1,
291   "Transition from D3 -> D0 on resume.");
292 
293 static int pci_do_msi = 1;
294 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
295 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
296     "Enable support for MSI interrupts");
297 
298 static int pci_do_msix = 1;
299 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
300 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
301     "Enable support for MSI-X interrupts");
302 
303 static int pci_honor_msi_blacklist = 1;
304 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
305 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
306     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
307 
308 static int pci_msi_cpuid;
309 
/* Find a device_t by bus/slot/function in domain 0 */

device_t
pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
{

	/* Convenience wrapper: domain 0 covers the common single-domain case. */
	return (pci_find_dbsf(0, bus, slot, func));
}
318 
319 /* Find a device_t by domain/bus/slot/function */
320 
321 device_t
322 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323 {
324 	struct pci_devinfo *dinfo;
325 
326 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327 		if ((dinfo->cfg.domain == domain) &&
328 		    (dinfo->cfg.bus == bus) &&
329 		    (dinfo->cfg.slot == slot) &&
330 		    (dinfo->cfg.func == func)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 /* Find a device_t by vendor/device ID */
339 
340 device_t
341 pci_find_device(uint16_t vendor, uint16_t device)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.vendor == vendor) &&
347 		    (dinfo->cfg.device == device)) {
348 			return (dinfo->cfg.dev);
349 		}
350 	}
351 
352 	return (NULL);
353 }
354 
355 /* return base address of memory or port map */
356 
357 static uint32_t
358 pci_mapbase(uint32_t mapreg)
359 {
360 
361 	if (PCI_BAR_MEM(mapreg))
362 		return (mapreg & PCIM_BAR_MEM_BASE);
363 	else
364 		return (mapreg & PCIM_BAR_IO_BASE);
365 }
366 
367 /* return map type of memory or port map */
368 
369 static const char *
370 pci_maptype(unsigned mapreg)
371 {
372 
373 	if (PCI_BAR_IO(mapreg))
374 		return ("I/O Port");
375 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
376 		return ("Prefetchable Memory");
377 	return ("Memory");
378 }
379 
380 /* return log2 of map size decoded for memory or port map */
381 
382 static int
383 pci_mapsize(uint32_t testval)
384 {
385 	int ln2size;
386 
387 	testval = pci_mapbase(testval);
388 	ln2size = 0;
389 	if (testval != 0) {
390 		while ((testval & 1) == 0)
391 		{
392 			ln2size++;
393 			testval >>= 1;
394 		}
395 	}
396 	return (ln2size);
397 }
398 
399 /* return log2 of address range supported by map register */
400 
401 static int
402 pci_maprange(unsigned mapreg)
403 {
404 	int ln2range = 0;
405 
406 	if (PCI_BAR_IO(mapreg))
407 		ln2range = 32;
408 	else
409 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
410 		case PCIM_BAR_MEM_32:
411 			ln2range = 32;
412 			break;
413 		case PCIM_BAR_MEM_1MB:
414 			ln2range = 20;
415 			break;
416 		case PCIM_BAR_MEM_64:
417 			ln2range = 64;
418 			break;
419 		}
420 	return (ln2range);
421 }
422 
423 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
424 
425 static void
426 pci_fixancient(pcicfgregs *cfg)
427 {
428 	if (cfg->hdrtype != 0)
429 		return;
430 
431 	/* PCI to PCI bridges use header type 1 */
432 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
433 		cfg->hdrtype = 1;
434 }
435 
/*
 * Extract header-type specific config data.
 *
 * Fills in the fields of *cfg that live at different config offsets
 * depending on the header type: subvendor/subdevice IDs and the
 * number of BARs (plus, under COMPAT_OLDPCI, the secondary bus number
 * for bridge headers).  Unknown header types are left untouched.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers here */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:		/* CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
465 
/*
 * Read the configuration header of the function at domain/bus/slot/func
 * (d/b/s/f) into a freshly allocated pci_devinfo of 'size' bytes,
 * parse its capabilities, and link it onto the global device list.
 * Returns the new entry, or NULL if no device responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones device/vendor read means no device in this slot. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed registers into the pci_conf view. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
537 
538 static int
539 pci_fixup_nextptr(int *nextptr0)
540 {
541 	int nextptr = *nextptr0;
542 
543 	/* "Next pointer" is only one byte */
544 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
545 
546 	if (nextptr & 0x3) {
547 		/*
548 		 * PCI local bus spec 3.0:
549 		 *
550 		 * "... The bottom two bits of all pointers are reserved
551 		 *  and must be implemented as 00b although software must
552 		 *  mask them to allow for future uses of these bits ..."
553 		 */
554 		if (bootverbose) {
555 			kprintf("Illegal PCI extended capability "
556 				"offset, fixup 0x%02x -> 0x%02x\n",
557 				nextptr, nextptr & ~0x3);
558 		}
559 		nextptr &= ~0x3;
560 	}
561 	*nextptr0 = nextptr;
562 
563 	if (nextptr < 0x40) {
564 		if (nextptr != 0) {
565 			kprintf("Illegal PCI extended capability "
566 				"offset 0x%02x", nextptr);
567 		}
568 		return 0;
569 	}
570 	return 1;
571 }
572 
/*
 * Parse the power management capability (PCIY_PMG) at config offset
 * 'ptr'.  Records the PM capability word and the config offsets of the
 * status and PMCSR registers in cfg->pp.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Only honor the first PM capability found. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	/* Only consider the data register when the capability is big enough. */
	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
604 
/*
 * Parse a HyperTransport capability (PCIY_HT) at config offset 'ptr'.
 * Records slave capabilities and, for MSI-mapping capabilities, the
 * mapping window address in cfg->ht.  x86-only; a no-op elsewhere.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	/* The top 3 bits of the command word identify a slave capability. */
	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		/* Fixed mapping: window is at the architectural MSI base. */
		addr = MSI_X86_ADDR_BASE;
	}

	/* Record the MSI mapping capability for later enabling. */
	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
}
649 
650 static void
651 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
652 {
653 #define REG(n, w)	\
654 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
655 
656 	struct pcicfg_msi *msi = &cfg->msi;
657 
658 	msi->msi_location = ptr;
659 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
660 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
661 
662 #undef REG
663 }
664 
665 static void
666 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
667 {
668 #define REG(n, w)	\
669 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
670 
671 	struct pcicfg_msix *msix = &cfg->msix;
672 	uint32_t val;
673 
674 	msix->msix_location = ptr;
675 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
676 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
677 
678 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
679 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
680 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
681 
682 	val = REG(ptr + PCIR_MSIX_PBA, 4);
683 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
684 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
685 
686 	TAILQ_INIT(&msix->msix_vectors);
687 
688 #undef REG
689 }
690 
/*
 * Parse the VPD capability (PCIY_VPD): just remember its config
 * offset; the VPD registers are accessed later via pci_read_vpd().
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
696 
697 static void
698 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
699 {
700 #define REG(n, w)	\
701 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
702 
703 	/* Should always be true. */
704 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
705 		uint32_t val;
706 
707 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
708 		cfg->subvendor = val & 0xffff;
709 		cfg->subdevice = val >> 16;
710 	}
711 
712 #undef REG
713 }
714 
/*
 * Parse the PCI-X capability (PCIY_PCIX): record its config offset
 * and note a PCI-X chipset when found on a bridge.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
730 
731 static int
732 pcie_slotimpl(const pcicfgregs *cfg)
733 {
734 	const struct pcicfg_expr *expr = &cfg->expr;
735 	uint16_t port_type;
736 
737 	/*
738 	 * - Slot implemented bit is meaningful iff current port is
739 	 *   root port or down stream port.
740 	 * - Testing for root port or down stream port is meanningful
741 	 *   iff PCI configure has type 1 header.
742 	 */
743 
744 	if (cfg->hdrtype != 1)
745 		return 0;
746 
747 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
748 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
749 		return 0;
750 
751 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
752 		return 0;
753 
754 	return 1;
755 }
756 
/*
 * Parse the PCI express capability (PCIY_EXPRESS) at config offset
 * 'ptr': record the capability pointer and capability word, and read
 * the slot capabilities when the port implements a slot.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
783 
/*
 * Walk the PCI capability list of the function described by 'cfg' and
 * dispatch each entry to its parser in pci_read_caps[].  On x86, also
 * enable the MSI mapping window of any HyperTransport slave found.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The location of the capability pointer depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/*
 * NOTE: REG and WREG are deliberately NOT #undef'ed here -- the VPD
 * access routines below (pci_read_vpd_reg() and friends) reuse them.
 */
}
850 
851 /*
852  * PCI Vital Product Data
853  */
854 
855 #define	PCI_VPD_TIMEOUT		1000000
856 
857 static int
858 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
859 {
860 	int count = PCI_VPD_TIMEOUT;
861 
862 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
863 
864 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
865 
866 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
867 		if (--count < 0)
868 			return (ENXIO);
869 		DELAY(1);	/* limit looping */
870 	}
871 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
872 
873 	return (0);
874 }
875 
#if 0
/*
 * Write one 32-bit word of Vital Product Data at VPD offset 'reg'.
 * Sets flag bit 0x8000 in the address register to start the write and
 * polls until the device clears it.  Returns 0 on success or ENXIO on
 * timeout.  Currently compiled out; relies on the REG/WREG macros
 * still defined from pci_read_capabilities() above.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
895 
896 #undef PCI_VPD_TIMEOUT
897 
/*
 * Cursor state for sequentially reading a device's VPD area one byte
 * at a time on top of the 32-bit VPD data register.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word fetched */
	int		bytesinval;	/* unconsumed bytes remaining in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
906 
907 static int
908 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
909 {
910 	uint32_t reg;
911 	uint8_t byte;
912 
913 	if (vrs->bytesinval == 0) {
914 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
915 			return (ENXIO);
916 		vrs->val = le32toh(reg);
917 		vrs->off += 4;
918 		byte = vrs->val & 0xff;
919 		vrs->bytesinval = 3;
920 	} else {
921 		vrs->val = vrs->val >> 8;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval--;
924 	}
925 
926 	vrs->cksum += byte;
927 	*data = byte;
928 	return (0);
929 }
930 
/*
 * Public wrapper around pcie_slotimpl(): non-zero iff 'dev' is a PCIe
 * root/downstream port whose "slot implemented" capability bit is set.
 */
int
pcie_slot_implemented(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);

	return pcie_slotimpl(&dinfo->cfg);
}
938 
939 void
940 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
941 {
942 	uint8_t expr_ptr;
943 	uint16_t val;
944 
945 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
946 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
947 		panic("%s: invalid max read request size 0x%02x",
948 		      device_get_nameunit(dev), rqsize);
949 	}
950 
951 	expr_ptr = pci_get_pciecap_ptr(dev);
952 	if (!expr_ptr)
953 		panic("%s: not PCIe device", device_get_nameunit(dev));
954 
955 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
956 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
957 		if (bootverbose)
958 			device_printf(dev, "adjust device control 0x%04x", val);
959 
960 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
961 		val |= rqsize;
962 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
963 
964 		if (bootverbose)
965 			kprintf(" -> 0x%04x\n", val);
966 	}
967 }
968 
969 uint16_t
970 pcie_get_max_readrq(device_t dev)
971 {
972 	uint8_t expr_ptr;
973 	uint16_t val;
974 
975 	expr_ptr = pci_get_pciecap_ptr(dev);
976 	if (!expr_ptr)
977 		panic("%s: not PCIe device", device_get_nameunit(dev));
978 
979 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
980 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
981 }
982 
/*
 * Parse a device's Vital Product Data (VPD) and cache the decoded
 * identifier string, read-only (VPD-R) keywords and read/write
 * (VPD-W) keywords in cfg->vpd.  The parser is a state machine fed
 * one byte at a time by vpd_nextbyte():
 *
 *	state  0 - resource item header (small or large form)
 *	state  1 - identifier string bytes
 *	state  2 - VPD-R keyword header (2-byte keyword + length)
 *	state  3 - VPD-R keyword value bytes
 *	state  4 - skip bytes of an unhandled item
 *	state  5 - VPD-W keyword header
 *	state  6 - VPD-W keyword value bytes
 *	state -1 - normal termination
 *	state -2 - read error from vpd_nextbyte()
 *
 * On checksum failure the VPD-R data is discarded; on read error
 * everything is discarded.  vpd_cached is set regardless so the
 * hardware is probed only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;		/* resource item name (type) */
	int remain;		/* bytes left in the current item */
	int i;			/* index into the current value buffer */
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 0 bad, 1 good */
	int dflen;		/* data length of the current keyword */
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large item: 16-bit LE length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is at most 0x7f 32-bit words total. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small item: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the vpd_ros array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* 2 keyword bytes + length byte */
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword's first byte completes the
			 * checksum; the running sum must be 0 here.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: shrink the array to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:	/* skip bytes of an unhandled item */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the vpd_w array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where the value lives for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* 2 keyword bytes + length byte */
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-W: shrink the array to fit. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark the cache valid even on failure so we don't retry. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1262 
1263 int
1264 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1265 {
1266 	struct pci_devinfo *dinfo = device_get_ivars(child);
1267 	pcicfgregs *cfg = &dinfo->cfg;
1268 
1269 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1270 		pci_read_vpd(device_get_parent(dev), cfg);
1271 
1272 	*identptr = cfg->vpd.vpd_ident;
1273 
1274 	if (*identptr == NULL)
1275 		return (ENXIO);
1276 
1277 	return (0);
1278 }
1279 
1280 int
1281 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1282 	const char **vptr)
1283 {
1284 	struct pci_devinfo *dinfo = device_get_ivars(child);
1285 	pcicfgregs *cfg = &dinfo->cfg;
1286 	int i;
1287 
1288 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1289 		pci_read_vpd(device_get_parent(dev), cfg);
1290 
1291 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1292 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1293 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1294 			*vptr = cfg->vpd.vpd_ros[i].value;
1295 		}
1296 
1297 	if (i != cfg->vpd.vpd_rocnt)
1298 		return (0);
1299 
1300 	*vptr = NULL;
1301 	return (ENXIO);
1302 }
1303 
1304 /*
1305  * Return the offset in configuration space of the requested extended
1306  * capability entry or 0 if the specified capability was not found.
1307  */
1308 int
1309 pci_find_extcap_method(device_t dev, device_t child, int capability,
1310     int *capreg)
1311 {
1312 	struct pci_devinfo *dinfo = device_get_ivars(child);
1313 	pcicfgregs *cfg = &dinfo->cfg;
1314 	u_int32_t status;
1315 	u_int8_t ptr;
1316 
1317 	/*
1318 	 * Check the CAP_LIST bit of the PCI status register first.
1319 	 */
1320 	status = pci_read_config(child, PCIR_STATUS, 2);
1321 	if (!(status & PCIM_STATUS_CAPPRESENT))
1322 		return (ENXIO);
1323 
1324 	/*
1325 	 * Determine the start pointer of the capabilities list.
1326 	 */
1327 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1328 	case 0:
1329 	case 1:
1330 		ptr = PCIR_CAP_PTR;
1331 		break;
1332 	case 2:
1333 		ptr = PCIR_CAP_PTR_2;
1334 		break;
1335 	default:
1336 		/* XXX: panic? */
1337 		return (ENXIO);		/* no extended capabilities support */
1338 	}
1339 	ptr = pci_read_config(child, ptr, 1);
1340 
1341 	/*
1342 	 * Traverse the capabilities list.
1343 	 */
1344 	while (ptr != 0) {
1345 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1346 			if (capreg != NULL)
1347 				*capreg = ptr;
1348 			return (0);
1349 		}
1350 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1351 	}
1352 
1353 	return (ENOENT);
1354 }
1355 
1356 /*
1357  * Support for MSI-X message interrupts.
1358  */
1359 static void
1360 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1361     uint32_t data)
1362 {
1363 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1364 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365 	uint32_t offset;
1366 
1367 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1368 	offset = msix->msix_table_offset + index * 16;
1369 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1370 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1371 	bus_write_4(msix->msix_table_res, offset + 8, data);
1372 
1373 	/* Enable MSI -> HT mapping. */
1374 	pci_ht_map_msi(dev, address);
1375 }
1376 
1377 static void
1378 pci_mask_msix_vector(device_t dev, u_int index)
1379 {
1380 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1381 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382 	uint32_t offset, val;
1383 
1384 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1385 	offset = msix->msix_table_offset + index * 16 + 12;
1386 	val = bus_read_4(msix->msix_table_res, offset);
1387 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1388 		val |= PCIM_MSIX_VCTRL_MASK;
1389 		bus_write_4(msix->msix_table_res, offset, val);
1390 	}
1391 }
1392 
1393 static void
1394 pci_unmask_msix_vector(device_t dev, u_int index)
1395 {
1396 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1397 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1398 	uint32_t offset, val;
1399 
1400 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1401 	offset = msix->msix_table_offset + index * 16 + 12;
1402 	val = bus_read_4(msix->msix_table_res, offset);
1403 	if (val & PCIM_MSIX_VCTRL_MASK) {
1404 		val &= ~PCIM_MSIX_VCTRL_MASK;
1405 		bus_write_4(msix->msix_table_res, offset, val);
1406 	}
1407 }
1408 
1409 int
1410 pci_pending_msix_vector(device_t dev, u_int index)
1411 {
1412 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1413 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1414 	uint32_t offset, bit;
1415 
1416 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1417 	    ("MSI-X is not setup yet"));
1418 
1419 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1420 	offset = msix->msix_pba_offset + (index / 32) * 4;
1421 	bit = 1 << index % 32;
1422 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1423 }
1424 
1425 /*
1426  * Restore MSI-X registers and table during resume.  If MSI-X is
1427  * enabled then walk the virtual table to restore the actual MSI-X
1428  * table.
1429  */
1430 static void
1431 pci_resume_msix(device_t dev)
1432 {
1433 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1434 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1435 
1436 	if (msix->msix_table_res != NULL) {
1437 		const struct msix_vector *mv;
1438 
1439 		pci_mask_msix_allvectors(dev);
1440 
1441 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1442 			u_int vector;
1443 
1444 			if (mv->mv_address == 0)
1445 				continue;
1446 
1447 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1448 			pci_setup_msix_vector(dev, vector,
1449 			    mv->mv_address, mv->mv_data);
1450 			pci_unmask_msix_vector(dev, vector);
1451 		}
1452 	}
1453 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1454 	    msix->msix_ctrl, 2);
1455 }
1456 
1457 /*
1458  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1459  *
1460  * After this function returns, the MSI-X's rid will be saved in rid0.
1461  */
1462 int
1463 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1464     int *rid0, int cpuid)
1465 {
1466 	struct pci_devinfo *dinfo = device_get_ivars(child);
1467 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1468 	struct msix_vector *mv;
1469 	struct resource_list_entry *rle;
1470 	int error, irq, rid;
1471 
1472 	KASSERT(msix->msix_table_res != NULL &&
1473 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1474 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1475 	KASSERT(vector < msix->msix_msgnum,
1476 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1477 
1478 	if (bootverbose) {
1479 		device_printf(child,
1480 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1481 		    vector, msix->msix_msgnum);
1482 	}
1483 
1484 	/* Set rid according to vector number */
1485 	rid = PCI_MSIX_VEC2RID(vector);
1486 
1487 	/* Vector has already been allocated */
1488 	mv = pci_find_msix_vector(child, rid);
1489 	if (mv != NULL)
1490 		return EBUSY;
1491 
1492 	/* Allocate a message. */
1493 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1494 	if (error)
1495 		return error;
1496 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1497 	    irq, irq, 1, cpuid);
1498 
1499 	if (bootverbose) {
1500 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1501 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1502 		    rle->start, cpuid);
1503 	}
1504 
1505 	/* Update counts of alloc'd messages. */
1506 	msix->msix_alloc++;
1507 
1508 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1509 	mv->mv_rid = rid;
1510 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1511 
1512 	*rid0 = rid;
1513 	return 0;
1514 }
1515 
/*
 * Release one previously allocated (and already torn down) MSI-X
 * vector identified by its rid: delete its resource-list entry,
 * hand the IRQ back to the parent bridge and drop the tracking
 * structure.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
	KASSERT(rid > 0, ("invalid rid %d", rid));

	/* The vector must exist and must have been torn down already. */
	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d", rid));

	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1555 
1556 /*
1557  * Return the max supported MSI-X messages this device supports.
1558  * Basically, assuming the MD code can alloc messages, this function
1559  * should return the maximum value that pci_alloc_msix() can return.
1560  * Thus, it is subject to the tunables, etc.
1561  */
1562 int
1563 pci_msix_count_method(device_t dev, device_t child)
1564 {
1565 	struct pci_devinfo *dinfo = device_get_ivars(child);
1566 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1567 
1568 	if (pci_do_msix && msix->msix_location != 0)
1569 		return (msix->msix_msgnum);
1570 	return (0);
1571 }
1572 
/*
 * Prepare a device for MSI-X usage: verify that MSI-X may be used,
 * locate the already-mapped BARs holding the MSI-X table and PBA,
 * record them in the config state and mask all vectors.
 *
 * Returns 0 on success, ENXIO when MSI-X cannot be used (legacy IRQ
 * rid 0 in use, MSI already allocated, blacklisted system, BARs not
 * mapped/active) or ENODEV when the capability is absent/disabled.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* NB: when the PBA shares the table BAR, rle still refers to it. */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	/* Start with every vector masked. */
	pci_mask_msix_allvectors(dev);

	return 0;
}
1629 
1630 void
1631 pci_teardown_msix(device_t dev)
1632 {
1633 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1634 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635 
1636 	KASSERT(msix->msix_table_res != NULL &&
1637 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1638 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1639 	    ("MSI-X vector is still allocated"));
1640 
1641 	pci_mask_msix_allvectors(dev);
1642 
1643 	msix->msix_table_res = NULL;
1644 	msix->msix_pba_res = NULL;
1645 }
1646 
1647 void
1648 pci_enable_msix(device_t dev)
1649 {
1650 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1651 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1652 
1653 	KASSERT(msix->msix_table_res != NULL &&
1654 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1655 
1656 	/* Update control register to enable MSI-X. */
1657 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1658 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1659 	    msix->msix_ctrl, 2);
1660 }
1661 
1662 void
1663 pci_disable_msix(device_t dev)
1664 {
1665 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1666 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1667 
1668 	KASSERT(msix->msix_table_res != NULL &&
1669 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1670 
1671 	/* Disable MSI -> HT mapping. */
1672 	pci_ht_map_msi(dev, 0);
1673 
1674 	/* Update control register to disable MSI-X. */
1675 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1676 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1677 	    msix->msix_ctrl, 2);
1678 }
1679 
1680 static void
1681 pci_mask_msix_allvectors(device_t dev)
1682 {
1683 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1684 	u_int i;
1685 
1686 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1687 		pci_mask_msix_vector(dev, i);
1688 }
1689 
1690 static struct msix_vector *
1691 pci_find_msix_vector(device_t dev, int rid)
1692 {
1693 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1694 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1695 	struct msix_vector *mv;
1696 
1697 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1698 		if (mv->mv_rid == rid)
1699 			return mv;
1700 	}
1701 	return NULL;
1702 }
1703 
1704 /*
1705  * HyperTransport MSI mapping control
1706  */
1707 void
1708 pci_ht_map_msi(device_t dev, uint64_t addr)
1709 {
1710 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1711 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1712 
1713 	if (!ht->ht_msimap)
1714 		return;
1715 
1716 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1717 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1718 		/* Enable MSI -> HT mapping. */
1719 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1720 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1721 		    ht->ht_msictrl, 2);
1722 	}
1723 
1724 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1725 		/* Disable MSI -> HT mapping. */
1726 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1727 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1728 		    ht->ht_msictrl, 2);
1729 	}
1730 }
1731 
1732 /*
1733  * Support for MSI message signalled interrupts.
1734  */
1735 void
1736 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1737 {
1738 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1739 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1740 
1741 	/* Write data and address values. */
1742 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1743 	    address & 0xffffffff, 4);
1744 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1745 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1746 		    address >> 32, 4);
1747 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1748 		    data, 2);
1749 	} else
1750 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1751 		    2);
1752 
1753 	/* Enable MSI in the control register. */
1754 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1755 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1756 	    2);
1757 
1758 	/* Enable MSI -> HT mapping. */
1759 	pci_ht_map_msi(dev, address);
1760 }
1761 
1762 void
1763 pci_disable_msi(device_t dev)
1764 {
1765 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1766 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1767 
1768 	/* Disable MSI -> HT mapping. */
1769 	pci_ht_map_msi(dev, 0);
1770 
1771 	/* Disable MSI in the control register. */
1772 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1773 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1774 	    2);
1775 }
1776 
1777 /*
1778  * Restore MSI registers during resume.  If MSI is enabled then
1779  * restore the data and address registers in addition to the control
1780  * register.
1781  */
1782 static void
1783 pci_resume_msi(device_t dev)
1784 {
1785 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1786 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1787 	uint64_t address;
1788 	uint16_t data;
1789 
1790 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1791 		address = msi->msi_addr;
1792 		data = msi->msi_data;
1793 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1794 		    address & 0xffffffff, 4);
1795 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1796 			pci_write_config(dev, msi->msi_location +
1797 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1798 			pci_write_config(dev, msi->msi_location +
1799 			    PCIR_MSI_DATA_64BIT, data, 2);
1800 		} else
1801 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1802 			    data, 2);
1803 	}
1804 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1805 	    2);
1806 }
1807 
1808 /*
1809  * Returns true if the specified device is blacklisted because MSI
1810  * doesn't work.
1811  */
1812 int
1813 pci_msi_device_blacklisted(device_t dev)
1814 {
1815 	struct pci_quirk *q;
1816 
1817 	if (!pci_honor_msi_blacklist)
1818 		return (0);
1819 
1820 	for (q = &pci_quirks[0]; q->devid; q++) {
1821 		if (q->devid == pci_get_devid(dev) &&
1822 		    q->type == PCI_QUIRK_DISABLE_MSI)
1823 			return (1);
1824 	}
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1830  * we just check for blacklisted chipsets as represented by the
1831  * host-PCI bridge at device 0:0:0.  In the future, it may become
1832  * necessary to check other system attributes, such as the kenv values
1833  * that give the motherboard manufacturer and model number.
1834  */
1835 static int
1836 pci_msi_blacklisted(void)
1837 {
1838 	device_t dev;
1839 
1840 	if (!pci_honor_msi_blacklist)
1841 		return (0);
1842 
1843 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1844 	if (!(pcie_chipset || pcix_chipset))
1845 		return (1);
1846 
1847 	dev = pci_find_bsf(0, 0, 0);
1848 	if (dev != NULL)
1849 		return (pci_msi_device_blacklisted(dev));
1850 	return (0);
1851 }
1852 
1853 /*
1854  * Attempt to allocate count MSI messages on start_cpuid.
1855  *
1856  * If start_cpuid < 0, then the MSI messages' target CPU will be
1857  * selected automaticly.
1858  *
1859  * If the caller explicitly specified the MSI messages' target CPU,
1860  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1861  * messages on the specified CPU, if the allocation fails due to MD
1862  * does not have enough vectors (EMSGSIZE), then we will try next
1863  * available CPU, until the allocation fails on all CPUs.
1864  *
1865  * EMSGSIZE will be returned, if all available CPUs does not have
1866  * enough vectors for the requested amount of MSI messages.  Caller
1867  * should either reduce the amount of MSI messages to be requested,
1868  * or simply giving up using MSI.
1869  *
1870  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1871  * returned in 'rid' array, if the allocation succeeds.
1872  */
1873 int
1874 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1875     int start_cpuid)
1876 {
1877 	struct pci_devinfo *dinfo = device_get_ivars(child);
1878 	pcicfgregs *cfg = &dinfo->cfg;
1879 	struct resource_list_entry *rle;
1880 	int error, i, irqs[32], cpuid = 0;
1881 	uint16_t ctrl;
1882 
1883 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1884 	    ("invalid MSI count %d", count));
1885 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1886 
1887 	/* If rid 0 is allocated, then fail. */
1888 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1889 	if (rle != NULL && rle->res != NULL)
1890 		return (ENXIO);
1891 
1892 	/* Already have allocated messages? */
1893 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1894 		return (ENXIO);
1895 
1896 	/* If MSI is blacklisted for this system, fail. */
1897 	if (pci_msi_blacklisted())
1898 		return (ENXIO);
1899 
1900 	/* MSI capability present? */
1901 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1902 	    !pci_do_msi)
1903 		return (ENODEV);
1904 
1905 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1906 	    count, cfg->msi.msi_msgnum));
1907 
1908 	if (bootverbose) {
1909 		device_printf(child,
1910 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1911 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1912 	}
1913 
1914 	if (start_cpuid < 0)
1915 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1916 
1917 	error = EINVAL;
1918 	for (i = 0; i < ncpus; ++i) {
1919 		cpuid = (start_cpuid + i) % ncpus;
1920 
1921 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1922 		    cfg->msi.msi_msgnum, irqs, cpuid);
1923 		if (error == 0)
1924 			break;
1925 		else if (error != EMSGSIZE)
1926 			return error;
1927 	}
1928 	if (error)
1929 		return error;
1930 
1931 	/*
1932 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1933 	 * the irqs[] array, so add new resources starting at rid 1.
1934 	 */
1935 	for (i = 0; i < count; i++) {
1936 		rid[i] = i + 1;
1937 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1938 		    irqs[i], irqs[i], 1, cpuid);
1939 	}
1940 
1941 	if (bootverbose) {
1942 		if (count == 1) {
1943 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1944 			    irqs[0], cpuid);
1945 		} else {
1946 			int run;
1947 
1948 			/*
1949 			 * Be fancy and try to print contiguous runs
1950 			 * of IRQ values as ranges.  'run' is true if
1951 			 * we are in a range.
1952 			 */
1953 			device_printf(child, "using IRQs %d", irqs[0]);
1954 			run = 0;
1955 			for (i = 1; i < count; i++) {
1956 
1957 				/* Still in a run? */
1958 				if (irqs[i] == irqs[i - 1] + 1) {
1959 					run = 1;
1960 					continue;
1961 				}
1962 
1963 				/* Finish previous range. */
1964 				if (run) {
1965 					kprintf("-%d", irqs[i - 1]);
1966 					run = 0;
1967 				}
1968 
1969 				/* Start new range. */
1970 				kprintf(",%d", irqs[i]);
1971 			}
1972 
1973 			/* Unfinished range? */
1974 			if (run)
1975 				kprintf("-%d", irqs[count - 1]);
1976 			kprintf(" for MSI on cpu%d\n", cpuid);
1977 		}
1978 	}
1979 
1980 	/* Update control register with count. */
1981 	ctrl = cfg->msi.msi_ctrl;
1982 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1983 	ctrl |= (ffs(count) - 1) << 4;
1984 	cfg->msi.msi_ctrl = ctrl;
1985 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1986 
1987 	/* Update counts of alloc'd messages. */
1988 	cfg->msi.msi_alloc = count;
1989 	cfg->msi.msi_handlers = 0;
1990 	return (0);
1991 }
1992 
/*
 * Release the MSI messages associated with this device.  All of the
 * IRQ resources must be unallocated and no handlers may still be
 * established; returns EBUSY otherwise, ENODEV when nothing was
 * allocated.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			/* All messages must target the same CPU. */
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2046 
2047 /*
2048  * Return the max supported MSI messages this device supports.
2049  * Basically, assuming the MD code can alloc messages, this function
2050  * should return the maximum value that pci_alloc_msi() can return.
2051  * Thus, it is subject to the tunables, etc.
2052  */
2053 int
2054 pci_msi_count_method(device_t dev, device_t child)
2055 {
2056 	struct pci_devinfo *dinfo = device_get_ivars(child);
2057 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2058 
2059 	if (pci_do_msi && msi->msi_location != 0)
2060 		return (msi->msi_msgnum);
2061 	return (0);
2062 }
2063 
2064 /* kfree pcicfgregs structure and all depending data structures */
2065 
2066 int
2067 pci_freecfg(struct pci_devinfo *dinfo)
2068 {
2069 	struct devlist *devlist_head;
2070 	int i;
2071 
2072 	devlist_head = &pci_devq;
2073 
2074 	if (dinfo->cfg.vpd.vpd_reg) {
2075 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2076 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2077 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2078 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2079 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2080 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2081 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2082 	}
2083 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2084 	kfree(dinfo, M_DEVBUF);
2085 
2086 	/* increment the generation count */
2087 	pci_generation++;
2088 
2089 	/* we're losing one device */
2090 	pci_numdevs--;
2091 	return (0);
2092 }
2093 
2094 /*
2095  * PCI power manangement
2096  */
2097 int
2098 pci_set_powerstate_method(device_t dev, device_t child, int state)
2099 {
2100 	struct pci_devinfo *dinfo = device_get_ivars(child);
2101 	pcicfgregs *cfg = &dinfo->cfg;
2102 	uint16_t status;
2103 	int result, oldstate, highest, delay;
2104 
2105 	if (cfg->pp.pp_cap == 0)
2106 		return (EOPNOTSUPP);
2107 
2108 	/*
2109 	 * Optimize a no state change request away.  While it would be OK to
2110 	 * write to the hardware in theory, some devices have shown odd
2111 	 * behavior when going from D3 -> D3.
2112 	 */
2113 	oldstate = pci_get_powerstate(child);
2114 	if (oldstate == state)
2115 		return (0);
2116 
2117 	/*
2118 	 * The PCI power management specification states that after a state
2119 	 * transition between PCI power states, system software must
2120 	 * guarantee a minimal delay before the function accesses the device.
2121 	 * Compute the worst case delay that we need to guarantee before we
2122 	 * access the device.  Many devices will be responsive much more
2123 	 * quickly than this delay, but there are some that don't respond
2124 	 * instantly to state changes.  Transitions to/from D3 state require
2125 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2126 	 * is done below with DELAY rather than a sleeper function because
2127 	 * this function can be called from contexts where we cannot sleep.
2128 	 */
2129 	highest = (oldstate > state) ? oldstate : state;
2130 	if (highest == PCI_POWERSTATE_D3)
2131 	    delay = 10000;
2132 	else if (highest == PCI_POWERSTATE_D2)
2133 	    delay = 200;
2134 	else
2135 	    delay = 0;
2136 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2137 	    & ~PCIM_PSTAT_DMASK;
2138 	result = 0;
2139 	switch (state) {
2140 	case PCI_POWERSTATE_D0:
2141 		status |= PCIM_PSTAT_D0;
2142 		break;
2143 	case PCI_POWERSTATE_D1:
2144 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2145 			return (EOPNOTSUPP);
2146 		status |= PCIM_PSTAT_D1;
2147 		break;
2148 	case PCI_POWERSTATE_D2:
2149 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2150 			return (EOPNOTSUPP);
2151 		status |= PCIM_PSTAT_D2;
2152 		break;
2153 	case PCI_POWERSTATE_D3:
2154 		status |= PCIM_PSTAT_D3;
2155 		break;
2156 	default:
2157 		return (EINVAL);
2158 	}
2159 
2160 	if (bootverbose)
2161 		kprintf(
2162 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2163 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2164 		    dinfo->cfg.func, oldstate, state);
2165 
2166 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2167 	if (delay)
2168 		DELAY(delay);
2169 	return (0);
2170 }
2171 
2172 int
2173 pci_get_powerstate_method(device_t dev, device_t child)
2174 {
2175 	struct pci_devinfo *dinfo = device_get_ivars(child);
2176 	pcicfgregs *cfg = &dinfo->cfg;
2177 	uint16_t status;
2178 	int result;
2179 
2180 	if (cfg->pp.pp_cap != 0) {
2181 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2182 		switch (status & PCIM_PSTAT_DMASK) {
2183 		case PCIM_PSTAT_D0:
2184 			result = PCI_POWERSTATE_D0;
2185 			break;
2186 		case PCIM_PSTAT_D1:
2187 			result = PCI_POWERSTATE_D1;
2188 			break;
2189 		case PCIM_PSTAT_D2:
2190 			result = PCI_POWERSTATE_D2;
2191 			break;
2192 		case PCIM_PSTAT_D3:
2193 			result = PCI_POWERSTATE_D3;
2194 			break;
2195 		default:
2196 			result = PCI_POWERSTATE_UNKNOWN;
2197 			break;
2198 		}
2199 	} else {
2200 		/* No support, device is always at D0 */
2201 		result = PCI_POWERSTATE_D0;
2202 	}
2203 	return (result);
2204 }
2205 
2206 /*
2207  * Some convenience functions for PCI device drivers.
2208  */
2209 
2210 static __inline void
2211 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2212 {
2213 	uint16_t	command;
2214 
2215 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2216 	command |= bit;
2217 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2218 }
2219 
2220 static __inline void
2221 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2222 {
2223 	uint16_t	command;
2224 
2225 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2226 	command &= ~bit;
2227 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2228 }
2229 
/* Enable bus mastering for the child device.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2236 
/* Disable bus mastering for the child device.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2243 
2244 int
2245 pci_enable_io_method(device_t dev, device_t child, int space)
2246 {
2247 	uint16_t command;
2248 	uint16_t bit;
2249 	char *error;
2250 
2251 	bit = 0;
2252 	error = NULL;
2253 
2254 	switch(space) {
2255 	case SYS_RES_IOPORT:
2256 		bit = PCIM_CMD_PORTEN;
2257 		error = "port";
2258 		break;
2259 	case SYS_RES_MEMORY:
2260 		bit = PCIM_CMD_MEMEN;
2261 		error = "memory";
2262 		break;
2263 	default:
2264 		return (EINVAL);
2265 	}
2266 	pci_set_command_bit(dev, child, bit);
2267 	/* Some devices seem to need a brief stall here, what do to? */
2268 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2269 	if (command & bit)
2270 		return (0);
2271 	device_printf(child, "failed to enable %s mapping!\n", error);
2272 	return (ENXIO);
2273 }
2274 
2275 int
2276 pci_disable_io_method(device_t dev, device_t child, int space)
2277 {
2278 	uint16_t command;
2279 	uint16_t bit;
2280 	char *error;
2281 
2282 	bit = 0;
2283 	error = NULL;
2284 
2285 	switch(space) {
2286 	case SYS_RES_IOPORT:
2287 		bit = PCIM_CMD_PORTEN;
2288 		error = "port";
2289 		break;
2290 	case SYS_RES_MEMORY:
2291 		bit = PCIM_CMD_MEMEN;
2292 		error = "memory";
2293 		break;
2294 	default:
2295 		return (EINVAL);
2296 	}
2297 	pci_clear_command_bit(dev, child, bit);
2298 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2299 	if (command & bit) {
2300 		device_printf(child, "failed to disable %s mapping!\n", error);
2301 		return (ENXIO);
2302 	}
2303 	return (0);
2304 }
2305 
2306 /*
2307  * New style pci driver.  Parent device is either a pci-host-bridge or a
2308  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2309  */
2310 
/*
 * Dump a human-readable summary of the device's config space header,
 * power management, MSI, MSI-X, and PCI Express capabilities to the
 * console.  Only prints when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		/* Identity and location. */
		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Power management: supported and current D-states. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			/* MSI: message count and optional features. */
			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			/* MSI-X: message count and which BAR(s) hold the
			 * vector table and pending bit array. */
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2368 
/*
 * Print PCI Express capability details (capability version, port type,
 * and slot capabilities) for a device.  Only prints when booting
 * verbosely, and only for devices that have a PCIe capability.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	/* Map the port type field to a short name for display. */
	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	case PCIE_ROOT_END_POINT:
		port_name = "ROOTDEV";
		break;
	case PCIE_ROOT_EVT_COLL:
		port_name = "ROOTEVTC";
		break;
	default:
		port_name = NULL;
		break;
	}
	/*
	 * Root/downstream ports without an implemented slot are not
	 * labelled; suppress the name in that case.
	 */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
	kprintf("\n");
}
2433 
2434 static int
2435 pci_porten(device_t pcib, int b, int s, int f)
2436 {
2437 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2438 		& PCIM_CMD_PORTEN) != 0;
2439 }
2440 
2441 static int
2442 pci_memen(device_t pcib, int b, int s, int f)
2443 {
2444 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2445 		& PCIM_CMD_MEMEN) != 0;
2446 }
2447 
2448 /*
2449  * Add a resource based on a pci map register. Return 1 if the map
2450  * register is a 32bit map register or 2 if it is a 64bit register.
2451  */
2452 static int
2453 pci_add_map(device_t pcib, device_t bus, device_t dev,
2454     int b, int s, int f, int reg, struct resource_list *rl, int force,
2455     int prefetch)
2456 {
2457 	uint32_t map;
2458 	pci_addr_t base;
2459 	pci_addr_t start, end, count;
2460 	uint8_t ln2size;
2461 	uint8_t ln2range;
2462 	uint32_t testval;
2463 	uint16_t cmd;
2464 	int type;
2465 	int barlen;
2466 	struct resource *res;
2467 
2468 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2469 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2470 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2471 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2472 
2473 	if (PCI_BAR_MEM(map)) {
2474 		type = SYS_RES_MEMORY;
2475 		if (map & PCIM_BAR_MEM_PREFETCH)
2476 			prefetch = 1;
2477 	} else
2478 		type = SYS_RES_IOPORT;
2479 	ln2size = pci_mapsize(testval);
2480 	ln2range = pci_maprange(testval);
2481 	base = pci_mapbase(map);
2482 	barlen = ln2range == 64 ? 2 : 1;
2483 
2484 	/*
2485 	 * For I/O registers, if bottom bit is set, and the next bit up
2486 	 * isn't clear, we know we have a BAR that doesn't conform to the
2487 	 * spec, so ignore it.  Also, sanity check the size of the data
2488 	 * areas to the type of memory involved.  Memory must be at least
2489 	 * 16 bytes in size, while I/O ranges must be at least 4.
2490 	 */
2491 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2492 		return (barlen);
2493 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2494 	    (type == SYS_RES_IOPORT && ln2size < 2))
2495 		return (barlen);
2496 
2497 	if (ln2range == 64)
2498 		/* Read the other half of a 64bit map register */
2499 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2500 	if (bootverbose) {
2501 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2502 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2503 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2504 			kprintf(", port disabled\n");
2505 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2506 			kprintf(", memory disabled\n");
2507 		else
2508 			kprintf(", enabled\n");
2509 	}
2510 
2511 	/*
2512 	 * If base is 0, then we have problems.  It is best to ignore
2513 	 * such entries for the moment.  These will be allocated later if
2514 	 * the driver specifically requests them.  However, some
2515 	 * removable busses look better when all resources are allocated,
2516 	 * so allow '0' to be overriden.
2517 	 *
2518 	 * Similarly treat maps whose values is the same as the test value
2519 	 * read back.  These maps have had all f's written to them by the
2520 	 * BIOS in an attempt to disable the resources.
2521 	 */
2522 	if (!force && (base == 0 || map == testval))
2523 		return (barlen);
2524 	if ((u_long)base != base) {
2525 		device_printf(bus,
2526 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2527 		    pci_get_domain(dev), b, s, f, reg);
2528 		return (barlen);
2529 	}
2530 
2531 	/*
2532 	 * This code theoretically does the right thing, but has
2533 	 * undesirable side effects in some cases where peripherals
2534 	 * respond oddly to having these bits enabled.  Let the user
2535 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2536 	 * default).
2537 	 */
2538 	if (pci_enable_io_modes) {
2539 		/* Turn on resources that have been left off by a lazy BIOS */
2540 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2541 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2542 			cmd |= PCIM_CMD_PORTEN;
2543 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2544 		}
2545 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2546 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2547 			cmd |= PCIM_CMD_MEMEN;
2548 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2549 		}
2550 	} else {
2551 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2552 			return (barlen);
2553 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2554 			return (barlen);
2555 	}
2556 
2557 	count = 1 << ln2size;
2558 	if (base == 0 || base == pci_mapbase(testval)) {
2559 		start = 0;	/* Let the parent decide. */
2560 		end = ~0ULL;
2561 	} else {
2562 		start = base;
2563 		end = base + (1 << ln2size) - 1;
2564 	}
2565 	resource_list_add(rl, type, reg, start, end, count, -1);
2566 
2567 	/*
2568 	 * Try to allocate the resource for this BAR from our parent
2569 	 * so that this resource range is already reserved.  The
2570 	 * driver for this device will later inherit this resource in
2571 	 * pci_alloc_resource().
2572 	 */
2573 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2574 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2575 	if (res == NULL) {
2576 		/*
2577 		 * If the allocation fails, delete the resource list
2578 		 * entry to force pci_alloc_resource() to allocate
2579 		 * resources from the parent.
2580 		 */
2581 		resource_list_delete(rl, type, reg);
2582 #ifdef PCI_BAR_CLEAR
2583 		/* Clear the BAR */
2584 		start = 0;
2585 #else	/* !PCI_BAR_CLEAR */
2586 		/*
2587 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2588 		 * PCI function, clearing the BAR causes HPET timer
2589 		 * stop ticking.
2590 		 */
2591 		if (bootverbose) {
2592 			kprintf("pci:%d:%d:%d: resource reservation failed "
2593 				"%#jx - %#jx\n", b, s, f,
2594 				(intmax_t)start, (intmax_t)end);
2595 		}
2596 		return (barlen);
2597 #endif	/* PCI_BAR_CLEAR */
2598 	} else {
2599 		start = rman_get_start(res);
2600 	}
2601 	pci_write_config(dev, reg, start, 4);
2602 	if (ln2range == 64)
2603 		pci_write_config(dev, reg + 4, start >> 32, 4);
2604 	return (barlen);
2605 }
2606 
2607 /*
2608  * For ATA devices we need to decide early what addressing mode to use.
2609  * Legacy demands that the primary and secondary ATA ports sits on the
2610  * same addresses that old ISA hardware did. This dictates that we use
2611  * those addresses and ignore the BAR's if we cannot set PCI native
2612  * addressing mode.
2613  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: use BARs 0/1 when in native mode, otherwise
	 * reserve the legacy ISA addresses (0x1f0-0x1f7, 0x3f6).
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	/*
	 * Secondary channel: use BARs 2/3 when in native mode, otherwise
	 * reserve the legacy ISA addresses (0x170-0x177, 0x376).
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* BARs 4/5 (bus-master DMA etc.) are always mapped normally. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2667 
/*
 * Determine the legacy INTx IRQ for 'dev' (tunable override, intline
 * register, or bus routing) and add it to the device's resource list
 * as IRQ rid 0.  If force_route is set, ask the bus to route an
 * interrupt even when intline already holds a valid IRQ.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		/* Reject out-of-range tunable values (valid: 1..254). */
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			BUS_CONFIG_INTR(bus, dev, irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2722 
/*
 * Enumerate all BARs of 'dev', add them to its resource list, apply
 * quirk-mandated extra maps, and (re)route the legacy interrupt.
 * 'force' and 'prefetchmask' are passed through to pci_add_map().
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 depending on BAR width. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2768 
/*
 * Scan every slot/function on bus 'busno' in 'domain' and add a child
 * device for each function found.  'dinfo_size' allows subclassed
 * busses to allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		/* Skip slots whose header type is out of range. */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices expose up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2801 
/*
 * Create a new child device on 'bus' from the config data in 'dinfo',
 * snapshot/restore its config space, and enumerate its resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/* Save config space, then restore it (powers the device to D0). */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2816 
/*
 * Generic PCI bus probe.  Returns a low (negative) priority so that
 * more specific bus subclasses can claim the device instead.
 */
static int
pci_probe(device_t dev)
{
	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2825 
/*
 * Attach the PCI bus: query the parent bridge for our domain and bus
 * number, then enumerate and attach all child devices.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
2847 
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* ACPI may suggest a different sleep state. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
2895 
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2930 
2931 static void
2932 pci_load_vendor_data(void)
2933 {
2934 	caddr_t vendordata, info;
2935 
2936 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2937 		info = preload_search_info(vendordata, MODINFO_ADDR);
2938 		pci_vendordata = *(char **)info;
2939 		info = preload_search_info(vendordata, MODINFO_SIZE);
2940 		pci_vendordata_size = *(size_t *)info;
2941 		/* terminate the database */
2942 		pci_vendordata[pci_vendordata_size] = '\n';
2943 	}
2944 }
2945 
/*
 * Called when a new PCI driver is loaded: give every currently
 * unattached child a chance to be claimed by the new driver.  Devices
 * that still fail to attach are powered back down.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe devices no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		/* Power the device up before probing; back down on failure. */
		pci_cfg_restore(child, dinfo);
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
2975 
/*
 * Bus callback invoked when a child's driver detaches: save the
 * child's config space and power it down (the '1' argument).
 */
static void
pci_child_detached(device_t parent __unused, device_t child)
{
	/* Turn child's power off */
	pci_cfg_save(child, device_get_ivars(child), 1);
}
2982 
/*
 * Set up an interrupt handler for a child device.  For direct children
 * this also programs the interrupt hardware: rid 0 means legacy INTx
 * (unmask INTxDIS); any other rid is an MSI or MSI-X message that must
 * be mapped through the parent bridge and programmed into the device,
 * after which INTx is masked.  On mapping failure the generic handler
 * set up earlier is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* Map and enable MSI on first handler only. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			/* Program and unmask the per-vector table entry. */
			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * NOTE: the success path also falls through to 'bad' with
		 * error == 0; the teardown below only runs on failure.
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3073 
3074 int
3075 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3076     void *cookie)
3077 {
3078 	int rid, error;
3079 
3080 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3081 		return (EINVAL);
3082 
3083 	/* If this isn't a direct child, just bail out */
3084 	if (device_get_parent(child) != dev)
3085 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3086 
3087 	rid = rman_get_rid(irq);
3088 	if (rid == 0) {
3089 		/* Mask INTx */
3090 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3091 	} else {
3092 		struct pci_devinfo *dinfo = device_get_ivars(child);
3093 
3094 		/*
3095 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3096 		 * decrement the appropriate handlers count and mask the
3097 		 * MSI-X message, or disable MSI messages if the count
3098 		 * drops to 0.
3099 		 */
3100 		if (dinfo->cfg.msi.msi_alloc > 0) {
3101 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3102 
3103 			KASSERT(rid <= msi->msi_alloc,
3104 			    ("MSI-X index too high"));
3105 			KASSERT(msi->msi_handlers > 0,
3106 			    ("MSI rid %d is not setup", rid));
3107 
3108 			msi->msi_handlers--;
3109 			if (msi->msi_handlers == 0)
3110 				pci_disable_msi(child);
3111 		} else {
3112 			struct msix_vector *mv;
3113 
3114 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3115 			    ("No MSI or MSI-X rid %d allocated", rid));
3116 
3117 			mv = pci_find_msix_vector(child, rid);
3118 			KASSERT(mv != NULL,
3119 			    ("MSI-X rid %d is not allocated", rid));
3120 			KASSERT(mv->mv_address != 0,
3121 			    ("MSI-X rid %d has not been setup", rid));
3122 
3123 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3124 			mv->mv_address = 0;
3125 			mv->mv_data = 0;
3126 		}
3127 	}
3128 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3129 	if (rid > 0)
3130 		KASSERT(error == 0,
3131 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3132 	return (error);
3133 }
3134 
3135 int
3136 pci_print_child(device_t dev, device_t child)
3137 {
3138 	struct pci_devinfo *dinfo;
3139 	struct resource_list *rl;
3140 	int retval = 0;
3141 
3142 	dinfo = device_get_ivars(child);
3143 	rl = &dinfo->resources;
3144 
3145 	retval += bus_print_child_header(dev, child);
3146 
3147 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3148 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3149 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3150 	if (device_get_flags(dev))
3151 		retval += kprintf(" flags %#x", device_get_flags(dev));
3152 
3153 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3154 	    pci_get_function(child));
3155 
3156 	retval += bus_print_child_footer(dev, child);
3157 
3158 	return (retval);
3159 }
3160 
/*
 * Table mapping PCI class (and optionally subclass) codes to generic
 * human-readable descriptions.  Used by pci_probe_nomatch() when no
 * driver claims a device and no entry is found in the loaded vendor
 * database.  An entry with subclass == -1 supplies the default
 * description for the whole class; a following entry with a specific
 * subclass refines it.  The table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3252 
3253 void
3254 pci_probe_nomatch(device_t dev, device_t child)
3255 {
3256 	int	i;
3257 	char	*cp, *scp, *device;
3258 
3259 	/*
3260 	 * Look for a listing for this device in a loaded device database.
3261 	 */
3262 	if ((device = pci_describe_device(child)) != NULL) {
3263 		device_printf(dev, "<%s>", device);
3264 		kfree(device, M_DEVBUF);
3265 	} else {
3266 		/*
3267 		 * Scan the class/subclass descriptions for a general
3268 		 * description.
3269 		 */
3270 		cp = "unknown";
3271 		scp = NULL;
3272 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3273 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3274 				if (pci_nomatch_tab[i].subclass == -1) {
3275 					cp = pci_nomatch_tab[i].desc;
3276 				} else if (pci_nomatch_tab[i].subclass ==
3277 				    pci_get_subclass(child)) {
3278 					scp = pci_nomatch_tab[i].desc;
3279 				}
3280 			}
3281 		}
3282 		device_printf(dev, "<%s%s%s>",
3283 		    cp ? cp : "",
3284 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3285 		    scp ? scp : "");
3286 	}
3287 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3288 		pci_get_vendor(child), pci_get_device(child),
3289 		pci_get_slot(child), pci_get_function(child));
3290 	if (pci_get_intpin(child) > 0) {
3291 		int irq;
3292 
3293 		irq = pci_get_irq(child);
3294 		if (PCI_INTERRUPT_VALID(irq))
3295 			kprintf(" irq %d", irq);
3296 	}
3297 	kprintf("\n");
3298 
3299 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3300 }
3301 
3302 /*
3303  * Parse the PCI device database, if loaded, and return a pointer to a
3304  * description of the device.
3305  *
3306  * The database is flat text formatted as follows:
3307  *
3308  * Any line not in a valid format is ignored.
3309  * Lines are terminated with newline '\n' characters.
3310  *
3311  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3312  * the vendor name.
3313  *
3314  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3315  * - devices cannot be listed without a corresponding VENDOR line.
3316  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3317  * another TAB, then the device name.
3318  */
3319 
3320 /*
3321  * Assuming (ptr) points to the beginning of a line in the database,
3322  * return the vendor or device and description of the next entry.
3323  * The value of (vendor) or (device) inappropriate for the entry type
3324  * is set to -1.  Returns nonzero at the end of the database.
3325  *
3326  * Note that this is slightly unrobust in the face of corrupt data;
3327  * we attempt to safeguard against this by spamming the end of the
3328  * database with a newline when we initialise.
3329  */
3330 static int
3331 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3332 {
3333 	char	*cp = *ptr;
3334 	int	left;
3335 
3336 	*device = -1;
3337 	*vendor = -1;
3338 	**desc = '\0';
3339 	for (;;) {
3340 		left = pci_vendordata_size - (cp - pci_vendordata);
3341 		if (left <= 0) {
3342 			*ptr = cp;
3343 			return(1);
3344 		}
3345 
3346 		/* vendor entry? */
3347 		if (*cp != '\t' &&
3348 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3349 			break;
3350 		/* device entry? */
3351 		if (*cp == '\t' &&
3352 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3353 			break;
3354 
3355 		/* skip to next line */
3356 		while (*cp != '\n' && left > 0) {
3357 			cp++;
3358 			left--;
3359 		}
3360 		if (*cp == '\n') {
3361 			cp++;
3362 			left--;
3363 		}
3364 	}
3365 	/* skip to next line */
3366 	while (*cp != '\n' && left > 0) {
3367 		cp++;
3368 		left--;
3369 	}
3370 	if (*cp == '\n' && left > 0)
3371 		cp++;
3372 	*ptr = cp;
3373 	return(0);
3374 }
3375 
3376 static char *
3377 pci_describe_device(device_t dev)
3378 {
3379 	int	vendor, device;
3380 	char	*desc, *vp, *dp, *line;
3381 
3382 	desc = vp = dp = NULL;
3383 
3384 	/*
3385 	 * If we have no vendor data, we can't do anything.
3386 	 */
3387 	if (pci_vendordata == NULL)
3388 		goto out;
3389 
3390 	/*
3391 	 * Scan the vendor data looking for this device
3392 	 */
3393 	line = pci_vendordata;
3394 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3395 		goto out;
3396 	for (;;) {
3397 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3398 			goto out;
3399 		if (vendor == pci_get_vendor(dev))
3400 			break;
3401 	}
3402 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3403 		goto out;
3404 	for (;;) {
3405 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3406 			*dp = 0;
3407 			break;
3408 		}
3409 		if (vendor != -1) {
3410 			*dp = 0;
3411 			break;
3412 		}
3413 		if (device == pci_get_device(dev))
3414 			break;
3415 	}
3416 	if (dp[0] == '\0')
3417 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3418 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3419 	    NULL)
3420 		ksprintf(desc, "%s, %s", vp, dp);
3421  out:
3422 	if (vp != NULL)
3423 		kfree(vp, M_DEVBUF);
3424 	if (dp != NULL)
3425 		kfree(dp, M_DEVBUF);
3426 	return(desc);
3427 }
3428 
/*
 * BUS_READ_IVAR method: export the cached PCI config registers of a
 * child device as bus instance variables.  Returns ENOENT for unknown
 * ivars.  All values come from the pcicfgregs cache, not from live
 * config-space reads.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device-id/vendor-id word, device in the high half. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3520 
/*
 * BUS_WRITE_IVAR method: most PCI ivars are read-only.  Only the
 * interrupt pin may currently be overridden; the other known ivars
 * return EINVAL and unknown ones return ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
3553 #ifdef notyet
3554 #include "opt_ddb.h"
3555 #ifdef DDB
3556 #include <ddb/ddb.h>
3557 #include <sys/cons.h>
3558 
3559 /*
3560  * List resources based on pci map registers, used for within ddb
3561  */
3562 
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Used to number devices that have no attached driver name. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/* One line per device: selector, class, ids, revision. */
		p = &dinfo->conf;
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3602 #endif /* DDB */
3603 #endif
3604 
/*
 * Lazily size and allocate the resource backing a BAR.
 *
 * Sizes the BAR by writing all-ones and reading back the mask of
 * implemented address bits, allocates a suitably sized and aligned
 * range from the parent, records it in the child's resource list, and
 * programs the allocated address back into the BAR.  Returns NULL if
 * the BAR is unimplemented, of the wrong type for the request, or the
 * allocation fails.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit memory BAR occupies two consecutive config dwords. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The BAR type must match the resource type being requested. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the (possibly newly allocated) address into the BAR. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3703 
3704 
/*
 * BUS_ALLOC_RESOURCE method for the PCI bus.
 *
 * For direct children this performs lazy resource allocation: legacy
 * interrupts are routed on first use (refused once MSI/MSI-X has been
 * allocated), BAR-backed I/O and memory ranges are sized on demand via
 * pci_alloc_map(), and previously reserved entries are handed back,
 * being activated here if RF_ACTIVE was requested.  Everything else
 * falls through to the generic resource-list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			/* First request for this BAR: size and map it. */
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
3781 
3782 void
3783 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3784 {
3785 	struct pci_devinfo *dinfo;
3786 	struct resource_list *rl;
3787 	struct resource_list_entry *rle;
3788 
3789 	if (device_get_parent(child) != dev)
3790 		return;
3791 
3792 	dinfo = device_get_ivars(child);
3793 	rl = &dinfo->resources;
3794 	rle = resource_list_find(rl, type, rid);
3795 	if (rle) {
3796 		if (rle->res) {
3797 			if (rman_get_device(rle->res) != dev ||
3798 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3799 				device_printf(dev, "delete_resource: "
3800 				    "Resource still owned by child, oops. "
3801 				    "(type=%d, rid=%d, addr=%lx)\n",
3802 				    rle->type, rle->rid,
3803 				    rman_get_start(rle->res));
3804 				return;
3805 			}
3806 			bus_release_resource(dev, type, rid, rle->res);
3807 		}
3808 		resource_list_delete(rl, type, rid);
3809 	}
3810 	/*
3811 	 * Why do we turn off the PCI configuration BAR when we delete a
3812 	 * resource? -- imp
3813 	 */
3814 	pci_write_config(child, rid, 0, 4);
3815 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3816 }
3817 
3818 struct resource_list *
3819 pci_get_resource_list (device_t dev, device_t child)
3820 {
3821 	struct pci_devinfo *dinfo = device_get_ivars(child);
3822 
3823 	if (dinfo == NULL)
3824 		return (NULL);
3825 
3826 	return (&dinfo->resources);
3827 }
3828 
3829 uint32_t
3830 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3831 {
3832 	struct pci_devinfo *dinfo = device_get_ivars(child);
3833 	pcicfgregs *cfg = &dinfo->cfg;
3834 
3835 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3836 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3837 }
3838 
3839 void
3840 pci_write_config_method(device_t dev, device_t child, int reg,
3841     uint32_t val, int width)
3842 {
3843 	struct pci_devinfo *dinfo = device_get_ivars(child);
3844 	pcicfgregs *cfg = &dinfo->cfg;
3845 
3846 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3847 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3848 }
3849 
3850 int
3851 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3852     size_t buflen)
3853 {
3854 
3855 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3856 	    pci_get_function(child));
3857 	return (0);
3858 }
3859 
3860 int
3861 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3862     size_t buflen)
3863 {
3864 	struct pci_devinfo *dinfo;
3865 	pcicfgregs *cfg;
3866 
3867 	dinfo = device_get_ivars(child);
3868 	cfg = &dinfo->cfg;
3869 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3870 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3871 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3872 	    cfg->progif);
3873 	return (0);
3874 }
3875 
3876 int
3877 pci_assign_interrupt_method(device_t dev, device_t child)
3878 {
3879 	struct pci_devinfo *dinfo = device_get_ivars(child);
3880 	pcicfgregs *cfg = &dinfo->cfg;
3881 
3882 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3883 	    cfg->intpin));
3884 }
3885 
3886 static int
3887 pci_modevent(module_t mod, int what, void *arg)
3888 {
3889 	static struct cdev *pci_cdev;
3890 
3891 	switch (what) {
3892 	case MOD_LOAD:
3893 		STAILQ_INIT(&pci_devq);
3894 		pci_generation = 0;
3895 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3896 				    "pci");
3897 		pci_load_vendor_data();
3898 		break;
3899 
3900 	case MOD_UNLOAD:
3901 		destroy_dev(pci_cdev);
3902 		break;
3903 	}
3904 
3905 	return (0);
3906 }
3907 
/*
 * Restore the saved type 0 configuration registers of a device, e.g.
 * after a suspend/resume cycle or before reprobing a powered-down
 * device.  The counterpart of pci_cfg_save().
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Write the cached copies back into config space. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3953 
3954 void
3955 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3956 {
3957 	int i;
3958 	uint32_t cls;
3959 	int ps;
3960 
3961 	/*
3962 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3963 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3964 	 * which also require special treatment.  Other types are unknown, and
3965 	 * we err on the side of safety by ignoring them.  Powering down
3966 	 * bridges should not be undertaken lightly.
3967 	 */
3968 	if (dinfo->cfg.hdrtype != 0)
3969 		return;
3970 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3971 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3972 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3973 
3974 	/*
3975 	 * Some drivers apparently write to these registers w/o updating our
3976 	 * cached copy.  No harm happens if we update the copy, so do so here
3977 	 * so we can restore them.  The COMMAND register is modified by the
3978 	 * bus w/o updating the cache.  This should represent the normally
3979 	 * writable portion of the 'defined' part of type 0 headers.  In
3980 	 * theory we also need to save/restore the PCI capability structures
3981 	 * we know about, but apart from power we don't know any that are
3982 	 * writable.
3983 	 */
3984 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3985 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3986 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3987 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3988 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3989 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3990 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
3991 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
3992 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
3993 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
3994 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
3995 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
3996 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
3997 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
3998 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
3999 
4000 	/*
4001 	 * don't set the state for display devices, base peripherals and
4002 	 * memory devices since bad things happen when they are powered down.
4003 	 * We should (a) have drivers that can easily detach and (b) use
4004 	 * generic drivers for these devices so that some device actually
4005 	 * attaches.  We need to make sure that when we implement (a) we don't
4006 	 * power the device down on a reattach.
4007 	 */
4008 	cls = pci_get_class(dev);
4009 	if (!setstate)
4010 		return;
4011 	switch (pci_do_power_nodriver)
4012 	{
4013 		case 0:		/* NO powerdown at all */
4014 			return;
4015 		case 1:		/* Conservative about what to power down */
4016 			if (cls == PCIC_STORAGE)
4017 				return;
4018 			/*FALLTHROUGH*/
4019 		case 2:		/* Agressive about what to power down */
4020 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4021 			    cls == PCIC_BASEPERIPH)
4022 				return;
4023 			/*FALLTHROUGH*/
4024 		case 3:		/* Power down everything */
4025 			break;
4026 	}
4027 	/*
4028 	 * PCI spec says we can only go into D3 state from D0 state.
4029 	 * Transition from D[12] into D0 before going to D3 state.
4030 	 */
4031 	ps = pci_get_powerstate(dev);
4032 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4033 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4034 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4035 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4036 }
4037 
4038 #ifdef COMPAT_OLDPCI
4039 
4040 /*
4041  * Locate the parent of a PCI device by scanning the PCI devlist
4042  * and return the entry for the parent.
4043  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4044  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4045  */
4046 pcicfgregs *
4047 pci_devlist_get_parent(pcicfgregs *cfg)
4048 {
4049 	struct devlist *devlist_head;
4050 	struct pci_devinfo *dinfo;
4051 	pcicfgregs *bridge_cfg;
4052 	int i;
4053 
4054 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4055 
4056 	/* If the device is on PCI bus 0, look for the host */
4057 	if (cfg->bus == 0) {
4058 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4059 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4060 			bridge_cfg = &dinfo->cfg;
4061 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4062 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4063 		    		&& bridge_cfg->bus == cfg->bus) {
4064 				return bridge_cfg;
4065 			}
4066 		}
4067 	}
4068 
4069 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4070 	if (cfg->bus > 0) {
4071 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4072 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4073 			bridge_cfg = &dinfo->cfg;
4074 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4075 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4076 				&& bridge_cfg->secondarybus == cfg->bus) {
4077 				return bridge_cfg;
4078 			}
4079 		}
4080 	}
4081 
4082 	return NULL;
4083 }
4084 
4085 #endif	/* COMPAT_OLDPCI */
4086 
4087 int
4088 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4089 {
4090 	int rid, type;
4091 	u_int flags;
4092 
4093 	rid = 0;
4094 	type = PCI_INTR_TYPE_LEGACY;
4095 	flags = RF_SHAREABLE | RF_ACTIVE;
4096 
4097 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4098 	if (msi_enable) {
4099 		int cpu;
4100 
4101 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4102 		if (cpu >= ncpus)
4103 			cpu = ncpus - 1;
4104 
4105 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4106 			flags &= ~RF_SHAREABLE;
4107 			type = PCI_INTR_TYPE_MSI;
4108 		}
4109 	}
4110 
4111 	*rid0 = rid;
4112 	*flags0 = flags;
4113 
4114 	return type;
4115 }
4116