xref: /dragonfly/sys/bus/pci/pci.c (revision 783d47c4)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include "pcib_if.h"
63 #include "pci_if.h"
64 
65 #ifdef __HAVE_ACPI
66 #include <contrib/dev/acpica/acpi.h>
67 #include "acpi_if.h"
68 #else
69 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
70 #endif
71 
72 extern struct dev_ops pcic_ops;	/* XXX */
73 
74 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
75 
76 static uint32_t		pci_mapbase(unsigned mapreg);
77 static const char	*pci_maptype(unsigned mapreg);
78 static int		pci_mapsize(unsigned testval);
79 static int		pci_maprange(unsigned mapreg);
80 static void		pci_fixancient(pcicfgregs *cfg);
81 
82 static int		pci_porten(device_t pcib, int b, int s, int f);
83 static int		pci_memen(device_t pcib, int b, int s, int f);
84 static void		pci_assign_interrupt(device_t bus, device_t dev,
85 			    int force_route);
86 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
87 			    int b, int s, int f, int reg,
88 			    struct resource_list *rl, int force, int prefetch);
89 static int		pci_probe(device_t dev);
90 static int		pci_attach(device_t dev);
91 static void		pci_child_detached(device_t, device_t);
92 static void		pci_load_vendor_data(void);
93 static int		pci_describe_parse_line(char **ptr, int *vendor,
94 			    int *device, char **desc);
95 static char		*pci_describe_device(device_t dev);
96 static int		pci_modevent(module_t mod, int what, void *arg);
97 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
98 			    pcicfgregs *cfg);
99 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
100 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t *data);
102 #if 0
103 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t data);
105 #endif
106 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
107 static void		pci_disable_msi(device_t dev);
108 static void		pci_enable_msi(device_t dev, uint64_t address,
109 			    uint16_t data);
110 static void		pci_setup_msix_vector(device_t dev, u_int index,
111 			    uint64_t address, uint32_t data);
112 static void		pci_mask_msix_vector(device_t dev, u_int index);
113 static void		pci_unmask_msix_vector(device_t dev, u_int index);
114 static void		pci_mask_msix_allvectors(device_t dev);
115 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pcie_slotimpl(const pcicfgregs *);
120 static void		pci_print_verbose_expr(const pcicfgregs *);
121 
122 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_subvendor(device_t, int, int,
128 			    pcicfgregs *);
129 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
131 
132 static device_method_t pci_methods[] = {
133 	/* Device interface */
134 	DEVMETHOD(device_probe,		pci_probe),
135 	DEVMETHOD(device_attach,	pci_attach),
136 	DEVMETHOD(device_detach,	bus_generic_detach),
137 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
138 	DEVMETHOD(device_suspend,	pci_suspend),
139 	DEVMETHOD(device_resume,	pci_resume),
140 
141 	/* Bus interface */
142 	DEVMETHOD(bus_print_child,	pci_print_child),
143 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
144 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
145 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
146 	DEVMETHOD(bus_driver_added,	pci_driver_added),
147 	DEVMETHOD(bus_child_detached,	pci_child_detached),
148 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
149 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
150 
151 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
152 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
153 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
154 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
155 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
156 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
157 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
158 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
159 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
160 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
161 
162 	/* PCI interface */
163 	DEVMETHOD(pci_read_config,	pci_read_config_method),
164 	DEVMETHOD(pci_write_config,	pci_write_config_method),
165 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
166 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
167 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
168 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
169 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
170 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
171 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
172 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
173 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
174 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
175 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
176 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
177 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
178 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
179 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
180 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
181 
182 	{ 0, 0 }
183 };
184 
185 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
186 
187 static devclass_t pci_devclass;
188 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
189 MODULE_VERSION(pci, 1);
190 
191 static char	*pci_vendordata;
192 static size_t	pci_vendordata_size;
193 
194 
195 static const struct pci_read_cap {
196 	int		cap;
197 	pci_read_cap_t	read_cap;
198 } pci_read_caps[] = {
199 	{ PCIY_PMG,		pci_read_cap_pmgt },
200 	{ PCIY_HT,		pci_read_cap_ht },
201 	{ PCIY_MSI,		pci_read_cap_msi },
202 	{ PCIY_MSIX,		pci_read_cap_msix },
203 	{ PCIY_VPD,		pci_read_cap_vpd },
204 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
205 	{ PCIY_PCIX,		pci_read_cap_pcix },
206 	{ PCIY_EXPRESS,		pci_read_cap_express },
207 	{ 0, NULL } /* required last entry */
208 };
209 
/*
 * One known-hardware workaround: which device it applies to, what kind
 * of workaround it is, and up to two type-specific arguments.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

/*
 * Quirk table.  Members that are not named in an entry are implicitly
 * zero.  The final all-zero entry is presumably the end-of-table
 * sentinel for whoever scans this array -- TODO confirm against the
 * lookup code.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ .devid = 0x71138086, .type = PCI_QUIRK_MAP_REG, .arg1 = 0x90 },
	{ .devid = 0x719b8086, .type = PCI_QUIRK_MAP_REG, .arg1 = 0x90 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ .devid = 0x02001166, .type = PCI_QUIRK_MAP_REG, .arg1 = 0x90 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ .devid = 0x00141166, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x00171166, .type = PCI_QUIRK_DISABLE_MSI },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ .devid = 0x25408086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x254c8086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25508086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25608086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25708086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x25788086, .type = PCI_QUIRK_DISABLE_MSI },
	{ .devid = 0x35808086, .type = PCI_QUIRK_DISABLE_MSI },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ .devid = 0x74501022, .type = PCI_QUIRK_DISABLE_MSI },

	{ .devid = 0 }
};
253 
254 /* map register information */
255 #define	PCI_MAPMEM	0x01	/* memory map */
256 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
257 #define	PCI_MAPPORT	0x04	/* port map */
258 
259 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
260 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
261 
262 struct devlist pci_devq;
263 uint32_t pci_generation;
264 uint32_t pci_numdevs = 0;
265 static int pcie_chipset, pcix_chipset;
266 
267 /* sysctl vars */
268 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
269 
270 static int pci_enable_io_modes = 1;
271 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
272 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
273     &pci_enable_io_modes, 1,
274     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
275 enable these bits correctly.  We'd like to do this all the time, but there\n\
276 are some peripherals that this causes problems with.");
277 
278 static int pci_do_power_nodriver = 0;
279 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
280 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
281     &pci_do_power_nodriver, 0,
282   "Place a function into D3 state when no driver attaches to it.  0 means\n\
283 disable.  1 means conservatively place devices into D3 state.  2 means\n\
284 aggressively place devices into D3 state.  3 means put absolutely everything\n\
285 in D3 state.");
286 
287 static int pci_do_power_resume = 1;
288 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
289 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
290     &pci_do_power_resume, 1,
291   "Transition from D3 -> D0 on resume.");
292 
293 static int pci_do_msi = 1;
294 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
295 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
296     "Enable support for MSI interrupts");
297 
298 static int pci_do_msix = 1;
299 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
300 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
301     "Enable support for MSI-X interrupts");
302 
303 static int pci_honor_msi_blacklist = 1;
304 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
305 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
306     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
307 
308 static int pci_msi_cpuid;
309 
310 /* Find a device_t by bus/slot/function in domain 0 */
311 
312 device_t
313 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
314 {
315 
316 	return (pci_find_dbsf(0, bus, slot, func));
317 }
318 
319 /* Find a device_t by domain/bus/slot/function */
320 
321 device_t
322 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323 {
324 	struct pci_devinfo *dinfo;
325 
326 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327 		if ((dinfo->cfg.domain == domain) &&
328 		    (dinfo->cfg.bus == bus) &&
329 		    (dinfo->cfg.slot == slot) &&
330 		    (dinfo->cfg.func == func)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 /* Find a device_t by vendor/device ID */
339 
340 device_t
341 pci_find_device(uint16_t vendor, uint16_t device)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.vendor == vendor) &&
347 		    (dinfo->cfg.device == device)) {
348 			return (dinfo->cfg.dev);
349 		}
350 	}
351 
352 	return (NULL);
353 }
354 
355 /* return base address of memory or port map */
356 
357 static uint32_t
358 pci_mapbase(uint32_t mapreg)
359 {
360 
361 	if (PCI_BAR_MEM(mapreg))
362 		return (mapreg & PCIM_BAR_MEM_BASE);
363 	else
364 		return (mapreg & PCIM_BAR_IO_BASE);
365 }
366 
367 /* return map type of memory or port map */
368 
369 static const char *
370 pci_maptype(unsigned mapreg)
371 {
372 
373 	if (PCI_BAR_IO(mapreg))
374 		return ("I/O Port");
375 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
376 		return ("Prefetchable Memory");
377 	return ("Memory");
378 }
379 
/*
 * Return log2 of the map size decoded for a memory or port map.
 *
 * The result is the number of zero bits below the lowest set bit of
 * the base-address portion of testval; 0 is returned when that portion
 * is entirely zero.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base = pci_mapbase(testval);
	int shift;

	if (base == 0)
		return (0);

	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;

	return (shift);
}
398 
399 /* return log2 of address range supported by map register */
400 
401 static int
402 pci_maprange(unsigned mapreg)
403 {
404 	int ln2range = 0;
405 
406 	if (PCI_BAR_IO(mapreg))
407 		ln2range = 32;
408 	else
409 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
410 		case PCIM_BAR_MEM_32:
411 			ln2range = 32;
412 			break;
413 		case PCIM_BAR_MEM_1MB:
414 			ln2range = 20;
415 			break;
416 		case PCIM_BAR_MEM_64:
417 			ln2range = 64;
418 			break;
419 		}
420 	return (ln2range);
421 }
422 
423 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
424 
425 static void
426 pci_fixancient(pcicfgregs *cfg)
427 {
428 	if (cfg->hdrtype != 0)
429 		return;
430 
431 	/* PCI to PCI bridges use header type 1 */
432 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
433 		cfg->hdrtype = 1;
434 }
435 
436 /* extract header type specific config data */
437 
438 static void
439 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
440 {
441 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
442 	switch (cfg->hdrtype) {
443 	case 0:
444 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
445 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
446 		cfg->nummaps	    = PCI_MAXMAPS_0;
447 		break;
448 	case 1:
449 		cfg->nummaps	    = PCI_MAXMAPS_1;
450 #ifdef COMPAT_OLDPCI
451 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
452 #endif
453 		break;
454 	case 2:
455 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
456 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
457 		cfg->nummaps	    = PCI_MAXMAPS_2;
458 #ifdef COMPAT_OLDPCI
459 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
460 #endif
461 		break;
462 	}
463 #undef REG
464 }
465 
466 /* read configuration header into pcicfgregs structure */
467 struct pci_devinfo *
468 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
469 {
470 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
471 	pcicfgregs *cfg = NULL;
472 	struct pci_devinfo *devlist_entry;
473 	struct devlist *devlist_head;
474 
475 	devlist_head = &pci_devq;
476 
477 	devlist_entry = NULL;
478 
479 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
480 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
481 
482 		cfg = &devlist_entry->cfg;
483 
484 		cfg->domain		= d;
485 		cfg->bus		= b;
486 		cfg->slot		= s;
487 		cfg->func		= f;
488 		cfg->vendor		= REG(PCIR_VENDOR, 2);
489 		cfg->device		= REG(PCIR_DEVICE, 2);
490 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
491 		cfg->statreg		= REG(PCIR_STATUS, 2);
492 		cfg->baseclass		= REG(PCIR_CLASS, 1);
493 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
494 		cfg->progif		= REG(PCIR_PROGIF, 1);
495 		cfg->revid		= REG(PCIR_REVID, 1);
496 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
497 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
498 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
499 		cfg->intpin		= REG(PCIR_INTPIN, 1);
500 		cfg->intline		= REG(PCIR_INTLINE, 1);
501 
502 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
503 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
504 
505 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
506 		cfg->hdrtype		&= ~PCIM_MFDEV;
507 
508 		pci_fixancient(cfg);
509 		pci_hdrtypedata(pcib, b, s, f, cfg);
510 
511 		pci_read_capabilities(pcib, cfg);
512 
513 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
514 
515 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
516 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
517 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
518 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
519 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
520 
521 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
522 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
523 		devlist_entry->conf.pc_vendor = cfg->vendor;
524 		devlist_entry->conf.pc_device = cfg->device;
525 
526 		devlist_entry->conf.pc_class = cfg->baseclass;
527 		devlist_entry->conf.pc_subclass = cfg->subclass;
528 		devlist_entry->conf.pc_progif = cfg->progif;
529 		devlist_entry->conf.pc_revid = cfg->revid;
530 
531 		pci_numdevs++;
532 		pci_generation++;
533 	}
534 	return (devlist_entry);
535 #undef REG
536 }
537 
538 static int
539 pci_fixup_nextptr(int *nextptr0)
540 {
541 	int nextptr = *nextptr0;
542 
543 	/* "Next pointer" is only one byte */
544 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
545 
546 	if (nextptr & 0x3) {
547 		/*
548 		 * PCI local bus spec 3.0:
549 		 *
550 		 * "... The bottom two bits of all pointers are reserved
551 		 *  and must be implemented as 00b although software must
552 		 *  mask them to allow for future uses of these bits ..."
553 		 */
554 		if (bootverbose) {
555 			kprintf("Illegal PCI extended capability "
556 				"offset, fixup 0x%02x -> 0x%02x\n",
557 				nextptr, nextptr & ~0x3);
558 		}
559 		nextptr &= ~0x3;
560 	}
561 	*nextptr0 = nextptr;
562 
563 	if (nextptr < 0x40) {
564 		if (nextptr != 0) {
565 			kprintf("Illegal PCI extended capability "
566 				"offset 0x%02x", nextptr);
567 		}
568 		return 0;
569 	}
570 	return 1;
571 }
572 
573 static void
574 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
575 {
576 #define REG(n, w)	\
577 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
578 
579 	struct pcicfg_pp *pp = &cfg->pp;
580 
581 	if (pp->pp_cap)
582 		return;
583 
584 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
585 	pp->pp_status = ptr + PCIR_POWER_STATUS;
586 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
587 
588 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
589 		/*
590 		 * XXX
591 		 * We should write to data_select and read back from
592 		 * data_scale to determine whether data register is
593 		 * implemented.
594 		 */
595 #ifdef foo
596 		pp->pp_data = ptr + PCIR_POWER_DATA;
597 #else
598 		pp->pp_data = 0;
599 #endif
600 	}
601 
602 #undef REG
603 }
604 
605 static void
606 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
607 {
608 #if defined(__i386__) || defined(__x86_64__)
609 
610 #define REG(n, w)	\
611 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
612 
613 	struct pcicfg_ht *ht = &cfg->ht;
614 	uint64_t addr;
615 	uint32_t val;
616 
617 	/* Determine HT-specific capability type. */
618 	val = REG(ptr + PCIR_HT_COMMAND, 2);
619 
620 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
621 		cfg->ht.ht_slave = ptr;
622 
623 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
624 		return;
625 
626 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
627 		/* Sanity check the mapping window. */
628 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
629 		addr <<= 32;
630 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
631 		if (addr != MSI_X86_ADDR_BASE) {
632 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
633 				"has non-default MSI window 0x%llx\n",
634 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
635 				(long long)addr);
636 		}
637 	} else {
638 		addr = MSI_X86_ADDR_BASE;
639 	}
640 
641 	ht->ht_msimap = ptr;
642 	ht->ht_msictrl = val;
643 	ht->ht_msiaddr = addr;
644 
645 #undef REG
646 
647 #endif	/* __i386__ || __x86_64__ */
648 }
649 
650 static void
651 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
652 {
653 #define REG(n, w)	\
654 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
655 
656 	struct pcicfg_msi *msi = &cfg->msi;
657 
658 	msi->msi_location = ptr;
659 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
660 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
661 
662 #undef REG
663 }
664 
665 static void
666 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
667 {
668 #define REG(n, w)	\
669 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
670 
671 	struct pcicfg_msix *msix = &cfg->msix;
672 	uint32_t val;
673 
674 	msix->msix_location = ptr;
675 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
676 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
677 
678 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
679 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
680 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
681 
682 	val = REG(ptr + PCIR_MSIX_PBA, 4);
683 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
684 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
685 
686 	TAILQ_INIT(&msix->msix_vectors);
687 
688 #undef REG
689 }
690 
691 static void
692 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
693 {
694 	cfg->vpd.vpd_reg = ptr;
695 }
696 
697 static void
698 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
699 {
700 #define REG(n, w)	\
701 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
702 
703 	/* Should always be true. */
704 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
705 		uint32_t val;
706 
707 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
708 		cfg->subvendor = val & 0xffff;
709 		cfg->subdevice = val >> 16;
710 	}
711 
712 #undef REG
713 }
714 
715 static void
716 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
717 {
718 	/*
719 	 * Assume we have a PCI-X chipset if we have
720 	 * at least one PCI-PCI bridge with a PCI-X
721 	 * capability.  Note that some systems with
722 	 * PCI-express or HT chipsets might match on
723 	 * this check as well.
724 	 */
725 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
726 		pcix_chipset = 1;
727 
728 	cfg->pcix.pcix_ptr = ptr;
729 }
730 
731 static int
732 pcie_slotimpl(const pcicfgregs *cfg)
733 {
734 	const struct pcicfg_expr *expr = &cfg->expr;
735 	uint16_t port_type;
736 
737 	/*
738 	 * Only version 1 can be parsed currently
739 	 */
740 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
741 		return 0;
742 
743 	/*
744 	 * - Slot implemented bit is meaningful iff current port is
745 	 *   root port or down stream port.
746 	 * - Testing for root port or down stream port is meanningful
747 	 *   iff PCI configure has type 1 header.
748 	 */
749 
750 	if (cfg->hdrtype != 1)
751 		return 0;
752 
753 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
754 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
755 		return 0;
756 
757 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
758 		return 0;
759 
760 	return 1;
761 }
762 
763 static void
764 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
765 {
766 #define REG(n, w)	\
767 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
768 
769 	struct pcicfg_expr *expr = &cfg->expr;
770 
771 	/*
772 	 * Assume we have a PCI-express chipset if we have
773 	 * at least one PCI-express device.
774 	 */
775 	pcie_chipset = 1;
776 
777 	expr->expr_ptr = ptr;
778 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
779 
780 	/*
781 	 * Only version 1 can be parsed currently
782 	 */
783 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
784 		return;
785 
786 	/*
787 	 * Read slot capabilities.  Slot capabilities exists iff
788 	 * current port's slot is implemented
789 	 */
790 	if (pcie_slotimpl(cfg))
791 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
792 
793 #undef REG
794 }
795 
796 static void
797 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
798 {
799 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
800 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
801 
802 	uint32_t val;
803 	int nextptr, ptrptr;
804 
805 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
806 		/* No capabilities */
807 		return;
808 	}
809 
810 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
811 	case 0:
812 	case 1:
813 		ptrptr = PCIR_CAP_PTR;
814 		break;
815 	case 2:
816 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
817 		break;
818 	default:
819 		return;				/* no capabilities support */
820 	}
821 	nextptr = REG(ptrptr, 1);	/* sanity check? */
822 
823 	/*
824 	 * Read capability entries.
825 	 */
826 	while (pci_fixup_nextptr(&nextptr)) {
827 		const struct pci_read_cap *rc;
828 		int ptr = nextptr;
829 
830 		/* Find the next entry */
831 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
832 
833 		/* Process this entry */
834 		val = REG(ptr + PCICAP_ID, 1);
835 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
836 			if (rc->cap == val) {
837 				rc->read_cap(pcib, ptr, nextptr, cfg);
838 				break;
839 			}
840 		}
841 	}
842 
843 #if defined(__i386__) || defined(__x86_64__)
844 	/*
845 	 * Enable the MSI mapping window for all HyperTransport
846 	 * slaves.  PCI-PCI bridges have their windows enabled via
847 	 * PCIB_MAP_MSI().
848 	 */
849 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
850 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
851 		device_printf(pcib,
852 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
853 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
854 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
855 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
856 		     2);
857 	}
858 #endif
859 
860 /* REG and WREG use carry through to next functions */
861 }
862 
863 /*
864  * PCI Vital Product Data
865  */
866 
867 #define	PCI_VPD_TIMEOUT		1000000
868 
869 static int
870 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
871 {
872 	int count = PCI_VPD_TIMEOUT;
873 
874 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
875 
876 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
877 
878 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
879 		if (--count < 0)
880 			return (ENXIO);
881 		DELAY(1);	/* limit looping */
882 	}
883 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
884 
885 	return (0);
886 }
887 
888 #if 0
889 static int
890 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
891 {
892 	int count = PCI_VPD_TIMEOUT;
893 
894 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
895 
896 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
897 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
898 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
899 		if (--count < 0)
900 			return (ENXIO);
901 		DELAY(1);	/* limit looping */
902 	}
903 
904 	return (0);
905 }
906 #endif
907 
908 #undef PCI_VPD_TIMEOUT
909 
910 struct vpd_readstate {
911 	device_t	pcib;
912 	pcicfgregs	*cfg;
913 	uint32_t	val;
914 	int		bytesinval;
915 	int		off;
916 	uint8_t		cksum;
917 };
918 
919 static int
920 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
921 {
922 	uint32_t reg;
923 	uint8_t byte;
924 
925 	if (vrs->bytesinval == 0) {
926 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
927 			return (ENXIO);
928 		vrs->val = le32toh(reg);
929 		vrs->off += 4;
930 		byte = vrs->val & 0xff;
931 		vrs->bytesinval = 3;
932 	} else {
933 		vrs->val = vrs->val >> 8;
934 		byte = vrs->val & 0xff;
935 		vrs->bytesinval--;
936 	}
937 
938 	vrs->cksum += byte;
939 	*data = byte;
940 	return (0);
941 }
942 
943 int
944 pcie_slot_implemented(device_t dev)
945 {
946 	struct pci_devinfo *dinfo = device_get_ivars(dev);
947 
948 	return pcie_slotimpl(&dinfo->cfg);
949 }
950 
951 void
952 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
953 {
954 	uint8_t expr_ptr;
955 	uint16_t val;
956 
957 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
958 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
959 		panic("%s: invalid max read request size 0x%02x",
960 		      device_get_nameunit(dev), rqsize);
961 	}
962 
963 	expr_ptr = pci_get_pciecap_ptr(dev);
964 	if (!expr_ptr)
965 		panic("%s: not PCIe device", device_get_nameunit(dev));
966 
967 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
968 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
969 		if (bootverbose)
970 			device_printf(dev, "adjust device control 0x%04x", val);
971 
972 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
973 		val |= rqsize;
974 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
975 
976 		if (bootverbose)
977 			kprintf(" -> 0x%04x\n", val);
978 	}
979 }
980 
981 uint16_t
982 pcie_get_max_readrq(device_t dev)
983 {
984 	uint8_t expr_ptr;
985 	uint16_t val;
986 
987 	expr_ptr = pci_get_pciecap_ptr(dev);
988 	if (!expr_ptr)
989 		panic("%s: not PCIe device", device_get_nameunit(dev));
990 
991 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
992 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
993 }
994 
995 static void
996 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
997 {
998 	struct vpd_readstate vrs;
999 	int state;
1000 	int name;
1001 	int remain;
1002 	int i;
1003 	int alloc, off;		/* alloc/off for RO/W arrays */
1004 	int cksumvalid;
1005 	int dflen;
1006 	uint8_t byte;
1007 	uint8_t byte2;
1008 
1009 	/* init vpd reader */
1010 	vrs.bytesinval = 0;
1011 	vrs.off = 0;
1012 	vrs.pcib = pcib;
1013 	vrs.cfg = cfg;
1014 	vrs.cksum = 0;
1015 
1016 	state = 0;
1017 	name = remain = i = 0;	/* shut up stupid gcc */
1018 	alloc = off = 0;	/* shut up stupid gcc */
1019 	dflen = 0;		/* shut up stupid gcc */
1020 	cksumvalid = -1;
1021 	while (state >= 0) {
1022 		if (vpd_nextbyte(&vrs, &byte)) {
1023 			state = -2;
1024 			break;
1025 		}
1026 #if 0
1027 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1028 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1029 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1030 #endif
1031 		switch (state) {
1032 		case 0:		/* item name */
1033 			if (byte & 0x80) {
1034 				if (vpd_nextbyte(&vrs, &byte2)) {
1035 					state = -2;
1036 					break;
1037 				}
1038 				remain = byte2;
1039 				if (vpd_nextbyte(&vrs, &byte2)) {
1040 					state = -2;
1041 					break;
1042 				}
1043 				remain |= byte2 << 8;
1044 				if (remain > (0x7f*4 - vrs.off)) {
1045 					state = -1;
1046 					kprintf(
1047 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1048 					    cfg->domain, cfg->bus, cfg->slot,
1049 					    cfg->func, remain);
1050 				}
1051 				name = byte & 0x7f;
1052 			} else {
1053 				remain = byte & 0x7;
1054 				name = (byte >> 3) & 0xf;
1055 			}
1056 			switch (name) {
1057 			case 0x2:	/* String */
1058 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1059 				    M_DEVBUF, M_WAITOK);
1060 				i = 0;
1061 				state = 1;
1062 				break;
1063 			case 0xf:	/* End */
1064 				state = -1;
1065 				break;
1066 			case 0x10:	/* VPD-R */
1067 				alloc = 8;
1068 				off = 0;
1069 				cfg->vpd.vpd_ros = kmalloc(alloc *
1070 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1071 				    M_WAITOK | M_ZERO);
1072 				state = 2;
1073 				break;
1074 			case 0x11:	/* VPD-W */
1075 				alloc = 8;
1076 				off = 0;
1077 				cfg->vpd.vpd_w = kmalloc(alloc *
1078 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1079 				    M_WAITOK | M_ZERO);
1080 				state = 5;
1081 				break;
1082 			default:	/* Invalid data, abort */
1083 				state = -1;
1084 				break;
1085 			}
1086 			break;
1087 
1088 		case 1:	/* Identifier String */
1089 			cfg->vpd.vpd_ident[i++] = byte;
1090 			remain--;
1091 			if (remain == 0)  {
1092 				cfg->vpd.vpd_ident[i] = '\0';
1093 				state = 0;
1094 			}
1095 			break;
1096 
1097 		case 2:	/* VPD-R Keyword Header */
1098 			if (off == alloc) {
1099 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1100 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1101 				    M_DEVBUF, M_WAITOK | M_ZERO);
1102 			}
1103 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1104 			if (vpd_nextbyte(&vrs, &byte2)) {
1105 				state = -2;
1106 				break;
1107 			}
1108 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1109 			if (vpd_nextbyte(&vrs, &byte2)) {
1110 				state = -2;
1111 				break;
1112 			}
1113 			dflen = byte2;
1114 			if (dflen == 0 &&
1115 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1116 			    2) == 0) {
1117 				/*
1118 				 * if this happens, we can't trust the rest
1119 				 * of the VPD.
1120 				 */
1121 				kprintf(
1122 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1123 				    cfg->domain, cfg->bus, cfg->slot,
1124 				    cfg->func, dflen);
1125 				cksumvalid = 0;
1126 				state = -1;
1127 				break;
1128 			} else if (dflen == 0) {
1129 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1130 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1131 				    M_DEVBUF, M_WAITOK);
1132 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1133 			} else
1134 				cfg->vpd.vpd_ros[off].value = kmalloc(
1135 				    (dflen + 1) *
1136 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1137 				    M_DEVBUF, M_WAITOK);
1138 			remain -= 3;
1139 			i = 0;
1140 			/* keep in sync w/ state 3's transistions */
1141 			if (dflen == 0 && remain == 0)
1142 				state = 0;
1143 			else if (dflen == 0)
1144 				state = 2;
1145 			else
1146 				state = 3;
1147 			break;
1148 
1149 		case 3:	/* VPD-R Keyword Value */
1150 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1151 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1152 			    "RV", 2) == 0 && cksumvalid == -1) {
1153 				if (vrs.cksum == 0)
1154 					cksumvalid = 1;
1155 				else {
1156 					if (bootverbose)
1157 						kprintf(
1158 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1159 						    cfg->domain, cfg->bus,
1160 						    cfg->slot, cfg->func,
1161 						    vrs.cksum);
1162 					cksumvalid = 0;
1163 					state = -1;
1164 					break;
1165 				}
1166 			}
1167 			dflen--;
1168 			remain--;
1169 			/* keep in sync w/ state 2's transistions */
1170 			if (dflen == 0)
1171 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1172 			if (dflen == 0 && remain == 0) {
1173 				cfg->vpd.vpd_rocnt = off;
1174 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1175 				    off * sizeof(*cfg->vpd.vpd_ros),
1176 				    M_DEVBUF, M_WAITOK | M_ZERO);
1177 				state = 0;
1178 			} else if (dflen == 0)
1179 				state = 2;
1180 			break;
1181 
1182 		case 4:
1183 			remain--;
1184 			if (remain == 0)
1185 				state = 0;
1186 			break;
1187 
1188 		case 5:	/* VPD-W Keyword Header */
1189 			if (off == alloc) {
1190 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1191 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1192 				    M_DEVBUF, M_WAITOK | M_ZERO);
1193 			}
1194 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1195 			if (vpd_nextbyte(&vrs, &byte2)) {
1196 				state = -2;
1197 				break;
1198 			}
1199 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1200 			if (vpd_nextbyte(&vrs, &byte2)) {
1201 				state = -2;
1202 				break;
1203 			}
1204 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1205 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1206 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1207 			    sizeof(*cfg->vpd.vpd_w[off].value),
1208 			    M_DEVBUF, M_WAITOK);
1209 			remain -= 3;
1210 			i = 0;
1211 			/* keep in sync w/ state 6's transistions */
1212 			if (dflen == 0 && remain == 0)
1213 				state = 0;
1214 			else if (dflen == 0)
1215 				state = 5;
1216 			else
1217 				state = 6;
1218 			break;
1219 
1220 		case 6:	/* VPD-W Keyword Value */
1221 			cfg->vpd.vpd_w[off].value[i++] = byte;
1222 			dflen--;
1223 			remain--;
1224 			/* keep in sync w/ state 5's transistions */
1225 			if (dflen == 0)
1226 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1227 			if (dflen == 0 && remain == 0) {
1228 				cfg->vpd.vpd_wcnt = off;
1229 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1230 				    off * sizeof(*cfg->vpd.vpd_w),
1231 				    M_DEVBUF, M_WAITOK | M_ZERO);
1232 				state = 0;
1233 			} else if (dflen == 0)
1234 				state = 5;
1235 			break;
1236 
1237 		default:
1238 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1239 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1240 			    state);
1241 			state = -1;
1242 			break;
1243 		}
1244 	}
1245 
1246 	if (cksumvalid == 0 || state < -1) {
1247 		/* read-only data bad, clean up */
1248 		if (cfg->vpd.vpd_ros != NULL) {
1249 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1250 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1251 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1252 			cfg->vpd.vpd_ros = NULL;
1253 		}
1254 	}
1255 	if (state < -1) {
1256 		/* I/O error, clean up */
1257 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1258 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1259 		if (cfg->vpd.vpd_ident != NULL) {
1260 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1261 			cfg->vpd.vpd_ident = NULL;
1262 		}
1263 		if (cfg->vpd.vpd_w != NULL) {
1264 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1265 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1266 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1267 			cfg->vpd.vpd_w = NULL;
1268 		}
1269 	}
1270 	cfg->vpd.vpd_cached = 1;
1271 #undef REG
1272 #undef WREG
1273 }
1274 
1275 int
1276 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1277 {
1278 	struct pci_devinfo *dinfo = device_get_ivars(child);
1279 	pcicfgregs *cfg = &dinfo->cfg;
1280 
1281 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1282 		pci_read_vpd(device_get_parent(dev), cfg);
1283 
1284 	*identptr = cfg->vpd.vpd_ident;
1285 
1286 	if (*identptr == NULL)
1287 		return (ENXIO);
1288 
1289 	return (0);
1290 }
1291 
1292 int
1293 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1294 	const char **vptr)
1295 {
1296 	struct pci_devinfo *dinfo = device_get_ivars(child);
1297 	pcicfgregs *cfg = &dinfo->cfg;
1298 	int i;
1299 
1300 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1301 		pci_read_vpd(device_get_parent(dev), cfg);
1302 
1303 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1304 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1305 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1306 			*vptr = cfg->vpd.vpd_ros[i].value;
1307 		}
1308 
1309 	if (i != cfg->vpd.vpd_rocnt)
1310 		return (0);
1311 
1312 	*vptr = NULL;
1313 	return (ENXIO);
1314 }
1315 
1316 /*
1317  * Return the offset in configuration space of the requested extended
1318  * capability entry or 0 if the specified capability was not found.
1319  */
1320 int
1321 pci_find_extcap_method(device_t dev, device_t child, int capability,
1322     int *capreg)
1323 {
1324 	struct pci_devinfo *dinfo = device_get_ivars(child);
1325 	pcicfgregs *cfg = &dinfo->cfg;
1326 	u_int32_t status;
1327 	u_int8_t ptr;
1328 
1329 	/*
1330 	 * Check the CAP_LIST bit of the PCI status register first.
1331 	 */
1332 	status = pci_read_config(child, PCIR_STATUS, 2);
1333 	if (!(status & PCIM_STATUS_CAPPRESENT))
1334 		return (ENXIO);
1335 
1336 	/*
1337 	 * Determine the start pointer of the capabilities list.
1338 	 */
1339 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1340 	case 0:
1341 	case 1:
1342 		ptr = PCIR_CAP_PTR;
1343 		break;
1344 	case 2:
1345 		ptr = PCIR_CAP_PTR_2;
1346 		break;
1347 	default:
1348 		/* XXX: panic? */
1349 		return (ENXIO);		/* no extended capabilities support */
1350 	}
1351 	ptr = pci_read_config(child, ptr, 1);
1352 
1353 	/*
1354 	 * Traverse the capabilities list.
1355 	 */
1356 	while (ptr != 0) {
1357 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1358 			if (capreg != NULL)
1359 				*capreg = ptr;
1360 			return (0);
1361 		}
1362 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1363 	}
1364 
1365 	return (ENOENT);
1366 }
1367 
1368 /*
1369  * Support for MSI-X message interrupts.
1370  */
1371 static void
1372 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1373     uint32_t data)
1374 {
1375 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1376 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1377 	uint32_t offset;
1378 
1379 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1380 	offset = msix->msix_table_offset + index * 16;
1381 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1382 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1383 	bus_write_4(msix->msix_table_res, offset + 8, data);
1384 
1385 	/* Enable MSI -> HT mapping. */
1386 	pci_ht_map_msi(dev, address);
1387 }
1388 
1389 static void
1390 pci_mask_msix_vector(device_t dev, u_int index)
1391 {
1392 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1393 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1394 	uint32_t offset, val;
1395 
1396 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1397 	offset = msix->msix_table_offset + index * 16 + 12;
1398 	val = bus_read_4(msix->msix_table_res, offset);
1399 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1400 		val |= PCIM_MSIX_VCTRL_MASK;
1401 		bus_write_4(msix->msix_table_res, offset, val);
1402 	}
1403 }
1404 
1405 static void
1406 pci_unmask_msix_vector(device_t dev, u_int index)
1407 {
1408 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1409 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1410 	uint32_t offset, val;
1411 
1412 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1413 	offset = msix->msix_table_offset + index * 16 + 12;
1414 	val = bus_read_4(msix->msix_table_res, offset);
1415 	if (val & PCIM_MSIX_VCTRL_MASK) {
1416 		val &= ~PCIM_MSIX_VCTRL_MASK;
1417 		bus_write_4(msix->msix_table_res, offset, val);
1418 	}
1419 }
1420 
1421 int
1422 pci_pending_msix_vector(device_t dev, u_int index)
1423 {
1424 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1425 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1426 	uint32_t offset, bit;
1427 
1428 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1429 	    ("MSI-X is not setup yet"));
1430 
1431 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1432 	offset = msix->msix_pba_offset + (index / 32) * 4;
1433 	bit = 1 << index % 32;
1434 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1435 }
1436 
1437 /*
1438  * Restore MSI-X registers and table during resume.  If MSI-X is
1439  * enabled then walk the virtual table to restore the actual MSI-X
1440  * table.
1441  */
1442 static void
1443 pci_resume_msix(device_t dev)
1444 {
1445 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1446 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1447 
1448 	if (msix->msix_table_res != NULL) {
1449 		const struct msix_vector *mv;
1450 
1451 		pci_mask_msix_allvectors(dev);
1452 
1453 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1454 			u_int vector;
1455 
1456 			if (mv->mv_address == 0)
1457 				continue;
1458 
1459 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1460 			pci_setup_msix_vector(dev, vector,
1461 			    mv->mv_address, mv->mv_data);
1462 			pci_unmask_msix_vector(dev, vector);
1463 		}
1464 	}
1465 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1466 	    msix->msix_ctrl, 2);
1467 }
1468 
1469 /*
1470  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1471  *
1472  * After this function returns, the MSI-X's rid will be saved in rid0.
1473  */
1474 int
1475 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1476     int *rid0, int cpuid)
1477 {
1478 	struct pci_devinfo *dinfo = device_get_ivars(child);
1479 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1480 	struct msix_vector *mv;
1481 	struct resource_list_entry *rle;
1482 	int error, irq, rid;
1483 
1484 	KASSERT(msix->msix_table_res != NULL &&
1485 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1486 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1487 	KASSERT(vector < msix->msix_msgnum,
1488 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1489 
1490 	if (bootverbose) {
1491 		device_printf(child,
1492 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1493 		    vector, msix->msix_msgnum);
1494 	}
1495 
1496 	/* Set rid according to vector number */
1497 	rid = PCI_MSIX_VEC2RID(vector);
1498 
1499 	/* Vector has already been allocated */
1500 	mv = pci_find_msix_vector(child, rid);
1501 	if (mv != NULL)
1502 		return EBUSY;
1503 
1504 	/* Allocate a message. */
1505 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1506 	if (error)
1507 		return error;
1508 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1509 	    irq, irq, 1, cpuid);
1510 
1511 	if (bootverbose) {
1512 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1513 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1514 		    rle->start, cpuid);
1515 	}
1516 
1517 	/* Update counts of alloc'd messages. */
1518 	msix->msix_alloc++;
1519 
1520 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1521 	mv->mv_rid = rid;
1522 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1523 
1524 	*rid0 = rid;
1525 	return 0;
1526 }
1527 
1528 int
1529 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1530 {
1531 	struct pci_devinfo *dinfo = device_get_ivars(child);
1532 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1533 	struct resource_list_entry *rle;
1534 	struct msix_vector *mv;
1535 	int irq, cpuid;
1536 
1537 	KASSERT(msix->msix_table_res != NULL &&
1538 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1539 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1540 	KASSERT(rid > 0, ("invalid rid %d", rid));
1541 
1542 	mv = pci_find_msix_vector(child, rid);
1543 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1544 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));
1545 
1546 	/* Make sure resource is no longer allocated. */
1547 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1548 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1549 	KASSERT(rle->res == NULL,
1550 	    ("MSI-X resource is still allocated, rid %d", rid));
1551 
1552 	irq = rle->start;
1553 	cpuid = rle->cpuid;
1554 
1555 	/* Free the resource list entries. */
1556 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1557 
1558 	/* Release the IRQ. */
1559 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1560 
1561 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1562 	kfree(mv, M_DEVBUF);
1563 
1564 	msix->msix_alloc--;
1565 	return (0);
1566 }
1567 
1568 /*
1569  * Return the max supported MSI-X messages this device supports.
1570  * Basically, assuming the MD code can alloc messages, this function
1571  * should return the maximum value that pci_alloc_msix() can return.
1572  * Thus, it is subject to the tunables, etc.
1573  */
1574 int
1575 pci_msix_count_method(device_t dev, device_t child)
1576 {
1577 	struct pci_devinfo *dinfo = device_get_ivars(child);
1578 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1579 
1580 	if (pci_do_msix && msix->msix_location != 0)
1581 		return (msix->msix_msgnum);
1582 	return (0);
1583 }
1584 
1585 int
1586 pci_setup_msix(device_t dev)
1587 {
1588 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1589 	pcicfgregs *cfg = &dinfo->cfg;
1590 	struct resource_list_entry *rle;
1591 	struct resource *table_res, *pba_res;
1592 
1593 	KASSERT(cfg->msix.msix_table_res == NULL &&
1594 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));
1595 
1596 	/* If rid 0 is allocated, then fail. */
1597 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1598 	if (rle != NULL && rle->res != NULL)
1599 		return (ENXIO);
1600 
1601 	/* Already have allocated MSIs? */
1602 	if (cfg->msi.msi_alloc != 0)
1603 		return (ENXIO);
1604 
1605 	/* If MSI is blacklisted for this system, fail. */
1606 	if (pci_msi_blacklisted())
1607 		return (ENXIO);
1608 
1609 	/* MSI-X capability present? */
1610 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1611 	    !pci_do_msix)
1612 		return (ENODEV);
1613 
1614 	KASSERT(cfg->msix.msix_alloc == 0 &&
1615 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1616 	    ("MSI-X vector has been allocated"));
1617 
1618 	/* Make sure the appropriate BARs are mapped. */
1619 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1620 	    cfg->msix.msix_table_bar);
1621 	if (rle == NULL || rle->res == NULL ||
1622 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1623 		return (ENXIO);
1624 	table_res = rle->res;
1625 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1626 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1627 		    cfg->msix.msix_pba_bar);
1628 		if (rle == NULL || rle->res == NULL ||
1629 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1630 			return (ENXIO);
1631 	}
1632 	pba_res = rle->res;
1633 
1634 	cfg->msix.msix_table_res = table_res;
1635 	cfg->msix.msix_pba_res = pba_res;
1636 
1637 	pci_mask_msix_allvectors(dev);
1638 
1639 	return 0;
1640 }
1641 
1642 void
1643 pci_teardown_msix(device_t dev)
1644 {
1645 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1646 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1647 
1648 	KASSERT(msix->msix_table_res != NULL &&
1649 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1650 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1651 	    ("MSI-X vector is still allocated"));
1652 
1653 	pci_mask_msix_allvectors(dev);
1654 
1655 	msix->msix_table_res = NULL;
1656 	msix->msix_pba_res = NULL;
1657 }
1658 
1659 void
1660 pci_enable_msix(device_t dev)
1661 {
1662 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1663 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1664 
1665 	KASSERT(msix->msix_table_res != NULL &&
1666 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1667 
1668 	/* Update control register to enable MSI-X. */
1669 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1670 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1671 	    msix->msix_ctrl, 2);
1672 }
1673 
1674 void
1675 pci_disable_msix(device_t dev)
1676 {
1677 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1678 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1679 
1680 	KASSERT(msix->msix_table_res != NULL &&
1681 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1682 
1683 	/* Disable MSI -> HT mapping. */
1684 	pci_ht_map_msi(dev, 0);
1685 
1686 	/* Update control register to disable MSI-X. */
1687 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1688 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1689 	    msix->msix_ctrl, 2);
1690 }
1691 
1692 static void
1693 pci_mask_msix_allvectors(device_t dev)
1694 {
1695 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1696 	u_int i;
1697 
1698 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1699 		pci_mask_msix_vector(dev, i);
1700 }
1701 
1702 static struct msix_vector *
1703 pci_find_msix_vector(device_t dev, int rid)
1704 {
1705 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1706 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1707 	struct msix_vector *mv;
1708 
1709 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1710 		if (mv->mv_rid == rid)
1711 			return mv;
1712 	}
1713 	return NULL;
1714 }
1715 
1716 /*
1717  * HyperTransport MSI mapping control
1718  */
1719 void
1720 pci_ht_map_msi(device_t dev, uint64_t addr)
1721 {
1722 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1723 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1724 
1725 	if (!ht->ht_msimap)
1726 		return;
1727 
1728 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1729 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1730 		/* Enable MSI -> HT mapping. */
1731 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1732 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1733 		    ht->ht_msictrl, 2);
1734 	}
1735 
1736 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1737 		/* Disable MSI -> HT mapping. */
1738 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1739 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1740 		    ht->ht_msictrl, 2);
1741 	}
1742 }
1743 
1744 /*
1745  * Support for MSI message signalled interrupts.
1746  */
1747 void
1748 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1749 {
1750 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1751 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1752 
1753 	/* Write data and address values. */
1754 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1755 	    address & 0xffffffff, 4);
1756 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1757 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1758 		    address >> 32, 4);
1759 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1760 		    data, 2);
1761 	} else
1762 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1763 		    2);
1764 
1765 	/* Enable MSI in the control register. */
1766 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1767 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1768 	    2);
1769 
1770 	/* Enable MSI -> HT mapping. */
1771 	pci_ht_map_msi(dev, address);
1772 }
1773 
1774 void
1775 pci_disable_msi(device_t dev)
1776 {
1777 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1778 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1779 
1780 	/* Disable MSI -> HT mapping. */
1781 	pci_ht_map_msi(dev, 0);
1782 
1783 	/* Disable MSI in the control register. */
1784 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1785 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1786 	    2);
1787 }
1788 
1789 /*
1790  * Restore MSI registers during resume.  If MSI is enabled then
1791  * restore the data and address registers in addition to the control
1792  * register.
1793  */
1794 static void
1795 pci_resume_msi(device_t dev)
1796 {
1797 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1798 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1799 	uint64_t address;
1800 	uint16_t data;
1801 
1802 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1803 		address = msi->msi_addr;
1804 		data = msi->msi_data;
1805 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1806 		    address & 0xffffffff, 4);
1807 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1808 			pci_write_config(dev, msi->msi_location +
1809 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1810 			pci_write_config(dev, msi->msi_location +
1811 			    PCIR_MSI_DATA_64BIT, data, 2);
1812 		} else
1813 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1814 			    data, 2);
1815 	}
1816 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1817 	    2);
1818 }
1819 
1820 /*
1821  * Returns true if the specified device is blacklisted because MSI
1822  * doesn't work.
1823  */
1824 int
1825 pci_msi_device_blacklisted(device_t dev)
1826 {
1827 	struct pci_quirk *q;
1828 
1829 	if (!pci_honor_msi_blacklist)
1830 		return (0);
1831 
1832 	for (q = &pci_quirks[0]; q->devid; q++) {
1833 		if (q->devid == pci_get_devid(dev) &&
1834 		    q->type == PCI_QUIRK_DISABLE_MSI)
1835 			return (1);
1836 	}
1837 	return (0);
1838 }
1839 
1840 /*
1841  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1842  * we just check for blacklisted chipsets as represented by the
1843  * host-PCI bridge at device 0:0:0.  In the future, it may become
1844  * necessary to check other system attributes, such as the kenv values
1845  * that give the motherboard manufacturer and model number.
1846  */
1847 static int
1848 pci_msi_blacklisted(void)
1849 {
1850 	device_t dev;
1851 
1852 	if (!pci_honor_msi_blacklist)
1853 		return (0);
1854 
1855 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1856 	if (!(pcie_chipset || pcix_chipset))
1857 		return (1);
1858 
1859 	dev = pci_find_bsf(0, 0, 0);
1860 	if (dev != NULL)
1861 		return (pci_msi_device_blacklisted(dev));
1862 	return (0);
1863 }
1864 
1865 /*
1866  * Attempt to allocate count MSI messages on start_cpuid.
1867  *
1868  * If start_cpuid < 0, then the MSI messages' target CPU will be
1869  * selected automaticly.
1870  *
1871  * If the caller explicitly specified the MSI messages' target CPU,
1872  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1873  * messages on the specified CPU, if the allocation fails due to MD
1874  * does not have enough vectors (EMSGSIZE), then we will try next
1875  * available CPU, until the allocation fails on all CPUs.
1876  *
1877  * EMSGSIZE will be returned, if all available CPUs does not have
1878  * enough vectors for the requested amount of MSI messages.  Caller
1879  * should either reduce the amount of MSI messages to be requested,
1880  * or simply giving up using MSI.
1881  *
1882  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1883  * returned in 'rid' array, if the allocation succeeds.
1884  */
1885 int
1886 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1887     int start_cpuid)
1888 {
1889 	struct pci_devinfo *dinfo = device_get_ivars(child);
1890 	pcicfgregs *cfg = &dinfo->cfg;
1891 	struct resource_list_entry *rle;
1892 	int error, i, irqs[32], cpuid = 0;
1893 	uint16_t ctrl;
1894 
1895 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1896 	    ("invalid MSI count %d", count));
1897 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1898 
1899 	/* If rid 0 is allocated, then fail. */
1900 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1901 	if (rle != NULL && rle->res != NULL)
1902 		return (ENXIO);
1903 
1904 	/* Already have allocated messages? */
1905 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1906 		return (ENXIO);
1907 
1908 	/* If MSI is blacklisted for this system, fail. */
1909 	if (pci_msi_blacklisted())
1910 		return (ENXIO);
1911 
1912 	/* MSI capability present? */
1913 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1914 	    !pci_do_msi)
1915 		return (ENODEV);
1916 
1917 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1918 	    count, cfg->msi.msi_msgnum));
1919 
1920 	if (bootverbose) {
1921 		device_printf(child,
1922 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1923 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1924 	}
1925 
1926 	if (start_cpuid < 0)
1927 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1928 
1929 	error = EINVAL;
1930 	for (i = 0; i < ncpus; ++i) {
1931 		cpuid = (start_cpuid + i) % ncpus;
1932 
1933 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1934 		    cfg->msi.msi_msgnum, irqs, cpuid);
1935 		if (error == 0)
1936 			break;
1937 		else if (error != EMSGSIZE)
1938 			return error;
1939 	}
1940 	if (error)
1941 		return error;
1942 
1943 	/*
1944 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1945 	 * the irqs[] array, so add new resources starting at rid 1.
1946 	 */
1947 	for (i = 0; i < count; i++) {
1948 		rid[i] = i + 1;
1949 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1950 		    irqs[i], irqs[i], 1, cpuid);
1951 	}
1952 
1953 	if (bootverbose) {
1954 		if (count == 1) {
1955 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1956 			    irqs[0], cpuid);
1957 		} else {
1958 			int run;
1959 
1960 			/*
1961 			 * Be fancy and try to print contiguous runs
1962 			 * of IRQ values as ranges.  'run' is true if
1963 			 * we are in a range.
1964 			 */
1965 			device_printf(child, "using IRQs %d", irqs[0]);
1966 			run = 0;
1967 			for (i = 1; i < count; i++) {
1968 
1969 				/* Still in a run? */
1970 				if (irqs[i] == irqs[i - 1] + 1) {
1971 					run = 1;
1972 					continue;
1973 				}
1974 
1975 				/* Finish previous range. */
1976 				if (run) {
1977 					kprintf("-%d", irqs[i - 1]);
1978 					run = 0;
1979 				}
1980 
1981 				/* Start new range. */
1982 				kprintf(",%d", irqs[i]);
1983 			}
1984 
1985 			/* Unfinished range? */
1986 			if (run)
1987 				kprintf("-%d", irqs[count - 1]);
1988 			kprintf(" for MSI on cpu%d\n", cpuid);
1989 		}
1990 	}
1991 
1992 	/* Update control register with count. */
1993 	ctrl = cfg->msi.msi_ctrl;
1994 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1995 	ctrl |= (ffs(count) - 1) << 4;
1996 	cfg->msi.msi_ctrl = ctrl;
1997 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1998 
1999 	/* Update counts of alloc'd messages. */
2000 	cfg->msi.msi_alloc = count;
2001 	cfg->msi.msi_handlers = 0;
2002 	return (0);
2003 }
2004 
/*
 * Release the MSI messages associated with this device.
 *
 * dev is the PCI bus, child the device whose messages are released.
 *
 * Returns ENODEV if no messages are currently allocated, EBUSY if a
 * handler is still installed or any of the per-message IRQ resources is
 * still allocated, and 0 after the messages have been handed back to the
 * parent bridge and the cached MSI state has been reset.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	/* irqs[] below holds at most 32 entries. */
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	/*
	 * MSI messages live at IRQ rids 1..msi_alloc.  Collect their IRQ
	 * numbers and verify that they all target the same cpu.
	 */
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			/* The first message defines the expected target cpu. */
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	/* Drop the resource list entries that described the messages. */
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count and forget the cached address/data pair. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2058 
2059 /*
2060  * Return the max supported MSI messages this device supports.
2061  * Basically, assuming the MD code can alloc messages, this function
2062  * should return the maximum value that pci_alloc_msi() can return.
2063  * Thus, it is subject to the tunables, etc.
2064  */
2065 int
2066 pci_msi_count_method(device_t dev, device_t child)
2067 {
2068 	struct pci_devinfo *dinfo = device_get_ivars(child);
2069 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2070 
2071 	if (pci_do_msi && msi->msi_location != 0)
2072 		return (msi->msi_msgnum);
2073 	return (0);
2074 }
2075 
2076 /* kfree pcicfgregs structure and all depending data structures */
2077 
2078 int
2079 pci_freecfg(struct pci_devinfo *dinfo)
2080 {
2081 	struct devlist *devlist_head;
2082 	int i;
2083 
2084 	devlist_head = &pci_devq;
2085 
2086 	if (dinfo->cfg.vpd.vpd_reg) {
2087 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2088 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2089 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2090 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2091 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2092 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2093 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2094 	}
2095 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2096 	kfree(dinfo, M_DEVBUF);
2097 
2098 	/* increment the generation count */
2099 	pci_generation++;
2100 
2101 	/* we're losing one device */
2102 	pci_numdevs--;
2103 	return (0);
2104 }
2105 
2106 /*
 * PCI power management
2108  */
2109 int
2110 pci_set_powerstate_method(device_t dev, device_t child, int state)
2111 {
2112 	struct pci_devinfo *dinfo = device_get_ivars(child);
2113 	pcicfgregs *cfg = &dinfo->cfg;
2114 	uint16_t status;
2115 	int result, oldstate, highest, delay;
2116 
2117 	if (cfg->pp.pp_cap == 0)
2118 		return (EOPNOTSUPP);
2119 
2120 	/*
2121 	 * Optimize a no state change request away.  While it would be OK to
2122 	 * write to the hardware in theory, some devices have shown odd
2123 	 * behavior when going from D3 -> D3.
2124 	 */
2125 	oldstate = pci_get_powerstate(child);
2126 	if (oldstate == state)
2127 		return (0);
2128 
2129 	/*
2130 	 * The PCI power management specification states that after a state
2131 	 * transition between PCI power states, system software must
2132 	 * guarantee a minimal delay before the function accesses the device.
2133 	 * Compute the worst case delay that we need to guarantee before we
2134 	 * access the device.  Many devices will be responsive much more
2135 	 * quickly than this delay, but there are some that don't respond
2136 	 * instantly to state changes.  Transitions to/from D3 state require
2137 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2138 	 * is done below with DELAY rather than a sleeper function because
2139 	 * this function can be called from contexts where we cannot sleep.
2140 	 */
2141 	highest = (oldstate > state) ? oldstate : state;
2142 	if (highest == PCI_POWERSTATE_D3)
2143 	    delay = 10000;
2144 	else if (highest == PCI_POWERSTATE_D2)
2145 	    delay = 200;
2146 	else
2147 	    delay = 0;
2148 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2149 	    & ~PCIM_PSTAT_DMASK;
2150 	result = 0;
2151 	switch (state) {
2152 	case PCI_POWERSTATE_D0:
2153 		status |= PCIM_PSTAT_D0;
2154 		break;
2155 	case PCI_POWERSTATE_D1:
2156 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2157 			return (EOPNOTSUPP);
2158 		status |= PCIM_PSTAT_D1;
2159 		break;
2160 	case PCI_POWERSTATE_D2:
2161 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2162 			return (EOPNOTSUPP);
2163 		status |= PCIM_PSTAT_D2;
2164 		break;
2165 	case PCI_POWERSTATE_D3:
2166 		status |= PCIM_PSTAT_D3;
2167 		break;
2168 	default:
2169 		return (EINVAL);
2170 	}
2171 
2172 	if (bootverbose)
2173 		kprintf(
2174 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2175 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2176 		    dinfo->cfg.func, oldstate, state);
2177 
2178 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2179 	if (delay)
2180 		DELAY(delay);
2181 	return (0);
2182 }
2183 
2184 int
2185 pci_get_powerstate_method(device_t dev, device_t child)
2186 {
2187 	struct pci_devinfo *dinfo = device_get_ivars(child);
2188 	pcicfgregs *cfg = &dinfo->cfg;
2189 	uint16_t status;
2190 	int result;
2191 
2192 	if (cfg->pp.pp_cap != 0) {
2193 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2194 		switch (status & PCIM_PSTAT_DMASK) {
2195 		case PCIM_PSTAT_D0:
2196 			result = PCI_POWERSTATE_D0;
2197 			break;
2198 		case PCIM_PSTAT_D1:
2199 			result = PCI_POWERSTATE_D1;
2200 			break;
2201 		case PCIM_PSTAT_D2:
2202 			result = PCI_POWERSTATE_D2;
2203 			break;
2204 		case PCIM_PSTAT_D3:
2205 			result = PCI_POWERSTATE_D3;
2206 			break;
2207 		default:
2208 			result = PCI_POWERSTATE_UNKNOWN;
2209 			break;
2210 		}
2211 	} else {
2212 		/* No support, device is always at D0 */
2213 		result = PCI_POWERSTATE_D0;
2214 	}
2215 	return (result);
2216 }
2217 
2218 /*
2219  * Some convenience functions for PCI device drivers.
2220  */
2221 
2222 static __inline void
2223 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2224 {
2225 	uint16_t	command;
2226 
2227 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2228 	command |= bit;
2229 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2230 }
2231 
2232 static __inline void
2233 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2234 {
2235 	uint16_t	command;
2236 
2237 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2238 	command &= ~bit;
2239 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2240 }
2241 
/*
 * Enable bus mastering for child by setting PCIM_CMD_BUSMASTEREN in its
 * command register.  Always returns 0; the write is not read back.
 */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2248 
/*
 * Disable bus mastering for child by clearing PCIM_CMD_BUSMASTEREN in
 * its command register.  Always returns 0; the write is not read back.
 */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2255 
2256 int
2257 pci_enable_io_method(device_t dev, device_t child, int space)
2258 {
2259 	uint16_t command;
2260 	uint16_t bit;
2261 	char *error;
2262 
2263 	bit = 0;
2264 	error = NULL;
2265 
2266 	switch(space) {
2267 	case SYS_RES_IOPORT:
2268 		bit = PCIM_CMD_PORTEN;
2269 		error = "port";
2270 		break;
2271 	case SYS_RES_MEMORY:
2272 		bit = PCIM_CMD_MEMEN;
2273 		error = "memory";
2274 		break;
2275 	default:
2276 		return (EINVAL);
2277 	}
2278 	pci_set_command_bit(dev, child, bit);
2279 	/* Some devices seem to need a brief stall here, what do to? */
2280 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2281 	if (command & bit)
2282 		return (0);
2283 	device_printf(child, "failed to enable %s mapping!\n", error);
2284 	return (ENXIO);
2285 }
2286 
2287 int
2288 pci_disable_io_method(device_t dev, device_t child, int space)
2289 {
2290 	uint16_t command;
2291 	uint16_t bit;
2292 	char *error;
2293 
2294 	bit = 0;
2295 	error = NULL;
2296 
2297 	switch(space) {
2298 	case SYS_RES_IOPORT:
2299 		bit = PCIM_CMD_PORTEN;
2300 		error = "port";
2301 		break;
2302 	case SYS_RES_MEMORY:
2303 		bit = PCIM_CMD_MEMEN;
2304 		error = "memory";
2305 		break;
2306 	default:
2307 		return (EINVAL);
2308 	}
2309 	pci_clear_command_bit(dev, child, bit);
2310 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2311 	if (command & bit) {
2312 		device_printf(child, "failed to disable %s mapping!\n", error);
2313 		return (ENXIO);
2314 	}
2315 	return (0);
2316 }
2317 
2318 /*
2319  * New style pci driver.  Parent device is either a pci-host-bridge or a
2320  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2321  */
2322 
2323 void
2324 pci_print_verbose(struct pci_devinfo *dinfo)
2325 {
2326 
2327 	if (bootverbose) {
2328 		pcicfgregs *cfg = &dinfo->cfg;
2329 
2330 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2331 		    cfg->vendor, cfg->device, cfg->revid);
2332 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2333 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2334 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2335 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2336 		    cfg->mfdev);
2337 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2338 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2339 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2340 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2341 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2342 		if (cfg->intpin > 0)
2343 			kprintf("\tintpin=%c, irq=%d\n",
2344 			    cfg->intpin +'a' -1, cfg->intline);
2345 		if (cfg->pp.pp_cap) {
2346 			uint16_t status;
2347 
2348 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2349 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2350 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2351 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2352 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2353 			    status & PCIM_PSTAT_DMASK);
2354 		}
2355 		if (cfg->msi.msi_location) {
2356 			int ctrl;
2357 
2358 			ctrl = cfg->msi.msi_ctrl;
2359 			kprintf("\tMSI supports %d message%s%s%s\n",
2360 			    cfg->msi.msi_msgnum,
2361 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2362 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2363 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2364 		}
2365 		if (cfg->msix.msix_location) {
2366 			kprintf("\tMSI-X supports %d message%s ",
2367 			    cfg->msix.msix_msgnum,
2368 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2369 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2370 				kprintf("in map 0x%x\n",
2371 				    cfg->msix.msix_table_bar);
2372 			else
2373 				kprintf("in maps 0x%x and 0x%x\n",
2374 				    cfg->msix.msix_table_bar,
2375 				    cfg->msix.msix_pba_bar);
2376 		}
2377 		pci_print_verbose_expr(cfg);
2378 	}
2379 }
2380 
2381 static void
2382 pci_print_verbose_expr(const pcicfgregs *cfg)
2383 {
2384 	const struct pcicfg_expr *expr = &cfg->expr;
2385 	const char *port_name;
2386 	uint16_t port_type;
2387 
2388 	if (!bootverbose)
2389 		return;
2390 
2391 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2392 		return;
2393 
2394 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2395 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2396 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
2397 		goto back;
2398 
2399 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2400 
2401 	switch (port_type) {
2402 	case PCIE_END_POINT:
2403 		port_name = "DEVICE";
2404 		break;
2405 	case PCIE_LEG_END_POINT:
2406 		port_name = "LEGDEV";
2407 		break;
2408 	case PCIE_ROOT_PORT:
2409 		port_name = "ROOT";
2410 		break;
2411 	case PCIE_UP_STREAM_PORT:
2412 		port_name = "UPSTREAM";
2413 		break;
2414 	case PCIE_DOWN_STREAM_PORT:
2415 		port_name = "DOWNSTRM";
2416 		break;
2417 	case PCIE_PCIE2PCI_BRIDGE:
2418 		port_name = "PCIE2PCI";
2419 		break;
2420 	case PCIE_PCI2PCIE_BRIDGE:
2421 		port_name = "PCI2PCIE";
2422 		break;
2423 	default:
2424 		port_name = NULL;
2425 		break;
2426 	}
2427 	if ((port_type == PCIE_ROOT_PORT ||
2428 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2429 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2430 		port_name = NULL;
2431 	if (port_name != NULL)
2432 		kprintf("[%s]", port_name);
2433 
2434 	if (pcie_slotimpl(cfg)) {
2435 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2436 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2437 			kprintf("[HOTPLUG]");
2438 	}
2439 back:
2440 	kprintf("\n");
2441 }
2442 
2443 static int
2444 pci_porten(device_t pcib, int b, int s, int f)
2445 {
2446 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2447 		& PCIM_CMD_PORTEN) != 0;
2448 }
2449 
2450 static int
2451 pci_memen(device_t pcib, int b, int s, int f)
2452 {
2453 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2454 		& PCIM_CMD_MEMEN) != 0;
2455 }
2456 
2457 /*
2458  * Add a resource based on a pci map register. Return 1 if the map
2459  * register is a 32bit map register or 2 if it is a 64bit register.
2460  */
2461 static int
2462 pci_add_map(device_t pcib, device_t bus, device_t dev,
2463     int b, int s, int f, int reg, struct resource_list *rl, int force,
2464     int prefetch)
2465 {
2466 	uint32_t map;
2467 	pci_addr_t base;
2468 	pci_addr_t start, end, count;
2469 	uint8_t ln2size;
2470 	uint8_t ln2range;
2471 	uint32_t testval;
2472 	uint16_t cmd;
2473 	int type;
2474 	int barlen;
2475 	struct resource *res;
2476 
2477 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2478 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2479 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2480 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2481 
2482 	if (PCI_BAR_MEM(map)) {
2483 		type = SYS_RES_MEMORY;
2484 		if (map & PCIM_BAR_MEM_PREFETCH)
2485 			prefetch = 1;
2486 	} else
2487 		type = SYS_RES_IOPORT;
2488 	ln2size = pci_mapsize(testval);
2489 	ln2range = pci_maprange(testval);
2490 	base = pci_mapbase(map);
2491 	barlen = ln2range == 64 ? 2 : 1;
2492 
2493 	/*
2494 	 * For I/O registers, if bottom bit is set, and the next bit up
2495 	 * isn't clear, we know we have a BAR that doesn't conform to the
2496 	 * spec, so ignore it.  Also, sanity check the size of the data
2497 	 * areas to the type of memory involved.  Memory must be at least
2498 	 * 16 bytes in size, while I/O ranges must be at least 4.
2499 	 */
2500 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2501 		return (barlen);
2502 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2503 	    (type == SYS_RES_IOPORT && ln2size < 2))
2504 		return (barlen);
2505 
2506 	if (ln2range == 64)
2507 		/* Read the other half of a 64bit map register */
2508 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2509 	if (bootverbose) {
2510 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2511 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2512 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2513 			kprintf(", port disabled\n");
2514 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2515 			kprintf(", memory disabled\n");
2516 		else
2517 			kprintf(", enabled\n");
2518 	}
2519 
2520 	/*
2521 	 * If base is 0, then we have problems.  It is best to ignore
2522 	 * such entries for the moment.  These will be allocated later if
2523 	 * the driver specifically requests them.  However, some
2524 	 * removable busses look better when all resources are allocated,
2525 	 * so allow '0' to be overriden.
2526 	 *
2527 	 * Similarly treat maps whose values is the same as the test value
2528 	 * read back.  These maps have had all f's written to them by the
2529 	 * BIOS in an attempt to disable the resources.
2530 	 */
2531 	if (!force && (base == 0 || map == testval))
2532 		return (barlen);
2533 	if ((u_long)base != base) {
2534 		device_printf(bus,
2535 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2536 		    pci_get_domain(dev), b, s, f, reg);
2537 		return (barlen);
2538 	}
2539 
2540 	/*
2541 	 * This code theoretically does the right thing, but has
2542 	 * undesirable side effects in some cases where peripherals
2543 	 * respond oddly to having these bits enabled.  Let the user
2544 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2545 	 * default).
2546 	 */
2547 	if (pci_enable_io_modes) {
2548 		/* Turn on resources that have been left off by a lazy BIOS */
2549 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2550 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2551 			cmd |= PCIM_CMD_PORTEN;
2552 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2553 		}
2554 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2555 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2556 			cmd |= PCIM_CMD_MEMEN;
2557 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2558 		}
2559 	} else {
2560 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2561 			return (barlen);
2562 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2563 			return (barlen);
2564 	}
2565 
2566 	count = 1 << ln2size;
2567 	if (base == 0 || base == pci_mapbase(testval)) {
2568 		start = 0;	/* Let the parent decide. */
2569 		end = ~0ULL;
2570 	} else {
2571 		start = base;
2572 		end = base + (1 << ln2size) - 1;
2573 	}
2574 	resource_list_add(rl, type, reg, start, end, count, -1);
2575 
2576 	/*
2577 	 * Try to allocate the resource for this BAR from our parent
2578 	 * so that this resource range is already reserved.  The
2579 	 * driver for this device will later inherit this resource in
2580 	 * pci_alloc_resource().
2581 	 */
2582 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2583 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2584 	if (res == NULL) {
2585 		/*
2586 		 * If the allocation fails, delete the resource list
2587 		 * entry to force pci_alloc_resource() to allocate
2588 		 * resources from the parent.
2589 		 */
2590 		resource_list_delete(rl, type, reg);
2591 #ifdef PCI_BAR_CLEAR
2592 		/* Clear the BAR */
2593 		start = 0;
2594 #else	/* !PCI_BAR_CLEAR */
2595 		/*
2596 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2597 		 * PCI function, clearing the BAR causes HPET timer
2598 		 * stop ticking.
2599 		 */
2600 		if (bootverbose) {
2601 			kprintf("pci:%d:%d:%d: resource reservation failed "
2602 				"%#jx - %#jx\n", b, s, f,
2603 				(intmax_t)start, (intmax_t)end);
2604 		}
2605 		return (barlen);
2606 #endif	/* PCI_BAR_CLEAR */
2607 	} else {
2608 		start = rman_get_start(res);
2609 	}
2610 	pci_write_config(dev, reg, start, 4);
2611 	if (ln2range == 64)
2612 		pci_write_config(dev, reg + 4, start >> 32, 4);
2613 	return (barlen);
2614 }
2615 
/*
 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sit on the
 * same addresses that old ISA hardware did.  This dictates that we use
 * those addresses and ignore the BARs if we cannot set PCI native
 * addressing mode.
 *
 * For each channel the programming interface byte is consulted: if the
 * channel's native-mode bit is set its two BARs are added through
 * pci_add_map(), otherwise the fixed legacy I/O port ranges are added
 * to rl and allocated under the same rids.  BAR(4) and BAR(5) are
 * always added through pci_add_map().  force and the per-BAR bits of
 * prefetchmask are passed on to pci_add_map().
 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/* Primary channel: BAR(0)/BAR(1) or legacy 0x1f0-0x1f7 and 0x3f6. */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	/* Secondary channel: BAR(2)/BAR(3) or legacy 0x170-0x177 and 0x376. */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* The remaining two BARs are handled like any other BAR. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2676 
2677 static void
2678 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2679 {
2680 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2681 	pcicfgregs *cfg = &dinfo->cfg;
2682 	char tunable_name[64];
2683 	int irq;
2684 
2685 	/* Has to have an intpin to have an interrupt. */
2686 	if (cfg->intpin == 0)
2687 		return;
2688 
2689 	/* Let the user override the IRQ with a tunable. */
2690 	irq = PCI_INVALID_IRQ;
2691 	ksnprintf(tunable_name, sizeof(tunable_name),
2692 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2693 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2694 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2695 		if (irq >= 255 || irq <= 0) {
2696 			irq = PCI_INVALID_IRQ;
2697 		} else {
2698 			BUS_CONFIG_INTR(bus, dev, irq,
2699 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2700 		}
2701 	}
2702 
2703 	/*
2704 	 * If we didn't get an IRQ via the tunable, then we either use the
2705 	 * IRQ value in the intline register or we ask the bus to route an
2706 	 * interrupt for us.  If force_route is true, then we only use the
2707 	 * value in the intline register if the bus was unable to assign an
2708 	 * IRQ.
2709 	 */
2710 	if (!PCI_INTERRUPT_VALID(irq)) {
2711 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2712 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2713 		if (!PCI_INTERRUPT_VALID(irq))
2714 			irq = cfg->intline;
2715 	}
2716 
2717 	/* If after all that we don't have an IRQ, just bail. */
2718 	if (!PCI_INTERRUPT_VALID(irq))
2719 		return;
2720 
2721 	/* Update the config register if it changed. */
2722 	if (irq != cfg->intline) {
2723 		cfg->intline = irq;
2724 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2725 	}
2726 
2727 	/* Add this IRQ as rid 0 interrupt resource. */
2728 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2729 	    machintr_legacy_intr_cpuid(irq));
2730 }
2731 
2732 void
2733 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
2734 {
2735 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2736 	pcicfgregs *cfg = &dinfo->cfg;
2737 	struct resource_list *rl = &dinfo->resources;
2738 	struct pci_quirk *q;
2739 	int b, i, f, s;
2740 
2741 	b = cfg->bus;
2742 	s = cfg->slot;
2743 	f = cfg->func;
2744 
2745 	/* ATA devices needs special map treatment */
2746 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2747 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2748 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2749 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2750 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2751 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2752 	else
2753 		for (i = 0; i < cfg->nummaps;)
2754 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2755 			    rl, force, prefetchmask & (1 << i));
2756 
2757 	/*
2758 	 * Add additional, quirked resources.
2759 	 */
2760 	for (q = &pci_quirks[0]; q->devid; q++) {
2761 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2762 		    && q->type == PCI_QUIRK_MAP_REG)
2763 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2764 			  force, 0);
2765 	}
2766 
2767 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2768 		/*
2769 		 * Try to re-route interrupts. Sometimes the BIOS or
2770 		 * firmware may leave bogus values in these registers.
2771 		 * If the re-route fails, then just stick with what we
2772 		 * have.
2773 		 */
2774 		pci_assign_interrupt(bus, dev, 1);
2775 	}
2776 }
2777 
2778 void
2779 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2780 {
2781 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2782 	device_t pcib = device_get_parent(dev);
2783 	struct pci_devinfo *dinfo;
2784 	int maxslots;
2785 	int s, f, pcifunchigh;
2786 	uint8_t hdrtype;
2787 
2788 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2789 	    ("dinfo_size too small"));
2790 	maxslots = PCIB_MAXSLOTS(pcib);
2791 	for (s = 0; s <= maxslots; s++) {
2792 		pcifunchigh = 0;
2793 		f = 0;
2794 		DELAY(1);
2795 		hdrtype = REG(PCIR_HDRTYPE, 1);
2796 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2797 			continue;
2798 		if (hdrtype & PCIM_MFDEV)
2799 			pcifunchigh = PCI_FUNCMAX;
2800 		for (f = 0; f <= pcifunchigh; f++) {
2801 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2802 			    dinfo_size);
2803 			if (dinfo != NULL) {
2804 				pci_add_child(dev, dinfo);
2805 			}
2806 		}
2807 	}
2808 #undef REG
2809 }
2810 
2811 void
2812 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2813 {
2814 	device_t pcib;
2815 
2816 	pcib = device_get_parent(bus);
2817 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2818 	device_set_ivars(dinfo->cfg.dev, dinfo);
2819 	resource_list_init(&dinfo->resources);
2820 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2821 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2822 	pci_print_verbose(dinfo);
2823 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
2824 }
2825 
/*
 * Probe method of the generic PCI bus driver: set the description to
 * "PCI bus" and return a probe value of -1000.
 */
static int
pci_probe(device_t dev)
{
	device_set_desc(dev, "PCI bus");

	/* Allow other subclasses to override this driver. */
	return (-1000);
}
2834 
2835 static int
2836 pci_attach(device_t dev)
2837 {
2838 	int busno, domain;
2839 
2840 	/*
2841 	 * Since there can be multiple independantly numbered PCI
2842 	 * busses on systems with multiple PCI domains, we can't use
2843 	 * the unit number to decide which bus we are probing. We ask
2844 	 * the parent pcib what our domain and bus numbers are.
2845 	 */
2846 	domain = pcib_get_domain(dev);
2847 	busno = pcib_get_bus(dev);
2848 	if (bootverbose)
2849 		device_printf(dev, "domain=%d, physical bus=%d\n",
2850 		    domain, busno);
2851 
2852 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2853 
2854 	return (bus_generic_attach(dev));
2855 }
2856 
2857 int
2858 pci_suspend(device_t dev)
2859 {
2860 	int dstate, error, i, numdevs;
2861 	device_t acpi_dev, child, *devlist;
2862 	struct pci_devinfo *dinfo;
2863 
2864 	/*
2865 	 * Save the PCI configuration space for each child and set the
2866 	 * device in the appropriate power state for this sleep state.
2867 	 */
2868 	acpi_dev = NULL;
2869 	if (pci_do_power_resume)
2870 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2871 	device_get_children(dev, &devlist, &numdevs);
2872 	for (i = 0; i < numdevs; i++) {
2873 		child = devlist[i];
2874 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2875 		pci_cfg_save(child, dinfo, 0);
2876 	}
2877 
2878 	/* Suspend devices before potentially powering them down. */
2879 	error = bus_generic_suspend(dev);
2880 	if (error) {
2881 		kfree(devlist, M_TEMP);
2882 		return (error);
2883 	}
2884 
2885 	/*
2886 	 * Always set the device to D3.  If ACPI suggests a different
2887 	 * power state, use it instead.  If ACPI is not present, the
2888 	 * firmware is responsible for managing device power.  Skip
2889 	 * children who aren't attached since they are powered down
2890 	 * separately.  Only manage type 0 devices for now.
2891 	 */
2892 	for (i = 0; acpi_dev && i < numdevs; i++) {
2893 		child = devlist[i];
2894 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2895 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2896 			dstate = PCI_POWERSTATE_D3;
2897 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2898 			pci_set_powerstate(child, dstate);
2899 		}
2900 	}
2901 	kfree(devlist, M_TEMP);
2902 	return (0);
2903 }
2904 
2905 int
2906 pci_resume(device_t dev)
2907 {
2908 	int i, numdevs;
2909 	device_t acpi_dev, child, *devlist;
2910 	struct pci_devinfo *dinfo;
2911 
2912 	/*
2913 	 * Set each child to D0 and restore its PCI configuration space.
2914 	 */
2915 	acpi_dev = NULL;
2916 	if (pci_do_power_resume)
2917 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2918 	device_get_children(dev, &devlist, &numdevs);
2919 	for (i = 0; i < numdevs; i++) {
2920 		/*
2921 		 * Notify ACPI we're going to D0 but ignore the result.  If
2922 		 * ACPI is not present, the firmware is responsible for
2923 		 * managing device power.  Only manage type 0 devices for now.
2924 		 */
2925 		child = devlist[i];
2926 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2927 		if (acpi_dev && device_is_attached(child) &&
2928 		    dinfo->cfg.hdrtype == 0) {
2929 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2930 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2931 		}
2932 
2933 		/* Now the device is powered up, restore its config space. */
2934 		pci_cfg_restore(child, dinfo);
2935 	}
2936 	kfree(devlist, M_TEMP);
2937 	return (bus_generic_resume(dev));
2938 }
2939 
2940 static void
2941 pci_load_vendor_data(void)
2942 {
2943 	caddr_t vendordata, info;
2944 
2945 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2946 		info = preload_search_info(vendordata, MODINFO_ADDR);
2947 		pci_vendordata = *(char **)info;
2948 		info = preload_search_info(vendordata, MODINFO_SIZE);
2949 		pci_vendordata_size = *(size_t *)info;
2950 		/* terminate the database */
2951 		pci_vendordata[pci_vendordata_size] = '\n';
2952 	}
2953 }
2954 
2955 void
2956 pci_driver_added(device_t dev, driver_t *driver)
2957 {
2958 	int numdevs;
2959 	device_t *devlist;
2960 	device_t child;
2961 	struct pci_devinfo *dinfo;
2962 	int i;
2963 
2964 	if (bootverbose)
2965 		device_printf(dev, "driver added\n");
2966 	DEVICE_IDENTIFY(driver, dev);
2967 	device_get_children(dev, &devlist, &numdevs);
2968 	for (i = 0; i < numdevs; i++) {
2969 		child = devlist[i];
2970 		if (device_get_state(child) != DS_NOTPRESENT)
2971 			continue;
2972 		dinfo = device_get_ivars(child);
2973 		pci_print_verbose(dinfo);
2974 		if (bootverbose)
2975 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
2976 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2977 			    dinfo->cfg.func);
2978 		pci_cfg_restore(child, dinfo);
2979 		if (device_probe_and_attach(child) != 0)
2980 			pci_cfg_save(child, dinfo, 1);
2981 	}
2982 	kfree(devlist, M_TEMP);
2983 }
2984 
2985 static void
2986 pci_child_detached(device_t parent __unused, device_t child)
2987 {
2988 	/* Turn child's power off */
2989 	pci_cfg_save(child, device_get_ivars(child), 1);
2990 }
2991 
/*
 * Install an interrupt handler for child on the given IRQ resource.
 *
 * The handler is installed through bus_generic_setup_intr() first.  For
 * direct children the device is then configured to match the resource:
 * rid 0 is the legacy INTx interrupt, so INTx is enabled; any other rid
 * is an MSI or MSI-X message, which is mapped through the parent
 * bridge and programmed into the device, after which INTx is disabled.
 *
 * On success the handler cookie is stored in *cookiep and 0 is
 * returned.  On failure the error is returned; if mapping the message
 * failed, the already installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/*
			 * Only the first handler maps and enables MSI;
			 * later ones just bump the handler count.
			 */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			/* Each MSI-X rid may only be set up once. */
			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			/* Program the table entry, then unmask it. */
			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Reached on success too; error is 0 in that case. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3082 
3083 int
3084 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3085     void *cookie)
3086 {
3087 	int rid, error;
3088 
3089 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3090 		return (EINVAL);
3091 
3092 	/* If this isn't a direct child, just bail out */
3093 	if (device_get_parent(child) != dev)
3094 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3095 
3096 	rid = rman_get_rid(irq);
3097 	if (rid == 0) {
3098 		/* Mask INTx */
3099 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3100 	} else {
3101 		struct pci_devinfo *dinfo = device_get_ivars(child);
3102 
3103 		/*
3104 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3105 		 * decrement the appropriate handlers count and mask the
3106 		 * MSI-X message, or disable MSI messages if the count
3107 		 * drops to 0.
3108 		 */
3109 		if (dinfo->cfg.msi.msi_alloc > 0) {
3110 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3111 
3112 			KASSERT(rid <= msi->msi_alloc,
3113 			    ("MSI-X index too high"));
3114 			KASSERT(msi->msi_handlers > 0,
3115 			    ("MSI rid %d is not setup", rid));
3116 
3117 			msi->msi_handlers--;
3118 			if (msi->msi_handlers == 0)
3119 				pci_disable_msi(child);
3120 		} else {
3121 			struct msix_vector *mv;
3122 
3123 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3124 			    ("No MSI or MSI-X rid %d allocated", rid));
3125 
3126 			mv = pci_find_msix_vector(child, rid);
3127 			KASSERT(mv != NULL,
3128 			    ("MSI-X rid %d is not allocated", rid));
3129 			KASSERT(mv->mv_address != 0,
3130 			    ("MSI-X rid %d has not been setup", rid));
3131 
3132 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3133 			mv->mv_address = 0;
3134 			mv->mv_data = 0;
3135 		}
3136 	}
3137 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3138 	if (rid > 0)
3139 		KASSERT(error == 0,
3140 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3141 	return (error);
3142 }
3143 
3144 int
3145 pci_print_child(device_t dev, device_t child)
3146 {
3147 	struct pci_devinfo *dinfo;
3148 	struct resource_list *rl;
3149 	int retval = 0;
3150 
3151 	dinfo = device_get_ivars(child);
3152 	rl = &dinfo->resources;
3153 
3154 	retval += bus_print_child_header(dev, child);
3155 
3156 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3157 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3158 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3159 	if (device_get_flags(dev))
3160 		retval += kprintf(" flags %#x", device_get_flags(dev));
3161 
3162 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3163 	    pci_get_function(child));
3164 
3165 	retval += bus_print_child_footer(dev, child);
3166 
3167 	return (retval);
3168 }
3169 
/*
 * Generic descriptions used by pci_probe_nomatch() for devices that no
 * driver claimed and that are not found in the vendor database.
 *
 * Each entry maps a PCI base class (and optionally a subclass) to a
 * human-readable string.  An entry whose subclass is -1 describes the
 * base class as a whole; the other entries describe one specific
 * subclass.  The table is terminated by an entry whose desc is NULL.
 */
static struct
{
	int	class;		/* PCI base class code (PCIC_*) */
	int	subclass;	/* PCI subclass code (PCIS_*), or -1 for the class */
	char	*desc;		/* description; NULL terminates the table */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3261 
3262 void
3263 pci_probe_nomatch(device_t dev, device_t child)
3264 {
3265 	int	i;
3266 	char	*cp, *scp, *device;
3267 
3268 	/*
3269 	 * Look for a listing for this device in a loaded device database.
3270 	 */
3271 	if ((device = pci_describe_device(child)) != NULL) {
3272 		device_printf(dev, "<%s>", device);
3273 		kfree(device, M_DEVBUF);
3274 	} else {
3275 		/*
3276 		 * Scan the class/subclass descriptions for a general
3277 		 * description.
3278 		 */
3279 		cp = "unknown";
3280 		scp = NULL;
3281 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3282 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3283 				if (pci_nomatch_tab[i].subclass == -1) {
3284 					cp = pci_nomatch_tab[i].desc;
3285 				} else if (pci_nomatch_tab[i].subclass ==
3286 				    pci_get_subclass(child)) {
3287 					scp = pci_nomatch_tab[i].desc;
3288 				}
3289 			}
3290 		}
3291 		device_printf(dev, "<%s%s%s>",
3292 		    cp ? cp : "",
3293 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3294 		    scp ? scp : "");
3295 	}
3296 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3297 		pci_get_vendor(child), pci_get_device(child),
3298 		pci_get_slot(child), pci_get_function(child));
3299 	if (pci_get_intpin(child) > 0) {
3300 		int irq;
3301 
3302 		irq = pci_get_irq(child);
3303 		if (PCI_INTERRUPT_VALID(irq))
3304 			kprintf(" irq %d", irq);
3305 	}
3306 	kprintf("\n");
3307 
3308 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3309 }
3310 
3311 /*
3312  * Parse the PCI device database, if loaded, and return a pointer to a
3313  * description of the device.
3314  *
3315  * The database is flat text formatted as follows:
3316  *
3317  * Any line not in a valid format is ignored.
3318  * Lines are terminated with newline '\n' characters.
3319  *
3320  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3321  * the vendor name.
3322  *
3323  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3324  * - devices cannot be listed without a corresponding VENDOR line.
3325  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3326  * another TAB, then the device name.
3327  */
3328 
3329 /*
3330  * Assuming (ptr) points to the beginning of a line in the database,
3331  * return the vendor or device and description of the next entry.
3332  * The value of (vendor) or (device) inappropriate for the entry type
3333  * is set to -1.  Returns nonzero at the end of the database.
3334  *
3335  * Note that this is slightly unrobust in the face of corrupt data;
3336  * we attempt to safeguard against this by spamming the end of the
3337  * database with a newline when we initialise.
3338  */
3339 static int
3340 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3341 {
3342 	char	*cp = *ptr;
3343 	int	left;
3344 
3345 	*device = -1;
3346 	*vendor = -1;
3347 	**desc = '\0';
3348 	for (;;) {
3349 		left = pci_vendordata_size - (cp - pci_vendordata);
3350 		if (left <= 0) {
3351 			*ptr = cp;
3352 			return(1);
3353 		}
3354 
3355 		/* vendor entry? */
3356 		if (*cp != '\t' &&
3357 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3358 			break;
3359 		/* device entry? */
3360 		if (*cp == '\t' &&
3361 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3362 			break;
3363 
3364 		/* skip to next line */
3365 		while (*cp != '\n' && left > 0) {
3366 			cp++;
3367 			left--;
3368 		}
3369 		if (*cp == '\n') {
3370 			cp++;
3371 			left--;
3372 		}
3373 	}
3374 	/* skip to next line */
3375 	while (*cp != '\n' && left > 0) {
3376 		cp++;
3377 		left--;
3378 	}
3379 	if (*cp == '\n' && left > 0)
3380 		cp++;
3381 	*ptr = cp;
3382 	return(0);
3383 }
3384 
3385 static char *
3386 pci_describe_device(device_t dev)
3387 {
3388 	int	vendor, device;
3389 	char	*desc, *vp, *dp, *line;
3390 
3391 	desc = vp = dp = NULL;
3392 
3393 	/*
3394 	 * If we have no vendor data, we can't do anything.
3395 	 */
3396 	if (pci_vendordata == NULL)
3397 		goto out;
3398 
3399 	/*
3400 	 * Scan the vendor data looking for this device
3401 	 */
3402 	line = pci_vendordata;
3403 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3404 		goto out;
3405 	for (;;) {
3406 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3407 			goto out;
3408 		if (vendor == pci_get_vendor(dev))
3409 			break;
3410 	}
3411 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3412 		goto out;
3413 	for (;;) {
3414 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3415 			*dp = 0;
3416 			break;
3417 		}
3418 		if (vendor != -1) {
3419 			*dp = 0;
3420 			break;
3421 		}
3422 		if (device == pci_get_device(dev))
3423 			break;
3424 	}
3425 	if (dp[0] == '\0')
3426 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3427 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3428 	    NULL)
3429 		ksprintf(desc, "%s, %s", vp, dp);
3430  out:
3431 	if (vp != NULL)
3432 		kfree(vp, M_DEVBUF);
3433 	if (dp != NULL)
3434 		kfree(dp, M_DEVBUF);
3435 	return(desc);
3436 }
3437 
3438 int
3439 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3440 {
3441 	struct pci_devinfo *dinfo;
3442 	pcicfgregs *cfg;
3443 
3444 	dinfo = device_get_ivars(child);
3445 	cfg = &dinfo->cfg;
3446 
3447 	switch (which) {
3448 	case PCI_IVAR_ETHADDR:
3449 		/*
3450 		 * The generic accessor doesn't deal with failure, so
3451 		 * we set the return value, then return an error.
3452 		 */
3453 		*((uint8_t **) result) = NULL;
3454 		return (EINVAL);
3455 	case PCI_IVAR_SUBVENDOR:
3456 		*result = cfg->subvendor;
3457 		break;
3458 	case PCI_IVAR_SUBDEVICE:
3459 		*result = cfg->subdevice;
3460 		break;
3461 	case PCI_IVAR_VENDOR:
3462 		*result = cfg->vendor;
3463 		break;
3464 	case PCI_IVAR_DEVICE:
3465 		*result = cfg->device;
3466 		break;
3467 	case PCI_IVAR_DEVID:
3468 		*result = (cfg->device << 16) | cfg->vendor;
3469 		break;
3470 	case PCI_IVAR_CLASS:
3471 		*result = cfg->baseclass;
3472 		break;
3473 	case PCI_IVAR_SUBCLASS:
3474 		*result = cfg->subclass;
3475 		break;
3476 	case PCI_IVAR_PROGIF:
3477 		*result = cfg->progif;
3478 		break;
3479 	case PCI_IVAR_REVID:
3480 		*result = cfg->revid;
3481 		break;
3482 	case PCI_IVAR_INTPIN:
3483 		*result = cfg->intpin;
3484 		break;
3485 	case PCI_IVAR_IRQ:
3486 		*result = cfg->intline;
3487 		break;
3488 	case PCI_IVAR_DOMAIN:
3489 		*result = cfg->domain;
3490 		break;
3491 	case PCI_IVAR_BUS:
3492 		*result = cfg->bus;
3493 		break;
3494 	case PCI_IVAR_SLOT:
3495 		*result = cfg->slot;
3496 		break;
3497 	case PCI_IVAR_FUNCTION:
3498 		*result = cfg->func;
3499 		break;
3500 	case PCI_IVAR_CMDREG:
3501 		*result = cfg->cmdreg;
3502 		break;
3503 	case PCI_IVAR_CACHELNSZ:
3504 		*result = cfg->cachelnsz;
3505 		break;
3506 	case PCI_IVAR_MINGNT:
3507 		*result = cfg->mingnt;
3508 		break;
3509 	case PCI_IVAR_MAXLAT:
3510 		*result = cfg->maxlat;
3511 		break;
3512 	case PCI_IVAR_LATTIMER:
3513 		*result = cfg->lattimer;
3514 		break;
3515 	case PCI_IVAR_PCIXCAP_PTR:
3516 		*result = cfg->pcix.pcix_ptr;
3517 		break;
3518 	case PCI_IVAR_PCIECAP_PTR:
3519 		*result = cfg->expr.expr_ptr;
3520 		break;
3521 	case PCI_IVAR_VPDCAP_PTR:
3522 		*result = cfg->vpd.vpd_reg;
3523 		break;
3524 	default:
3525 		return (ENOENT);
3526 	}
3527 	return (0);
3528 }
3529 
3530 int
3531 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3532 {
3533 	struct pci_devinfo *dinfo;
3534 
3535 	dinfo = device_get_ivars(child);
3536 
3537 	switch (which) {
3538 	case PCI_IVAR_INTPIN:
3539 		dinfo->cfg.intpin = value;
3540 		return (0);
3541 	case PCI_IVAR_ETHADDR:
3542 	case PCI_IVAR_SUBVENDOR:
3543 	case PCI_IVAR_SUBDEVICE:
3544 	case PCI_IVAR_VENDOR:
3545 	case PCI_IVAR_DEVICE:
3546 	case PCI_IVAR_DEVID:
3547 	case PCI_IVAR_CLASS:
3548 	case PCI_IVAR_SUBCLASS:
3549 	case PCI_IVAR_PROGIF:
3550 	case PCI_IVAR_REVID:
3551 	case PCI_IVAR_IRQ:
3552 	case PCI_IVAR_DOMAIN:
3553 	case PCI_IVAR_BUS:
3554 	case PCI_IVAR_SLOT:
3555 	case PCI_IVAR_FUNCTION:
3556 		return (EINVAL);	/* disallow for now */
3557 
3558 	default:
3559 		return (ENOENT);
3560 	}
3561 }
3562 #ifdef notyet
3563 #include "opt_ddb.h"
3564 #ifdef DDB
3565 #include <ddb/ddb.h>
3566 #include <sys/cons.h>
3567 
3568 /*
3569  * List resources based on pci map registers, used for within ddb
3570  */
3571 
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Running unit number handed out to devices without a driver name. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 *
	 * The walk stops at the end of the list, after pci_numdevs
	 * entries, or when db_pager_quit is set.  (error) is never
	 * changed from 0 inside the loop.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		/*
		 * One line per device: "<name><unit>@pci<domain>:<bus>:
		 * <dev>:<func>:" followed by the class, subsystem and chip
		 * IDs, revision and header type.  Devices without a name
		 * are shown as "none<N>".
		 */
		p = &dinfo->conf;
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3611 #endif /* DDB */
3612 #endif
3613 
/*
 * Lazily allocate the resource backing the BAR at config offset (*rid)
 * for (child), for which no resource list entry exists yet.
 *
 * The BAR is sized by writing all ones to it and reading it back.  The
 * size requested by the driver is replaced by the size the BAR decodes,
 * and the alignment is raised to match.  The range is then allocated
 * from the parent of (dev), recorded in the child's resource list, and
 * its start address is written into the BAR.
 *
 * Returns the allocated resource, or NULL if the BAR is unimplemented,
 * its type does not match (type), or the allocation fails.  On failure
 * the BAR's original value is written back.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/* Save the current value, then probe with all ones. */
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit BAR keeps the upper half of its value in the next dword. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested resource type must agree with what the BAR decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* Raise the requested alignment to at least the BAR's size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/*
	 * (map) is either the newly allocated start address (success) or
	 * the value the BAR held on entry (failure); program it either way.
	 */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3712 
3713 
3714 struct resource *
3715 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3716     u_long start, u_long end, u_long count, u_int flags, int cpuid)
3717 {
3718 	struct pci_devinfo *dinfo = device_get_ivars(child);
3719 	struct resource_list *rl = &dinfo->resources;
3720 	struct resource_list_entry *rle;
3721 	pcicfgregs *cfg = &dinfo->cfg;
3722 
3723 	/*
3724 	 * Perform lazy resource allocation
3725 	 */
3726 	if (device_get_parent(child) == dev) {
3727 		switch (type) {
3728 		case SYS_RES_IRQ:
3729 			/*
3730 			 * Can't alloc legacy interrupt once MSI messages
3731 			 * have been allocated.
3732 			 */
3733 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3734 			    cfg->msix.msix_alloc > 0))
3735 				return (NULL);
3736 			/*
3737 			 * If the child device doesn't have an
3738 			 * interrupt routed and is deserving of an
3739 			 * interrupt, try to assign it one.
3740 			 */
3741 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3742 			    (cfg->intpin != 0))
3743 				pci_assign_interrupt(dev, child, 0);
3744 			break;
3745 		case SYS_RES_IOPORT:
3746 		case SYS_RES_MEMORY:
3747 			if (*rid < PCIR_BAR(cfg->nummaps)) {
3748 				/*
3749 				 * Enable the I/O mode.  We should
3750 				 * also be assigning resources too
3751 				 * when none are present.  The
3752 				 * resource_list_alloc kind of sorta does
3753 				 * this...
3754 				 */
3755 				if (PCI_ENABLE_IO(dev, child, type))
3756 					return (NULL);
3757 			}
3758 			rle = resource_list_find(rl, type, *rid);
3759 			if (rle == NULL)
3760 				return (pci_alloc_map(dev, child, type, rid,
3761 				    start, end, count, flags));
3762 			break;
3763 		}
3764 		/*
3765 		 * If we've already allocated the resource, then
3766 		 * return it now.  But first we may need to activate
3767 		 * it, since we don't allocate the resource as active
3768 		 * above.  Normally this would be done down in the
3769 		 * nexus, but since we short-circuit that path we have
3770 		 * to do its job here.  Not sure if we should kfree the
3771 		 * resource if it fails to activate.
3772 		 */
3773 		rle = resource_list_find(rl, type, *rid);
3774 		if (rle != NULL && rle->res != NULL) {
3775 			if (bootverbose)
3776 				device_printf(child,
3777 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3778 				    rman_get_size(rle->res), *rid, type,
3779 				    rman_get_start(rle->res));
3780 			if ((flags & RF_ACTIVE) &&
3781 			    bus_generic_activate_resource(dev, child, type,
3782 			    *rid, rle->res) != 0)
3783 				return (NULL);
3784 			return (rle->res);
3785 		}
3786 	}
3787 	return (resource_list_alloc(rl, dev, child, type, rid,
3788 	    start, end, count, flags, cpuid));
3789 }
3790 
3791 void
3792 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3793 {
3794 	struct pci_devinfo *dinfo;
3795 	struct resource_list *rl;
3796 	struct resource_list_entry *rle;
3797 
3798 	if (device_get_parent(child) != dev)
3799 		return;
3800 
3801 	dinfo = device_get_ivars(child);
3802 	rl = &dinfo->resources;
3803 	rle = resource_list_find(rl, type, rid);
3804 	if (rle) {
3805 		if (rle->res) {
3806 			if (rman_get_device(rle->res) != dev ||
3807 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3808 				device_printf(dev, "delete_resource: "
3809 				    "Resource still owned by child, oops. "
3810 				    "(type=%d, rid=%d, addr=%lx)\n",
3811 				    rle->type, rle->rid,
3812 				    rman_get_start(rle->res));
3813 				return;
3814 			}
3815 			bus_release_resource(dev, type, rid, rle->res);
3816 		}
3817 		resource_list_delete(rl, type, rid);
3818 	}
3819 	/*
3820 	 * Why do we turn off the PCI configuration BAR when we delete a
3821 	 * resource? -- imp
3822 	 */
3823 	pci_write_config(child, rid, 0, 4);
3824 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3825 }
3826 
3827 struct resource_list *
3828 pci_get_resource_list (device_t dev, device_t child)
3829 {
3830 	struct pci_devinfo *dinfo = device_get_ivars(child);
3831 
3832 	if (dinfo == NULL)
3833 		return (NULL);
3834 
3835 	return (&dinfo->resources);
3836 }
3837 
3838 uint32_t
3839 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3840 {
3841 	struct pci_devinfo *dinfo = device_get_ivars(child);
3842 	pcicfgregs *cfg = &dinfo->cfg;
3843 
3844 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3845 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3846 }
3847 
3848 void
3849 pci_write_config_method(device_t dev, device_t child, int reg,
3850     uint32_t val, int width)
3851 {
3852 	struct pci_devinfo *dinfo = device_get_ivars(child);
3853 	pcicfgregs *cfg = &dinfo->cfg;
3854 
3855 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3856 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3857 }
3858 
3859 int
3860 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3861     size_t buflen)
3862 {
3863 
3864 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3865 	    pci_get_function(child));
3866 	return (0);
3867 }
3868 
3869 int
3870 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3871     size_t buflen)
3872 {
3873 	struct pci_devinfo *dinfo;
3874 	pcicfgregs *cfg;
3875 
3876 	dinfo = device_get_ivars(child);
3877 	cfg = &dinfo->cfg;
3878 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3879 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3880 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3881 	    cfg->progif);
3882 	return (0);
3883 }
3884 
3885 int
3886 pci_assign_interrupt_method(device_t dev, device_t child)
3887 {
3888 	struct pci_devinfo *dinfo = device_get_ivars(child);
3889 	pcicfgregs *cfg = &dinfo->cfg;
3890 
3891 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3892 	    cfg->intpin));
3893 }
3894 
3895 static int
3896 pci_modevent(module_t mod, int what, void *arg)
3897 {
3898 	static struct cdev *pci_cdev;
3899 
3900 	switch (what) {
3901 	case MOD_LOAD:
3902 		STAILQ_INIT(&pci_devq);
3903 		pci_generation = 0;
3904 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3905 				    "pci");
3906 		pci_load_vendor_data();
3907 		break;
3908 
3909 	case MOD_UNLOAD:
3910 		destroy_dev(pci_cdev);
3911 		break;
3912 	}
3913 
3914 	return (0);
3915 }
3916 
3917 void
3918 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3919 {
3920 	int i;
3921 
3922 	/*
3923 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3924 	 * which we know need special treatment.  Type 2 devices are
3925 	 * cardbus bridges which also require special treatment.
3926 	 * Other types are unknown, and we err on the side of safety
3927 	 * by ignoring them.
3928 	 */
3929 	if (dinfo->cfg.hdrtype != 0)
3930 		return;
3931 
3932 	/*
3933 	 * Restore the device to full power mode.  We must do this
3934 	 * before we restore the registers because moving from D3 to
3935 	 * D0 will cause the chip's BARs and some other registers to
3936 	 * be reset to some unknown power on reset values.  Cut down
3937 	 * the noise on boot by doing nothing if we are already in
3938 	 * state D0.
3939 	 */
3940 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3941 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3942 	}
3943 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3944 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3945 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3946 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3947 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3948 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3949 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3950 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3951 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3952 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3953 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3954 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3955 
3956 	/* Restore MSI and MSI-X configurations if they are present. */
3957 	if (dinfo->cfg.msi.msi_location != 0)
3958 		pci_resume_msi(dev);
3959 	if (dinfo->cfg.msix.msix_location != 0)
3960 		pci_resume_msix(dev);
3961 }
3962 
3963 void
3964 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3965 {
3966 	int i;
3967 	uint32_t cls;
3968 	int ps;
3969 
3970 	/*
3971 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3972 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3973 	 * which also require special treatment.  Other types are unknown, and
3974 	 * we err on the side of safety by ignoring them.  Powering down
3975 	 * bridges should not be undertaken lightly.
3976 	 */
3977 	if (dinfo->cfg.hdrtype != 0)
3978 		return;
3979 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3980 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3981 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3982 
3983 	/*
3984 	 * Some drivers apparently write to these registers w/o updating our
3985 	 * cached copy.  No harm happens if we update the copy, so do so here
3986 	 * so we can restore them.  The COMMAND register is modified by the
3987 	 * bus w/o updating the cache.  This should represent the normally
3988 	 * writable portion of the 'defined' part of type 0 headers.  In
3989 	 * theory we also need to save/restore the PCI capability structures
3990 	 * we know about, but apart from power we don't know any that are
3991 	 * writable.
3992 	 */
3993 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3994 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3995 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3996 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3997 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3998 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
3999 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4000 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4001 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4002 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4003 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4004 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4005 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4006 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4007 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4008 
4009 	/*
4010 	 * don't set the state for display devices, base peripherals and
4011 	 * memory devices since bad things happen when they are powered down.
4012 	 * We should (a) have drivers that can easily detach and (b) use
4013 	 * generic drivers for these devices so that some device actually
4014 	 * attaches.  We need to make sure that when we implement (a) we don't
4015 	 * power the device down on a reattach.
4016 	 */
4017 	cls = pci_get_class(dev);
4018 	if (!setstate)
4019 		return;
4020 	switch (pci_do_power_nodriver)
4021 	{
4022 		case 0:		/* NO powerdown at all */
4023 			return;
4024 		case 1:		/* Conservative about what to power down */
4025 			if (cls == PCIC_STORAGE)
4026 				return;
4027 			/*FALLTHROUGH*/
4028 		case 2:		/* Agressive about what to power down */
4029 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4030 			    cls == PCIC_BASEPERIPH)
4031 				return;
4032 			/*FALLTHROUGH*/
4033 		case 3:		/* Power down everything */
4034 			break;
4035 	}
4036 	/*
4037 	 * PCI spec says we can only go into D3 state from D0 state.
4038 	 * Transition from D[12] into D0 before going to D3 state.
4039 	 */
4040 	ps = pci_get_powerstate(dev);
4041 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4042 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4043 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4044 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4045 }
4046 
4047 #ifdef COMPAT_OLDPCI
4048 
4049 /*
4050  * Locate the parent of a PCI device by scanning the PCI devlist
4051  * and return the entry for the parent.
4052  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4053  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4054  */
4055 pcicfgregs *
4056 pci_devlist_get_parent(pcicfgregs *cfg)
4057 {
4058 	struct devlist *devlist_head;
4059 	struct pci_devinfo *dinfo;
4060 	pcicfgregs *bridge_cfg;
4061 	int i;
4062 
4063 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4064 
4065 	/* If the device is on PCI bus 0, look for the host */
4066 	if (cfg->bus == 0) {
4067 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4068 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4069 			bridge_cfg = &dinfo->cfg;
4070 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4071 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4072 		    		&& bridge_cfg->bus == cfg->bus) {
4073 				return bridge_cfg;
4074 			}
4075 		}
4076 	}
4077 
4078 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4079 	if (cfg->bus > 0) {
4080 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4081 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4082 			bridge_cfg = &dinfo->cfg;
4083 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4084 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4085 				&& bridge_cfg->secondarybus == cfg->bus) {
4086 				return bridge_cfg;
4087 			}
4088 		}
4089 	}
4090 
4091 	return NULL;
4092 }
4093 
4094 #endif	/* COMPAT_OLDPCI */
4095 
4096 int
4097 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4098 {
4099 	int rid, type;
4100 	u_int flags;
4101 
4102 	rid = 0;
4103 	type = PCI_INTR_TYPE_LEGACY;
4104 	flags = RF_SHAREABLE | RF_ACTIVE;
4105 
4106 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4107 	if (msi_enable) {
4108 		int cpu;
4109 
4110 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4111 		if (cpu >= ncpus)
4112 			cpu = ncpus - 1;
4113 
4114 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4115 			flags &= ~RF_SHAREABLE;
4116 			type = PCI_INTR_TYPE_MSI;
4117 		}
4118 	}
4119 
4120 	*rid0 = rid;
4121 	*flags0 = flags;
4122 
4123 	return type;
4124 }
4125