xref: /dragonfly/sys/bus/pci/pci.c (revision ce7a3582)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_bus.h"
32 #include "opt_acpi.h"
33 #include "opt_compat_oldpci.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 #include <sys/machintr.h>
47 
48 #include <machine/msi_machdep.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 
54 #include <sys/bus.h>
55 #include <sys/rman.h>
56 #include <sys/device.h>
57 
58 #include <sys/pciio.h>
59 #include <bus/pci/pcireg.h>
60 #include <bus/pci/pcivar.h>
61 #include <bus/pci/pci_private.h>
62 
63 #include "pcib_if.h"
64 #include "pci_if.h"
65 
66 #ifdef __HAVE_ACPI
67 #include <contrib/dev/acpica/acpi.h>
68 #include "acpi_if.h"
69 #else
70 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
71 #endif
72 
73 extern struct dev_ops pcic_ops;	/* XXX */
74 
75 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
76 
77 static uint32_t		pci_mapbase(unsigned mapreg);
78 static const char	*pci_maptype(unsigned mapreg);
79 static int		pci_mapsize(unsigned testval);
80 static int		pci_maprange(unsigned mapreg);
81 static void		pci_fixancient(pcicfgregs *cfg);
82 
83 static int		pci_porten(device_t pcib, int b, int s, int f);
84 static int		pci_memen(device_t pcib, int b, int s, int f);
85 static void		pci_assign_interrupt(device_t bus, device_t dev,
86 			    int force_route);
87 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
88 			    int b, int s, int f, int reg,
89 			    struct resource_list *rl, int force, int prefetch);
90 static int		pci_probe(device_t dev);
91 static int		pci_attach(device_t dev);
92 static void		pci_child_detached(device_t, device_t);
93 static void		pci_load_vendor_data(void);
94 static int		pci_describe_parse_line(char **ptr, int *vendor,
95 			    int *device, char **desc);
96 static char		*pci_describe_device(device_t dev);
97 static int		pci_modevent(module_t mod, int what, void *arg);
98 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
99 			    pcicfgregs *cfg);
100 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
101 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t *data);
103 #if 0
104 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
105 			    int reg, uint32_t data);
106 #endif
107 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
108 static void		pci_disable_msi(device_t dev);
109 static void		pci_enable_msi(device_t dev, uint64_t address,
110 			    uint16_t data);
111 static void		pci_setup_msix_vector(device_t dev, u_int index,
112 			    uint64_t address, uint32_t data);
113 static void		pci_mask_msix_vector(device_t dev, u_int index);
114 static void		pci_unmask_msix_vector(device_t dev, u_int index);
115 static void		pci_mask_msix_allvectors(device_t dev);
116 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
117 static int		pci_msi_blacklisted(void);
118 static void		pci_resume_msi(device_t dev);
119 static void		pci_resume_msix(device_t dev);
120 static int		pcie_slotimpl(const pcicfgregs *);
121 static void		pci_print_verbose_expr(const pcicfgregs *);
122 
123 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_subvendor(device_t, int, int,
129 			    pcicfgregs *);
130 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
131 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
132 
/*
 * kobj dispatch table for the PCI bus driver: binds the generic device,
 * bus and PCI interface methods to this file's implementations (or to
 * the bus_generic_* fallbacks).  Terminated by the { 0, 0 } sentinel.
 */
133 static device_method_t pci_methods[] = {
134 	/* Device interface */
135 	DEVMETHOD(device_probe,		pci_probe),
136 	DEVMETHOD(device_attach,	pci_attach),
137 	DEVMETHOD(device_detach,	bus_generic_detach),
138 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
139 	DEVMETHOD(device_suspend,	pci_suspend),
140 	DEVMETHOD(device_resume,	pci_resume),
141 
142 	/* Bus interface */
143 	DEVMETHOD(bus_print_child,	pci_print_child),
144 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
145 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
146 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
147 	DEVMETHOD(bus_driver_added,	pci_driver_added),
148 	DEVMETHOD(bus_child_detached,	pci_child_detached),
149 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
150 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
151 
152 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
153 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
154 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
155 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
156 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
157 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
158 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
159 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
160 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
161 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
162 
163 	/* PCI interface */
164 	DEVMETHOD(pci_read_config,	pci_read_config_method),
165 	DEVMETHOD(pci_write_config,	pci_write_config_method),
166 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
167 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
168 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
169 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
170 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
171 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
172 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
173 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
174 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
175 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
176 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
177 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
179 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
180 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
181 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
182 
183 	{ 0, 0 }
184 };
185 
186 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
187 
188 static devclass_t pci_devclass;
189 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
190 MODULE_VERSION(pci, 1);
191 
192 static char	*pci_vendordata;
193 static size_t	pci_vendordata_size;
194 
195 
/*
 * Capability parser dispatch table: pci_read_capabilities() walks the
 * device's capability list and, for each capability ID found here,
 * calls the matching pci_read_cap_* parser.  NULL read_cap terminates.
 */
196 static const struct pci_read_cap {
197 	int		cap;
198 	pci_read_cap_t	read_cap;
199 } pci_read_caps[] = {
200 	{ PCIY_PMG,		pci_read_cap_pmgt },
201 	{ PCIY_HT,		pci_read_cap_ht },
202 	{ PCIY_MSI,		pci_read_cap_msi },
203 	{ PCIY_MSIX,		pci_read_cap_msix },
204 	{ PCIY_VPD,		pci_read_cap_vpd },
205 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
206 	{ PCIY_PCIX,		pci_read_cap_pcix },
207 	{ PCIY_EXPRESS,		pci_read_cap_express },
208 	{ 0, NULL } /* required last entry */
209 };
210 
/*
 * Per-device quirk table.  devid holds the combined vendor/device dword
 * (presumably in PCIR_DEVVENDOR layout, vendor in the low 16 bits, as
 * the 0x....8086 Intel entries suggest — confirm against callers).
 */
211 struct pci_quirk {
212 	uint32_t devid;	/* Vendor/device of the card */
213 	int	type;
214 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
215 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
216 	int	arg1;
217 	int	arg2;
218 };
219 
220 struct pci_quirk pci_quirks[] = {
221 	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
222 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
223 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
224 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
225 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 
227 	/*
228 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
229 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
230 	 */
231 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
232 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
233 
234 	/*
235 	 * MSI doesn't work on earlier Intel chipsets including
236 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
237 	 */
238 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
239 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 
246 	/*
247 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
248 	 * bridge.
249 	 */
250 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
251 
252 	{ 0 }	/* terminator */
253 };
254 
255 /* map register information */
256 #define	PCI_MAPMEM	0x01	/* memory map */
257 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
258 #define	PCI_MAPPORT	0x04	/* port map */
259 
260 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
261 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
262 
/*
 * Global bookkeeping: list of every probed PCI function, a generation
 * counter bumped on each list change, and the current device count.
 */
263 struct devlist pci_devq;
264 uint32_t pci_generation;
265 uint32_t pci_numdevs = 0;
/* Set by the capability parsers when a PCIe/PCI-X function is seen. */
266 static int pcie_chipset, pcix_chipset;
267 
268 /* sysctl vars */
269 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
270 
271 static int pci_enable_io_modes = 1;
272 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
273 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
274     &pci_enable_io_modes, 1,
275     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
276 enable these bits correctly.  We'd like to do this all the time, but there\n\
277 are some peripherals that this causes problems with.");
278 
279 static int pci_do_power_nodriver = 0;
280 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
281 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
282     &pci_do_power_nodriver, 0,
283   "Place a function into D3 state when no driver attaches to it.  0 means\n\
284 disable.  1 means conservatively place devices into D3 state.  2 means\n\
285 aggressively place devices into D3 state.  3 means put absolutely everything\n\
286 in D3 state.");
287 
288 static int pci_do_power_resume = 1;
289 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
290 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
291     &pci_do_power_resume, 1,
292   "Transition from D3 -> D0 on resume.");
293 
294 static int pci_do_msi = 1;
295 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
296 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
297     "Enable support for MSI interrupts");
298 
299 static int pci_do_msix = 1;
300 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
301 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
302     "Enable support for MSI-X interrupts");
303 
304 static int pci_honor_msi_blacklist = 1;
305 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
306 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
307     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
308 
/* NOTE(review): target CPU for MSI routing, presumably — set elsewhere. */
309 static int pci_msi_cpuid;
310 
311 /* Find a device_t by bus/slot/function in domain 0 */
312 
313 device_t
314 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
315 {
316 
317 	return (pci_find_dbsf(0, bus, slot, func));
318 }
319 
320 /* Find a device_t by domain/bus/slot/function */
321 
322 device_t
323 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
324 {
325 	struct pci_devinfo *dinfo;
326 
327 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
328 		if ((dinfo->cfg.domain == domain) &&
329 		    (dinfo->cfg.bus == bus) &&
330 		    (dinfo->cfg.slot == slot) &&
331 		    (dinfo->cfg.func == func)) {
332 			return (dinfo->cfg.dev);
333 		}
334 	}
335 
336 	return (NULL);
337 }
338 
339 /* Find a device_t by vendor/device ID */
340 
341 device_t
342 pci_find_device(uint16_t vendor, uint16_t device)
343 {
344 	struct pci_devinfo *dinfo;
345 
346 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
347 		if ((dinfo->cfg.vendor == vendor) &&
348 		    (dinfo->cfg.device == device)) {
349 			return (dinfo->cfg.dev);
350 		}
351 	}
352 
353 	return (NULL);
354 }
355 
356 /* return base address of memory or port map */
357 
358 static uint32_t
359 pci_mapbase(uint32_t mapreg)
360 {
361 
362 	if (PCI_BAR_MEM(mapreg))
363 		return (mapreg & PCIM_BAR_MEM_BASE);
364 	else
365 		return (mapreg & PCIM_BAR_IO_BASE);
366 }
367 
368 /* return map type of memory or port map */
369 
370 static const char *
371 pci_maptype(unsigned mapreg)
372 {
373 
374 	if (PCI_BAR_IO(mapreg))
375 		return ("I/O Port");
376 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
377 		return ("Prefetchable Memory");
378 	return ("Memory");
379 }
380 
381 /* return log2 of map size decoded for memory or port map */
382 
383 static int
384 pci_mapsize(uint32_t testval)
385 {
386 	int ln2size;
387 
388 	testval = pci_mapbase(testval);
389 	ln2size = 0;
390 	if (testval != 0) {
391 		while ((testval & 1) == 0)
392 		{
393 			ln2size++;
394 			testval >>= 1;
395 		}
396 	}
397 	return (ln2size);
398 }
399 
400 /* return log2 of address range supported by map register */
401 
402 static int
403 pci_maprange(unsigned mapreg)
404 {
405 	int ln2range = 0;
406 
407 	if (PCI_BAR_IO(mapreg))
408 		ln2range = 32;
409 	else
410 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
411 		case PCIM_BAR_MEM_32:
412 			ln2range = 32;
413 			break;
414 		case PCIM_BAR_MEM_1MB:
415 			ln2range = 20;
416 			break;
417 		case PCIM_BAR_MEM_64:
418 			ln2range = 64;
419 			break;
420 		}
421 	return (ln2range);
422 }
423 
424 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
425 
426 static void
427 pci_fixancient(pcicfgregs *cfg)
428 {
429 	if (cfg->hdrtype != 0)
430 		return;
431 
432 	/* PCI to PCI bridges use header type 1 */
433 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
434 		cfg->hdrtype = 1;
435 }
436 
437 /* extract header type specific config data */
438 
/*
 * Fill in the header-type specific fields of *cfg — subsystem IDs and
 * number of BARs (and the secondary bus for bridges when COMPAT_OLDPCI)
 * — by reading the function's config space.  Unknown header types are
 * left untouched.
 */
439 static void
440 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
441 {
442 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
443 	switch (cfg->hdrtype) {
	/* Type 0: ordinary function. */
444 	case 0:
445 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
446 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
447 		cfg->nummaps	    = PCI_MAXMAPS_0;
448 		break;
	/* Type 1: PCI-PCI bridge (no subsystem IDs in the base header). */
449 	case 1:
450 		cfg->nummaps	    = PCI_MAXMAPS_1;
451 #ifdef COMPAT_OLDPCI
452 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
453 #endif
454 		break;
	/* Type 2: CardBus bridge. */
455 	case 2:
456 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
457 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
458 		cfg->nummaps	    = PCI_MAXMAPS_2;
459 #ifdef COMPAT_OLDPCI
460 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
461 #endif
462 		break;
463 	}
464 #undef REG
465 }
466 
467 /* read configuration header into pcicfgregs structure */
/*
 * Probe function d:b:s:f and, if it responds, allocate a pci_devinfo of
 * "size" bytes (callers may embed it in a larger softc-like structure),
 * fill its cfg from config space, parse the capability list, link it on
 * the global pci_devq list and mirror the IDs into the pciio conf.
 * Returns the new entry, or NULL if nothing decodes at that address.
 */
468 struct pci_devinfo *
469 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
470 {
471 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
472 	pcicfgregs *cfg = NULL;
473 	struct pci_devinfo *devlist_entry;
474 	struct devlist *devlist_head;
475 
476 	devlist_head = &pci_devq;
477 
478 	devlist_entry = NULL;
479 
	/* An all-ones vendor/device dword means no function decodes here. */
480 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
481 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
482 
483 		cfg = &devlist_entry->cfg;
484 
485 		cfg->domain		= d;
486 		cfg->bus		= b;
487 		cfg->slot		= s;
488 		cfg->func		= f;
489 		cfg->vendor		= REG(PCIR_VENDOR, 2);
490 		cfg->device		= REG(PCIR_DEVICE, 2);
491 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
492 		cfg->statreg		= REG(PCIR_STATUS, 2);
493 		cfg->baseclass		= REG(PCIR_CLASS, 1);
494 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
495 		cfg->progif		= REG(PCIR_PROGIF, 1);
496 		cfg->revid		= REG(PCIR_REVID, 1);
497 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
498 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
499 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
500 		cfg->intpin		= REG(PCIR_INTPIN, 1);
501 		cfg->intline		= REG(PCIR_INTLINE, 1);
502 
503 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
504 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
505 
	/* Record the multi-function bit, then strip it so hdrtype is
	 * just the bare header-type code (0/1/2). */
506 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
507 		cfg->hdrtype		&= ~PCIM_MFDEV;
508 
509 		pci_fixancient(cfg);
510 		pci_hdrtypedata(pcib, b, s, f, cfg);
511 
512 		pci_read_capabilities(pcib, cfg);
513 
514 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
515 
	/* Mirror identification into the pciio(4)-visible conf struct. */
516 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
517 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
518 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
519 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
520 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
521 
522 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
523 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
524 		devlist_entry->conf.pc_vendor = cfg->vendor;
525 		devlist_entry->conf.pc_device = cfg->device;
526 
527 		devlist_entry->conf.pc_class = cfg->baseclass;
528 		devlist_entry->conf.pc_subclass = cfg->subclass;
529 		devlist_entry->conf.pc_progif = cfg->progif;
530 		devlist_entry->conf.pc_revid = cfg->revid;
531 
532 		pci_numdevs++;
533 		pci_generation++;
534 	}
535 	return (devlist_entry);
536 #undef REG
537 }
538 
539 static int
540 pci_fixup_nextptr(int *nextptr0)
541 {
542 	int nextptr = *nextptr0;
543 
544 	/* "Next pointer" is only one byte */
545 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d\n", nextptr));
546 
547 	if (nextptr & 0x3) {
548 		/*
549 		 * PCI local bus spec 3.0:
550 		 *
551 		 * "... The bottom two bits of all pointers are reserved
552 		 *  and must be implemented as 00b although software must
553 		 *  mask them to allow for future uses of these bits ..."
554 		 */
555 		if (bootverbose) {
556 			kprintf("Illegal PCI extended capability "
557 				"offset, fixup 0x%02x -> 0x%02x\n",
558 				nextptr, nextptr & ~0x3);
559 		}
560 		nextptr &= ~0x3;
561 	}
562 	*nextptr0 = nextptr;
563 
564 	if (nextptr < 0x40) {
565 		if (nextptr != 0) {
566 			kprintf("Illegal PCI extended capability "
567 				"offset 0x%02x", nextptr);
568 		}
569 		return 0;
570 	}
571 	return 1;
572 }
573 
/*
 * Parse a PCI power-management capability: record the capability word
 * and the config offsets of the status and PMCSR registers.  Only the
 * first PM capability encountered is recorded (pp_cap acts as a
 * seen-before flag).
 */
574 static void
575 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
576 {
577 #define REG(n, w)	\
578 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
579 
580 	struct pcicfg_pp *pp = &cfg->pp;
581 
582 	if (pp->pp_cap)
583 		return;
584 
585 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
586 	pp->pp_status = ptr + PCIR_POWER_STATUS;
587 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
588 
	/* Record the data register only if the capability is large enough
	 * to contain it (gap to the next capability exceeds its offset). */
589 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
590 		/*
591 		 * XXX
592 		 * We should write to data_select and read back from
593 		 * data_scale to determine whether data register is
594 		 * implemented.
595 		 */
596 #ifdef foo
597 		pp->pp_data = ptr + PCIR_POWER_DATA;
598 #else
599 		pp->pp_data = 0;
600 #endif
601 	}
602 
603 #undef REG
604 }
605 
/*
 * Parse a HyperTransport capability (x86 only): note HT slave blocks,
 * and for the MSI-mapping capability record the mapping window address,
 * control word and config offset for later MSI setup.
 */
606 static void
607 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
608 {
609 #if defined(__i386__) || defined(__x86_64__)
610 
611 #define REG(n, w)	\
612 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
613 
614 	struct pcicfg_ht *ht = &cfg->ht;
615 	uint64_t addr;
616 	uint32_t val;
617 
618 	/* Determine HT-specific capability type. */
619 	val = REG(ptr + PCIR_HT_COMMAND, 2);
620 
	/* NOTE(review): top three bits of the command word carry the
	 * slave/primary capability type here — compare uses 0xe000. */
621 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
622 		cfg->ht.ht_slave = ptr;
623 
	/* Everything below only applies to the MSI-mapping capability. */
624 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
625 		return;
626 
627 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
628 		/* Sanity check the mapping window. */
629 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
630 		addr <<= 32;
631 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
632 		if (addr != MSI_X86_ADDR_BASE) {
633 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
634 				"has non-default MSI window 0x%llx\n",
635 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
636 				(long long)addr);
637 		}
638 	} else {
		/* Fixed mapping: window is the architectural MSI base. */
639 		addr = MSI_X86_ADDR_BASE;
640 	}
641 
642 	ht->ht_msimap = ptr;
643 	ht->ht_msictrl = val;
644 	ht->ht_msiaddr = addr;
645 
646 #undef REG
647 
648 #endif	/* __i386__ || __x86_64__ */
649 }
650 
651 static void
652 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
653 {
654 #define REG(n, w)	\
655 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
656 
657 	struct pcicfg_msi *msi = &cfg->msi;
658 
659 	msi->msi_location = ptr;
660 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
661 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
662 
663 #undef REG
664 }
665 
666 static void
667 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
668 {
669 #define REG(n, w)	\
670 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
671 
672 	struct pcicfg_msix *msix = &cfg->msix;
673 	uint32_t val;
674 
675 	msix->msix_location = ptr;
676 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
677 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
678 
679 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
680 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
681 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
682 
683 	val = REG(ptr + PCIR_MSIX_PBA, 4);
684 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
685 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
686 
687 	TAILQ_INIT(&msix->msix_vectors);
688 
689 #undef REG
690 }
691 
/*
 * Parse a VPD capability: just record where it lives; the actual
 * Vital Product Data is read lazily by pci_read_vpd().
 */
692 static void
693 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
694 {
695 	cfg->vpd.vpd_reg = ptr;
696 }
697 
698 static void
699 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
700 {
701 #define REG(n, w)	\
702 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
703 
704 	/* Should always be true. */
705 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
706 		uint32_t val;
707 
708 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
709 		cfg->subvendor = val & 0xffff;
710 		cfg->subdevice = val >> 16;
711 	}
712 
713 #undef REG
714 }
715 
716 static void
717 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
718 {
719 	/*
720 	 * Assume we have a PCI-X chipset if we have
721 	 * at least one PCI-PCI bridge with a PCI-X
722 	 * capability.  Note that some systems with
723 	 * PCI-express or HT chipsets might match on
724 	 * this check as well.
725 	 */
726 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
727 		pcix_chipset = 1;
728 
729 	cfg->pcix.pcix_ptr = ptr;
730 }
731 
732 static int
733 pcie_slotimpl(const pcicfgregs *cfg)
734 {
735 	const struct pcicfg_expr *expr = &cfg->expr;
736 	uint16_t port_type;
737 
738 	/*
739 	 * Only version 1 can be parsed currently
740 	 */
741 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
742 		return 0;
743 
744 	/*
745 	 * - Slot implemented bit is meaningful iff current port is
746 	 *   root port or down stream port.
747 	 * - Testing for root port or down stream port is meanningful
748 	 *   iff PCI configure has type 1 header.
749 	 */
750 
751 	if (cfg->hdrtype != 1)
752 		return 0;
753 
754 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
755 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
756 		return 0;
757 
758 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
759 		return 0;
760 
761 	return 1;
762 }
763 
764 static void
765 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
766 {
767 #define REG(n, w)	\
768 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
769 
770 	struct pcicfg_expr *expr = &cfg->expr;
771 
772 	/*
773 	 * Assume we have a PCI-express chipset if we have
774 	 * at least one PCI-express device.
775 	 */
776 	pcie_chipset = 1;
777 
778 	expr->expr_ptr = ptr;
779 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
780 
781 	/*
782 	 * Only version 1 can be parsed currently
783 	 */
784 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
785 		return;
786 
787 	/*
788 	 * Read slot capabilities.  Slot capabilities exists iff
789 	 * current port's slot is implemented
790 	 */
791 	if (pcie_slotimpl(cfg))
792 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
793 
794 #undef REG
795 }
796 
/*
 * Walk the function's capability list and dispatch each entry to its
 * parser via the pci_read_caps table.  Afterwards (x86), turn on the
 * MSI mapping window of any HyperTransport slave that has one disabled.
 *
 * NOTE: the REG/WREG macros are deliberately NOT #undef'd here — the
 * VPD helpers below (pci_read_vpd_reg and friends) rely on them.
 */
797 static void
798 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
799 {
800 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
801 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
802 
803 	uint32_t val;
804 	int nextptr, ptrptr;
805 
806 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
807 		/* No capabilities */
808 		return;
809 	}
810 
	/* The capability pointer lives at a header-type specific offset. */
811 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
812 	case 0:
813 	case 1:
814 		ptrptr = PCIR_CAP_PTR;
815 		break;
816 	case 2:
817 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
818 		break;
819 	default:
820 		return;				/* no capabilities support */
821 	}
822 	nextptr = REG(ptrptr, 1);	/* sanity check? */
823 
824 	/*
825 	 * Read capability entries.
826 	 */
827 	while (pci_fixup_nextptr(&nextptr)) {
828 		const struct pci_read_cap *rc;
829 		int ptr = nextptr;
830 
831 		/* Find the next entry */
832 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
833 
834 		/* Process this entry */
835 		val = REG(ptr + PCICAP_ID, 1);
836 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
837 			if (rc->cap == val) {
838 				rc->read_cap(pcib, ptr, nextptr, cfg);
839 				break;
840 			}
841 		}
842 	}
843 
844 #if defined(__i386__) || defined(__x86_64__)
845 	/*
846 	 * Enable the MSI mapping window for all HyperTransport
847 	 * slaves.  PCI-PCI bridges have their windows enabled via
848 	 * PCIB_MAP_MSI().
849 	 */
850 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
851 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
852 		device_printf(pcib,
853 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
854 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
855 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
856 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
857 		     2);
858 	}
859 #endif
860 
861 /* REG and WREG use carry through to next functions */
862 }
863 
864 /*
865  * PCI Vital Product Data
866  */
867 
868 #define	PCI_VPD_TIMEOUT		1000000
869 
870 static int
871 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
872 {
873 	int count = PCI_VPD_TIMEOUT;
874 
875 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
876 
877 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
878 
879 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
880 		if (--count < 0)
881 			return (ENXIO);
882 		DELAY(1);	/* limit looping */
883 	}
884 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
885 
886 	return (0);
887 }
888 
889 #if 0
/*
 * Write one aligned 32-bit word of Vital Product Data: load the data
 * register, write the address with the flag bit set, and poll until the
 * hardware clears the flag to signal completion.  Currently compiled
 * out (#if 0) — no callers in this file.
 */
890 static int
891 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
892 {
893 	int count = PCI_VPD_TIMEOUT;
894 
895 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
896 
897 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
898 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
899 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
900 		if (--count < 0)
901 			return (ENXIO);
902 		DELAY(1);	/* limit looping */
903 	}
904 
905 	return (0);
906 }
907 #endif
908 
909 #undef PCI_VPD_TIMEOUT
910 
/*
 * Cursor state for the byte-at-a-time VPD reader (vpd_nextbyte()):
 * VPD is fetched 32 bits at a time into "val" and handed out LSB
 * first, while "cksum" accumulates a running byte sum.
 */
911 struct vpd_readstate {
912 	device_t	pcib;
913 	pcicfgregs	*cfg;
914 	uint32_t	val;		/* current 32-bit word of VPD data */
915 	int		bytesinval;	/* bytes of "val" not yet consumed */
916 	int		off;		/* next VPD offset to fetch */
917 	uint8_t		cksum;		/* running sum of bytes handed out */
918 };
919 
920 static int
921 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
922 {
923 	uint32_t reg;
924 	uint8_t byte;
925 
926 	if (vrs->bytesinval == 0) {
927 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
928 			return (ENXIO);
929 		vrs->val = le32toh(reg);
930 		vrs->off += 4;
931 		byte = vrs->val & 0xff;
932 		vrs->bytesinval = 3;
933 	} else {
934 		vrs->val = vrs->val >> 8;
935 		byte = vrs->val & 0xff;
936 		vrs->bytesinval--;
937 	}
938 
939 	vrs->cksum += byte;
940 	*data = byte;
941 	return (0);
942 }
943 
944 int
945 pcie_slot_implemented(device_t dev)
946 {
947 	struct pci_devinfo *dinfo = device_get_ivars(dev);
948 
949 	return pcie_slotimpl(&dinfo->cfg);
950 }
951 
952 void
953 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
954 {
955 	uint8_t expr_ptr;
956 	uint16_t val;
957 
958 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
959 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
960 		panic("%s: invalid max read request size 0x%02x\n",
961 		      device_get_nameunit(dev), rqsize);
962 	}
963 
964 	expr_ptr = pci_get_pciecap_ptr(dev);
965 	if (!expr_ptr)
966 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
967 
968 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
969 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
970 		if (bootverbose)
971 			device_printf(dev, "adjust device control 0x%04x", val);
972 
973 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
974 		val |= rqsize;
975 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
976 
977 		if (bootverbose)
978 			kprintf(" -> 0x%04x\n", val);
979 	}
980 }
981 
982 uint16_t
983 pcie_get_max_readrq(device_t dev)
984 {
985 	uint8_t expr_ptr;
986 	uint16_t val;
987 
988 	expr_ptr = pci_get_pciecap_ptr(dev);
989 	if (!expr_ptr)
990 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
991 
992 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
993 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
994 }
995 
/*
 * Parse the device's VPD (Vital Product Data) into cfg->vpd.
 *
 * VPD is a stream of resource descriptors fetched a byte at a time
 * through the VPD capability (vpd_nextbyte()).  This routine runs a
 * byte-driven state machine over that stream:
 *
 *	state 0  - resource (item) header
 *	state 1  - Identifier String bytes
 *	state 2  - VPD-R keyword header
 *	state 3  - VPD-R keyword value bytes
 *	state 4  - skip bytes of an unhandled item
 *	state 5  - VPD-W keyword header
 *	state 6  - VPD-W keyword value bytes
 *	state -1 - normal termination (End item, or data deemed invalid)
 *	state -2 - I/O error while reading the VPD stream
 *
 * On invalid data or I/O errors, whatever was allocated so far is
 * released again; cfg->vpd.vpd_cached is set unconditionally so the
 * (possibly failed) read is not retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	/* Run the machine until End item (-1) or read error (-2). */
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/*
				 * Sanity-check against the maximum VPD size
				 * addressable by the capability (0x7f * 4
				 * bytes); reject anything that would run
				 * past it.
				 */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in the tag. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO array geometrically when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store just a NUL. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* 3 bytes of header (keyword + length) consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" carries the checksum byte; the running sum
			 * over the whole VPD-R area must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* VPD-R item done; trim array to fit. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip bytes of an item we don't interpret. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record the in-VPD offset of the writable value. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* VPD-W item done; trim array to fit. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	/* Mark cached even on failure so we don't retry the read. */
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1275 
1276 int
1277 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1278 {
1279 	struct pci_devinfo *dinfo = device_get_ivars(child);
1280 	pcicfgregs *cfg = &dinfo->cfg;
1281 
1282 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1283 		pci_read_vpd(device_get_parent(dev), cfg);
1284 
1285 	*identptr = cfg->vpd.vpd_ident;
1286 
1287 	if (*identptr == NULL)
1288 		return (ENXIO);
1289 
1290 	return (0);
1291 }
1292 
1293 int
1294 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1295 	const char **vptr)
1296 {
1297 	struct pci_devinfo *dinfo = device_get_ivars(child);
1298 	pcicfgregs *cfg = &dinfo->cfg;
1299 	int i;
1300 
1301 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1302 		pci_read_vpd(device_get_parent(dev), cfg);
1303 
1304 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1305 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1306 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1307 			*vptr = cfg->vpd.vpd_ros[i].value;
1308 		}
1309 
1310 	if (i != cfg->vpd.vpd_rocnt)
1311 		return (0);
1312 
1313 	*vptr = NULL;
1314 	return (ENXIO);
1315 }
1316 
1317 /*
1318  * Return the offset in configuration space of the requested extended
1319  * capability entry or 0 if the specified capability was not found.
1320  */
1321 int
1322 pci_find_extcap_method(device_t dev, device_t child, int capability,
1323     int *capreg)
1324 {
1325 	struct pci_devinfo *dinfo = device_get_ivars(child);
1326 	pcicfgregs *cfg = &dinfo->cfg;
1327 	u_int32_t status;
1328 	u_int8_t ptr;
1329 
1330 	/*
1331 	 * Check the CAP_LIST bit of the PCI status register first.
1332 	 */
1333 	status = pci_read_config(child, PCIR_STATUS, 2);
1334 	if (!(status & PCIM_STATUS_CAPPRESENT))
1335 		return (ENXIO);
1336 
1337 	/*
1338 	 * Determine the start pointer of the capabilities list.
1339 	 */
1340 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1341 	case 0:
1342 	case 1:
1343 		ptr = PCIR_CAP_PTR;
1344 		break;
1345 	case 2:
1346 		ptr = PCIR_CAP_PTR_2;
1347 		break;
1348 	default:
1349 		/* XXX: panic? */
1350 		return (ENXIO);		/* no extended capabilities support */
1351 	}
1352 	ptr = pci_read_config(child, ptr, 1);
1353 
1354 	/*
1355 	 * Traverse the capabilities list.
1356 	 */
1357 	while (ptr != 0) {
1358 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1359 			if (capreg != NULL)
1360 				*capreg = ptr;
1361 			return (0);
1362 		}
1363 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1364 	}
1365 
1366 	return (ENOENT);
1367 }
1368 
1369 /*
1370  * Support for MSI-X message interrupts.
1371  */
1372 static void
1373 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1374     uint32_t data)
1375 {
1376 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1377 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1378 	uint32_t offset;
1379 
1380 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1381 	offset = msix->msix_table_offset + index * 16;
1382 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1383 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1384 	bus_write_4(msix->msix_table_res, offset + 8, data);
1385 
1386 	/* Enable MSI -> HT mapping. */
1387 	pci_ht_map_msi(dev, address);
1388 }
1389 
1390 static void
1391 pci_mask_msix_vector(device_t dev, u_int index)
1392 {
1393 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1394 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1395 	uint32_t offset, val;
1396 
1397 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1398 	offset = msix->msix_table_offset + index * 16 + 12;
1399 	val = bus_read_4(msix->msix_table_res, offset);
1400 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1401 		val |= PCIM_MSIX_VCTRL_MASK;
1402 		bus_write_4(msix->msix_table_res, offset, val);
1403 	}
1404 }
1405 
1406 static void
1407 pci_unmask_msix_vector(device_t dev, u_int index)
1408 {
1409 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1410 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1411 	uint32_t offset, val;
1412 
1413 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1414 	offset = msix->msix_table_offset + index * 16 + 12;
1415 	val = bus_read_4(msix->msix_table_res, offset);
1416 	if (val & PCIM_MSIX_VCTRL_MASK) {
1417 		val &= ~PCIM_MSIX_VCTRL_MASK;
1418 		bus_write_4(msix->msix_table_res, offset, val);
1419 	}
1420 }
1421 
1422 int
1423 pci_pending_msix_vector(device_t dev, u_int index)
1424 {
1425 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1426 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1427 	uint32_t offset, bit;
1428 
1429 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1430 	    ("MSI-X is not setup yet\n"));
1431 
1432 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1433 	offset = msix->msix_pba_offset + (index / 32) * 4;
1434 	bit = 1 << index % 32;
1435 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1436 }
1437 
1438 /*
1439  * Restore MSI-X registers and table during resume.  If MSI-X is
1440  * enabled then walk the virtual table to restore the actual MSI-X
1441  * table.
1442  */
static void
pci_resume_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;

	if (msix->msix_table_res != NULL) {
		const struct msix_vector *mv;

		/* Start from a fully-masked table, then re-program. */
		pci_mask_msix_allvectors(dev);

		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
			u_int vector;

			/* Skip vectors that were never set up. */
			if (mv->mv_address == 0)
				continue;

			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
			pci_setup_msix_vector(dev, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(dev, vector);
		}
	}
	/*
	 * The saved control register is restored unconditionally, even
	 * when the table resource is not mapped.
	 */
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1469 
1470 /*
1471  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1472  *
1473  * After this function returns, the MSI-X's rid will be saved in rid0.
1474  */
int
pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
    int *rid0, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_vector *mv;
	struct resource_list_entry *rle;
	int error, irq, rid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet\n"));
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d\n", cpuid));
	KASSERT(vector < msix->msix_msgnum,
	    ("invalid MSI-X vector %u, total %d\n", vector, msix->msix_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
		    vector, msix->msix_msgnum);
	}

	/* Set rid according to vector number */
	rid = PCI_MSIX_VEC2RID(vector);

	/* Vector has already been allocated */
	mv = pci_find_msix_vector(child, rid);
	if (mv != NULL)
		return EBUSY;

	/* Allocate a message. */
	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
	if (error)
		return error;
	/* Track the IRQ under this rid so drivers can allocate it later. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
	    irq, irq, 1, cpuid);

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
		    rle->start, cpuid);
	}

	/* Update counts of alloc'd messages. */
	msix->msix_alloc++;

	/*
	 * Record the vector; mv_address stays 0 until the vector is
	 * actually set up.
	 */
	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
	mv->mv_rid = rid;
	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);

	*rid0 = rid;
	return 0;
}
1528 
/*
 * Release one previously allocated MSI-X vector identified by its rid.
 * The vector must already be torn down (mv_address == 0) and its IRQ
 * resource must no longer be allocated by the driver.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet\n"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated\n"));
	KASSERT(rid > 0, ("invalid rid %d\n", rid));

	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated\n", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown\n", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d\n", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d\n", rid));

	/* Remember IRQ/cpu before deleting the resource list entry. */
	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1568 
1569 /*
1570  * Return the max supported MSI-X messages this device supports.
1571  * Basically, assuming the MD code can alloc messages, this function
1572  * should return the maximum value that pci_alloc_msix() can return.
1573  * Thus, it is subject to the tunables, etc.
1574  */
1575 int
1576 pci_msix_count_method(device_t dev, device_t child)
1577 {
1578 	struct pci_devinfo *dinfo = device_get_ivars(child);
1579 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1580 
1581 	if (pci_do_msix && msix->msix_location != 0)
1582 		return (msix->msix_msgnum);
1583 	return (0);
1584 }
1585 
/*
 * Prepare the device for MSI-X use: verify no conflicting interrupt
 * allocation exists, locate the active memory BARs holding the MSI-X
 * table and PBA, record them in the cfg and mask all vectors.
 * Returns 0 on success, ENXIO/ENODEV on failure.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet\n"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated\n"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/*
	 * When the PBA shares the table BAR, rle still refers to the
	 * table BAR here, so pba_res aliases table_res.
	 */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	/* Start with every vector masked. */
	pci_mask_msix_allvectors(dev);

	return 0;
}
1642 
/*
 * Undo pci_setup_msix(): mask all vectors and forget the table/PBA
 * resources.  All vectors must have been released beforehand.
 */
void
pci_teardown_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet\n"));
	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
	    ("MSI-X vector is still allocated\n"));

	pci_mask_msix_allvectors(dev);

	/* The resources themselves are owned by the resource list. */
	msix->msix_table_res = NULL;
	msix->msix_pba_res = NULL;
}
1659 
/*
 * Set the MSI-X enable bit in the message control register.
 * pci_setup_msix() must have been called first.
 */
void
pci_enable_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet\n"));

	/* Update control register to enable MSI-X. */
	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1674 
/*
 * Clear the MSI-X enable bit in the message control register and tear
 * down any MSI -> HyperTransport mapping.
 */
void
pci_disable_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet\n"));

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Update control register to disable MSI-X. */
	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
	    msix->msix_ctrl, 2);
}
1692 
1693 static void
1694 pci_mask_msix_allvectors(device_t dev)
1695 {
1696 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1697 	u_int i;
1698 
1699 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1700 		pci_mask_msix_vector(dev, i);
1701 }
1702 
1703 static struct msix_vector *
1704 pci_find_msix_vector(device_t dev, int rid)
1705 {
1706 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1707 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1708 	struct msix_vector *mv;
1709 
1710 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1711 		if (mv->mv_rid == rid)
1712 			return mv;
1713 	}
1714 	return NULL;
1715 }
1716 
1717 /*
1718  * HyperTransport MSI mapping control
1719  */
1720 void
1721 pci_ht_map_msi(device_t dev, uint64_t addr)
1722 {
1723 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1724 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1725 
1726 	if (!ht->ht_msimap)
1727 		return;
1728 
1729 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1730 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1731 		/* Enable MSI -> HT mapping. */
1732 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1733 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1734 		    ht->ht_msictrl, 2);
1735 	}
1736 
1737 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1738 		/* Disable MSI -> HT mapping. */
1739 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1740 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1741 		    ht->ht_msictrl, 2);
1742 	}
1743 }
1744 
1745 /*
1746  * Support for MSI message signalled interrupts.
1747  */
/*
 * Program the MSI address/data registers, set the MSI enable bit and
 * establish any MSI -> HyperTransport mapping.
 */
void
pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Write data and address values. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
	    address & 0xffffffff, 4);
	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
		/* 64-bit capable: data register sits after the high addr. */
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
		    address >> 32, 4);
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
		    data, 2);
	} else
		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
		    2);

	/* Enable MSI in the control register. */
	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);

	/* Enable MSI -> HT mapping. */
	pci_ht_map_msi(dev, address);
}
1774 
/*
 * Clear the MSI enable bit and tear down any MSI -> HyperTransport
 * mapping.
 */
void
pci_disable_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;

	/* Disable MSI -> HT mapping. */
	pci_ht_map_msi(dev, 0);

	/* Disable MSI in the control register. */
	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1789 
1790 /*
1791  * Restore MSI registers during resume.  If MSI is enabled then
1792  * restore the data and address registers in addition to the control
1793  * register.
1794  */
static void
pci_resume_msi(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	uint64_t address;
	uint16_t data;

	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
		/* Re-program address/data from the saved softc copies. */
		address = msi->msi_addr;
		data = msi->msi_data;
		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
		    address & 0xffffffff, 4);
		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
			pci_write_config(dev, msi->msi_location +
			    PCIR_MSI_DATA_64BIT, data, 2);
		} else
			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
			    data, 2);
	}
	/* Control register is restored even when MSI was disabled. */
	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
	    2);
}
1820 
1821 /*
1822  * Returns true if the specified device is blacklisted because MSI
1823  * doesn't work.
1824  */
1825 int
1826 pci_msi_device_blacklisted(device_t dev)
1827 {
1828 	struct pci_quirk *q;
1829 
1830 	if (!pci_honor_msi_blacklist)
1831 		return (0);
1832 
1833 	for (q = &pci_quirks[0]; q->devid; q++) {
1834 		if (q->devid == pci_get_devid(dev) &&
1835 		    q->type == PCI_QUIRK_DISABLE_MSI)
1836 			return (1);
1837 	}
1838 	return (0);
1839 }
1840 
1841 /*
1842  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1843  * we just check for blacklisted chipsets as represented by the
1844  * host-PCI bridge at device 0:0:0.  In the future, it may become
1845  * necessary to check other system attributes, such as the kenv values
1846  * that give the motherboard manufacturer and model number.
1847  */
1848 static int
1849 pci_msi_blacklisted(void)
1850 {
1851 	device_t dev;
1852 
1853 	if (!pci_honor_msi_blacklist)
1854 		return (0);
1855 
1856 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1857 	if (!(pcie_chipset || pcix_chipset))
1858 		return (1);
1859 
1860 	dev = pci_find_bsf(0, 0, 0);
1861 	if (dev != NULL)
1862 		return (pci_msi_device_blacklisted(dev));
1863 	return (0);
1864 }
1865 
1866 /*
1867  * Attempt to allocate count MSI messages on start_cpuid.
1868  *
1869  * If start_cpuid < 0, then the MSI messages' target CPU will be
1870  * selected automaticly.
1871  *
1872  * If the caller explicitly specified the MSI messages' target CPU,
1873  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1874  * messages on the specified CPU, if the allocation fails due to MD
1875  * does not have enough vectors (EMSGSIZE), then we will try next
1876  * available CPU, until the allocation fails on all CPUs.
1877  *
1878  * EMSGSIZE will be returned, if all available CPUs does not have
1879  * enough vectors for the requested amount of MSI messages.  Caller
1880  * should either reduce the amount of MSI messages to be requested,
1881  * or simply giving up using MSI.
1882  *
1883  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1884  * returned in 'rid' array, if the allocation succeeds.
1885  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
    int start_cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32], cpuid = 0;
	uint16_t ctrl;

	/* MSI allows at most 32 messages and a power-of-2 count. */
	KASSERT(count != 0 && count <= 32 && powerof2(count),
	    ("invalid MSI count %d\n", count));
	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d\n", start_cpuid));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
	    !pci_do_msi)
		return (ENODEV);

	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d\n",
	    count, cfg->msi.msi_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    count, cfg->msi.msi_msgnum);
	}

	/* Auto-select a starting CPU by round-robin when none was given. */
	if (start_cpuid < 0)
		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;

	/*
	 * Try each CPU in turn; only EMSGSIZE (MD ran out of vectors on
	 * that CPU) moves on to the next one.
	 */
	error = EINVAL;
	for (i = 0; i < ncpus; ++i) {
		cpuid = (start_cpuid + i) % ncpus;

		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
		    cfg->msi.msi_msgnum, irqs, cpuid);
		if (error == 0)
			break;
		else if (error != EMSGSIZE)
			return error;
	}
	if (error)
		return error;

	/*
	 * We now have N messages mapped onto SYS_RES_IRQ resources in
	 * the irqs[] array, so add new resources starting at rid 1.
	 */
	for (i = 0; i < count; i++) {
		rid[i] = i + 1;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, cpuid);
	}

	if (bootverbose) {
		if (count == 1) {
			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
			    irqs[0], cpuid);
		} else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < count; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[count - 1]);
			kprintf(" for MSI on cpu%d\n", cpuid);
		}
	}

	/*
	 * Update control register with count: the Multiple Message
	 * Enable field holds log2(count) in bits 6:4.
	 */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(count) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = count;
	cfg->msi.msi_handlers = 0;
	return (0);
}
2005 
2006 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			/* All messages were allocated on a single CPU. */
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d\n", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2059 
2060 /*
2061  * Return the max supported MSI messages this device supports.
2062  * Basically, assuming the MD code can alloc messages, this function
2063  * should return the maximum value that pci_alloc_msi() can return.
2064  * Thus, it is subject to the tunables, etc.
2065  */
2066 int
2067 pci_msi_count_method(device_t dev, device_t child)
2068 {
2069 	struct pci_devinfo *dinfo = device_get_ivars(child);
2070 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2071 
2072 	if (pci_do_msi && msi->msi_location != 0)
2073 		return (msi->msi_msgnum);
2074 	return (0);
2075 }
2076 
2077 /* kfree pcicfgregs structure and all depending data structures */
2078 
2079 int
2080 pci_freecfg(struct pci_devinfo *dinfo)
2081 {
2082 	struct devlist *devlist_head;
2083 	int i;
2084 
2085 	devlist_head = &pci_devq;
2086 
2087 	if (dinfo->cfg.vpd.vpd_reg) {
2088 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2089 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2090 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2091 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2092 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2093 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2094 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2095 	}
2096 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2097 	kfree(dinfo, M_DEVBUF);
2098 
2099 	/* increment the generation count */
2100 	pci_generation++;
2101 
2102 	/* we're losing one device */
2103 	pci_numdevs--;
2104 	return (0);
2105 }
2106 
2107 /*
2108  * PCI power manangement
2109  */
2110 int
2111 pci_set_powerstate_method(device_t dev, device_t child, int state)
2112 {
2113 	struct pci_devinfo *dinfo = device_get_ivars(child);
2114 	pcicfgregs *cfg = &dinfo->cfg;
2115 	uint16_t status;
2116 	int result, oldstate, highest, delay;
2117 
2118 	if (cfg->pp.pp_cap == 0)
2119 		return (EOPNOTSUPP);
2120 
2121 	/*
2122 	 * Optimize a no state change request away.  While it would be OK to
2123 	 * write to the hardware in theory, some devices have shown odd
2124 	 * behavior when going from D3 -> D3.
2125 	 */
2126 	oldstate = pci_get_powerstate(child);
2127 	if (oldstate == state)
2128 		return (0);
2129 
2130 	/*
2131 	 * The PCI power management specification states that after a state
2132 	 * transition between PCI power states, system software must
2133 	 * guarantee a minimal delay before the function accesses the device.
2134 	 * Compute the worst case delay that we need to guarantee before we
2135 	 * access the device.  Many devices will be responsive much more
2136 	 * quickly than this delay, but there are some that don't respond
2137 	 * instantly to state changes.  Transitions to/from D3 state require
2138 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2139 	 * is done below with DELAY rather than a sleeper function because
2140 	 * this function can be called from contexts where we cannot sleep.
2141 	 */
2142 	highest = (oldstate > state) ? oldstate : state;
2143 	if (highest == PCI_POWERSTATE_D3)
2144 	    delay = 10000;
2145 	else if (highest == PCI_POWERSTATE_D2)
2146 	    delay = 200;
2147 	else
2148 	    delay = 0;
2149 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2150 	    & ~PCIM_PSTAT_DMASK;
2151 	result = 0;
2152 	switch (state) {
2153 	case PCI_POWERSTATE_D0:
2154 		status |= PCIM_PSTAT_D0;
2155 		break;
2156 	case PCI_POWERSTATE_D1:
2157 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2158 			return (EOPNOTSUPP);
2159 		status |= PCIM_PSTAT_D1;
2160 		break;
2161 	case PCI_POWERSTATE_D2:
2162 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2163 			return (EOPNOTSUPP);
2164 		status |= PCIM_PSTAT_D2;
2165 		break;
2166 	case PCI_POWERSTATE_D3:
2167 		status |= PCIM_PSTAT_D3;
2168 		break;
2169 	default:
2170 		return (EINVAL);
2171 	}
2172 
2173 	if (bootverbose)
2174 		kprintf(
2175 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2176 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2177 		    dinfo->cfg.func, oldstate, state);
2178 
2179 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2180 	if (delay)
2181 		DELAY(delay);
2182 	return (0);
2183 }
2184 
2185 int
2186 pci_get_powerstate_method(device_t dev, device_t child)
2187 {
2188 	struct pci_devinfo *dinfo = device_get_ivars(child);
2189 	pcicfgregs *cfg = &dinfo->cfg;
2190 	uint16_t status;
2191 	int result;
2192 
2193 	if (cfg->pp.pp_cap != 0) {
2194 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2195 		switch (status & PCIM_PSTAT_DMASK) {
2196 		case PCIM_PSTAT_D0:
2197 			result = PCI_POWERSTATE_D0;
2198 			break;
2199 		case PCIM_PSTAT_D1:
2200 			result = PCI_POWERSTATE_D1;
2201 			break;
2202 		case PCIM_PSTAT_D2:
2203 			result = PCI_POWERSTATE_D2;
2204 			break;
2205 		case PCIM_PSTAT_D3:
2206 			result = PCI_POWERSTATE_D3;
2207 			break;
2208 		default:
2209 			result = PCI_POWERSTATE_UNKNOWN;
2210 			break;
2211 		}
2212 	} else {
2213 		/* No support, device is always at D0 */
2214 		result = PCI_POWERSTATE_D0;
2215 	}
2216 	return (result);
2217 }
2218 
2219 /*
2220  * Some convenience functions for PCI device drivers.
2221  */
2222 
2223 static __inline void
2224 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2225 {
2226 	uint16_t	command;
2227 
2228 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2229 	command |= bit;
2230 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2231 }
2232 
2233 static __inline void
2234 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2235 {
2236 	uint16_t	command;
2237 
2238 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2239 	command &= ~bit;
2240 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2241 }
2242 
2243 int
2244 pci_enable_busmaster_method(device_t dev, device_t child)
2245 {
2246 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2247 	return (0);
2248 }
2249 
2250 int
2251 pci_disable_busmaster_method(device_t dev, device_t child)
2252 {
2253 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2254 	return (0);
2255 }
2256 
2257 int
2258 pci_enable_io_method(device_t dev, device_t child, int space)
2259 {
2260 	uint16_t command;
2261 	uint16_t bit;
2262 	char *error;
2263 
2264 	bit = 0;
2265 	error = NULL;
2266 
2267 	switch(space) {
2268 	case SYS_RES_IOPORT:
2269 		bit = PCIM_CMD_PORTEN;
2270 		error = "port";
2271 		break;
2272 	case SYS_RES_MEMORY:
2273 		bit = PCIM_CMD_MEMEN;
2274 		error = "memory";
2275 		break;
2276 	default:
2277 		return (EINVAL);
2278 	}
2279 	pci_set_command_bit(dev, child, bit);
2280 	/* Some devices seem to need a brief stall here, what do to? */
2281 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2282 	if (command & bit)
2283 		return (0);
2284 	device_printf(child, "failed to enable %s mapping!\n", error);
2285 	return (ENXIO);
2286 }
2287 
2288 int
2289 pci_disable_io_method(device_t dev, device_t child, int space)
2290 {
2291 	uint16_t command;
2292 	uint16_t bit;
2293 	char *error;
2294 
2295 	bit = 0;
2296 	error = NULL;
2297 
2298 	switch(space) {
2299 	case SYS_RES_IOPORT:
2300 		bit = PCIM_CMD_PORTEN;
2301 		error = "port";
2302 		break;
2303 	case SYS_RES_MEMORY:
2304 		bit = PCIM_CMD_MEMEN;
2305 		error = "memory";
2306 		break;
2307 	default:
2308 		return (EINVAL);
2309 	}
2310 	pci_clear_command_bit(dev, child, bit);
2311 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2312 	if (command & bit) {
2313 		device_printf(child, "failed to disable %s mapping!\n", error);
2314 		return (ENXIO);
2315 	}
2316 	return (0);
2317 }
2318 
2319 /*
2320  * New style pci driver.  Parent device is either a pci-host-bridge or a
2321  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2322  */
2323 
2324 void
2325 pci_print_verbose(struct pci_devinfo *dinfo)
2326 {
2327 
2328 	if (bootverbose) {
2329 		pcicfgregs *cfg = &dinfo->cfg;
2330 
2331 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2332 		    cfg->vendor, cfg->device, cfg->revid);
2333 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2334 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2335 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2336 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2337 		    cfg->mfdev);
2338 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2339 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2340 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2341 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2342 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2343 		if (cfg->intpin > 0)
2344 			kprintf("\tintpin=%c, irq=%d\n",
2345 			    cfg->intpin +'a' -1, cfg->intline);
2346 		if (cfg->pp.pp_cap) {
2347 			uint16_t status;
2348 
2349 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2350 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2351 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2352 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2353 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2354 			    status & PCIM_PSTAT_DMASK);
2355 		}
2356 		if (cfg->msi.msi_location) {
2357 			int ctrl;
2358 
2359 			ctrl = cfg->msi.msi_ctrl;
2360 			kprintf("\tMSI supports %d message%s%s%s\n",
2361 			    cfg->msi.msi_msgnum,
2362 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2363 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2364 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2365 		}
2366 		if (cfg->msix.msix_location) {
2367 			kprintf("\tMSI-X supports %d message%s ",
2368 			    cfg->msix.msix_msgnum,
2369 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2370 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2371 				kprintf("in map 0x%x\n",
2372 				    cfg->msix.msix_table_bar);
2373 			else
2374 				kprintf("in maps 0x%x and 0x%x\n",
2375 				    cfg->msix.msix_table_bar,
2376 				    cfg->msix.msix_pba_bar);
2377 		}
2378 		pci_print_verbose_expr(cfg);
2379 	}
2380 }
2381 
/*
 * Print the PCI Express capability details of a device (capability
 * version, port type and slot capabilities).  Only active under
 * bootverbose; a no-op if the device has no PCIe capability.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
	/* Port-type decoding below only applies to capability version 1. */
	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
		goto back;

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	/* Map the port type field to a short human-readable tag. */
	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	default:
		port_name = NULL;
		break;
	}
	/*
	 * Suppress the tag for root/downstream ports that do not
	 * implement a slot.
	 */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
back:
	kprintf("\n");
}
2443 
2444 static int
2445 pci_porten(device_t pcib, int b, int s, int f)
2446 {
2447 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2448 		& PCIM_CMD_PORTEN) != 0;
2449 }
2450 
2451 static int
2452 pci_memen(device_t pcib, int b, int s, int f)
2453 {
2454 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2455 		& PCIM_CMD_MEMEN) != 0;
2456 }
2457 
2458 /*
2459  * Add a resource based on a pci map register. Return 1 if the map
2460  * register is a 32bit map register or 2 if it is a 64bit register.
2461  */
2462 static int
2463 pci_add_map(device_t pcib, device_t bus, device_t dev,
2464     int b, int s, int f, int reg, struct resource_list *rl, int force,
2465     int prefetch)
2466 {
2467 	uint32_t map;
2468 	pci_addr_t base;
2469 	pci_addr_t start, end, count;
2470 	uint8_t ln2size;
2471 	uint8_t ln2range;
2472 	uint32_t testval;
2473 	uint16_t cmd;
2474 	int type;
2475 	int barlen;
2476 	struct resource *res;
2477 
2478 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2479 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2480 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2481 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2482 
2483 	if (PCI_BAR_MEM(map)) {
2484 		type = SYS_RES_MEMORY;
2485 		if (map & PCIM_BAR_MEM_PREFETCH)
2486 			prefetch = 1;
2487 	} else
2488 		type = SYS_RES_IOPORT;
2489 	ln2size = pci_mapsize(testval);
2490 	ln2range = pci_maprange(testval);
2491 	base = pci_mapbase(map);
2492 	barlen = ln2range == 64 ? 2 : 1;
2493 
2494 	/*
2495 	 * For I/O registers, if bottom bit is set, and the next bit up
2496 	 * isn't clear, we know we have a BAR that doesn't conform to the
2497 	 * spec, so ignore it.  Also, sanity check the size of the data
2498 	 * areas to the type of memory involved.  Memory must be at least
2499 	 * 16 bytes in size, while I/O ranges must be at least 4.
2500 	 */
2501 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2502 		return (barlen);
2503 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2504 	    (type == SYS_RES_IOPORT && ln2size < 2))
2505 		return (barlen);
2506 
2507 	if (ln2range == 64)
2508 		/* Read the other half of a 64bit map register */
2509 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2510 	if (bootverbose) {
2511 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2512 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2513 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2514 			kprintf(", port disabled\n");
2515 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2516 			kprintf(", memory disabled\n");
2517 		else
2518 			kprintf(", enabled\n");
2519 	}
2520 
2521 	/*
2522 	 * If base is 0, then we have problems.  It is best to ignore
2523 	 * such entries for the moment.  These will be allocated later if
2524 	 * the driver specifically requests them.  However, some
2525 	 * removable busses look better when all resources are allocated,
2526 	 * so allow '0' to be overriden.
2527 	 *
2528 	 * Similarly treat maps whose values is the same as the test value
2529 	 * read back.  These maps have had all f's written to them by the
2530 	 * BIOS in an attempt to disable the resources.
2531 	 */
2532 	if (!force && (base == 0 || map == testval))
2533 		return (barlen);
2534 	if ((u_long)base != base) {
2535 		device_printf(bus,
2536 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2537 		    pci_get_domain(dev), b, s, f, reg);
2538 		return (barlen);
2539 	}
2540 
2541 	/*
2542 	 * This code theoretically does the right thing, but has
2543 	 * undesirable side effects in some cases where peripherals
2544 	 * respond oddly to having these bits enabled.  Let the user
2545 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2546 	 * default).
2547 	 */
2548 	if (pci_enable_io_modes) {
2549 		/* Turn on resources that have been left off by a lazy BIOS */
2550 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2551 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2552 			cmd |= PCIM_CMD_PORTEN;
2553 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2554 		}
2555 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2556 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2557 			cmd |= PCIM_CMD_MEMEN;
2558 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2559 		}
2560 	} else {
2561 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2562 			return (barlen);
2563 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2564 			return (barlen);
2565 	}
2566 
2567 	count = 1 << ln2size;
2568 	if (base == 0 || base == pci_mapbase(testval)) {
2569 		start = 0;	/* Let the parent decide. */
2570 		end = ~0ULL;
2571 	} else {
2572 		start = base;
2573 		end = base + (1 << ln2size) - 1;
2574 	}
2575 	resource_list_add(rl, type, reg, start, end, count, -1);
2576 
2577 	/*
2578 	 * Try to allocate the resource for this BAR from our parent
2579 	 * so that this resource range is already reserved.  The
2580 	 * driver for this device will later inherit this resource in
2581 	 * pci_alloc_resource().
2582 	 */
2583 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2584 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2585 	if (res == NULL) {
2586 		/*
2587 		 * If the allocation fails, delete the resource list
2588 		 * entry to force pci_alloc_resource() to allocate
2589 		 * resources from the parent.
2590 		 */
2591 		resource_list_delete(rl, type, reg);
2592 #ifdef PCI_BAR_CLEAR
2593 		/* Clear the BAR */
2594 		start = 0;
2595 #else	/* !PCI_BAR_CLEAR */
2596 		/*
2597 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2598 		 * PCI function, clearing the BAR causes HPET timer
2599 		 * stop ticking.
2600 		 */
2601 		if (bootverbose) {
2602 			kprintf("pci:%d:%d:%d: resource reservation failed "
2603 				"%#jx - %#jx\n", b, s, f,
2604 				(intmax_t)start, (intmax_t)end);
2605 		}
2606 		return (barlen);
2607 #endif	/* PCI_BAR_CLEAR */
2608 	} else {
2609 		start = rman_get_start(res);
2610 	}
2611 	pci_write_config(dev, reg, start, 4);
2612 	if (ln2range == 64)
2613 		pci_write_config(dev, reg + 4, start >> 32, 4);
2614 	return (barlen);
2615 }
2616 
2617 /*
2618  * For ATA devices we need to decide early what addressing mode to use.
2619  * Legacy demands that the primary and secondary ATA ports sits on the
2620  * same addresses that old ISA hardware did. This dictates that we use
2621  * those addresses and ignore the BAR's if we cannot set PCI native
2622  * addressing mode.
2623  */
2624 static void
2625 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2626     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2627 {
2628 	int rid, type, progif;
2629 #if 0
2630 	/* if this device supports PCI native addressing use it */
2631 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2632 	if ((progif & 0x8a) == 0x8a) {
2633 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2634 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2635 			kprintf("Trying ATA native PCI addressing mode\n");
2636 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2637 		}
2638 	}
2639 #endif
2640 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2641 	type = SYS_RES_IOPORT;
2642 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2643 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2644 		    prefetchmask & (1 << 0));
2645 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2646 		    prefetchmask & (1 << 1));
2647 	} else {
2648 		rid = PCIR_BAR(0);
2649 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2650 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2651 		    0, -1);
2652 		rid = PCIR_BAR(1);
2653 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2654 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2655 		    0, -1);
2656 	}
2657 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2658 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2659 		    prefetchmask & (1 << 2));
2660 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2661 		    prefetchmask & (1 << 3));
2662 	} else {
2663 		rid = PCIR_BAR(2);
2664 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2665 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2666 		    0, -1);
2667 		rid = PCIR_BAR(3);
2668 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2669 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2670 		    0, -1);
2671 	}
2672 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2673 	    prefetchmask & (1 << 4));
2674 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2675 	    prefetchmask & (1 << 5));
2676 }
2677 
/*
 * Determine the IRQ for a device's INTx pin and record it as the rid 0
 * interrupt resource.  The IRQ comes, in order of preference, from a
 * user tunable, from routing via the parent bus, or from the intline
 * register left by the firmware.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		/* Reject out-of-range tunable values. */
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			BUS_CONFIG_INTR(bus, dev, irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2732 
/*
 * Populate a child device's resource list from its BARs, apply any
 * quirk-mandated extra map registers, and (re)route its INTx interrupt.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 dwords consumed per BAR. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2778 
/*
 * Enumerate every slot/function on the given bus and add a child device
 * for each function that responds with a sane header type.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		/* f must be 0 here: the REG() below reads function 0. */
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots with unknown/invalid header types. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function flag: probe all functions, else just 0. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2811 
/*
 * Attach a new child device described by dinfo to the bus: create the
 * newbus device, initialize its resource list, snapshot and restore its
 * config space, and harvest its BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * Save the config state first, then restore it; the save/restore
	 * order here is deliberate (save seeds the saved-state copy that
	 * restore works from).
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2826 
2827 static int
2828 pci_probe(device_t dev)
2829 {
2830 	device_set_desc(dev, "PCI bus");
2831 
2832 	/* Allow other subclasses to override this driver. */
2833 	return (-1000);
2834 }
2835 
2836 static int
2837 pci_attach(device_t dev)
2838 {
2839 	int busno, domain;
2840 
2841 	/*
2842 	 * Since there can be multiple independantly numbered PCI
2843 	 * busses on systems with multiple PCI domains, we can't use
2844 	 * the unit number to decide which bus we are probing. We ask
2845 	 * the parent pcib what our domain and bus numbers are.
2846 	 */
2847 	domain = pcib_get_domain(dev);
2848 	busno = pcib_get_bus(dev);
2849 	if (bootverbose)
2850 		device_printf(dev, "domain=%d, physical bus=%d\n",
2851 		    domain, busno);
2852 
2853 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2854 
2855 	return (bus_generic_attach(dev));
2856 }
2857 
/*
 * Suspend the bus: save each child's config space, suspend the
 * children, and then (if ACPI-driven power management is enabled)
 * place each attached type-0 child into its sleep power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* Let ACPI override the default D3 target state. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
2905 
/*
 * Resume the bus: power each child back to D0 (via ACPI when enabled),
 * restore its saved config space, and then resume the children.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
2940 
2941 static void
2942 pci_load_vendor_data(void)
2943 {
2944 	caddr_t vendordata, info;
2945 
2946 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2947 		info = preload_search_info(vendordata, MODINFO_ADDR);
2948 		pci_vendordata = *(char **)info;
2949 		info = preload_search_info(vendordata, MODINFO_SIZE);
2950 		pci_vendordata_size = *(size_t *)info;
2951 		/* terminate the database */
2952 		pci_vendordata[pci_vendordata_size] = '\n';
2953 	}
2954 }
2955 
/*
 * Called when a new driver is added to the bus: give each unclaimed
 * child another chance to be probed by the new driver, restoring its
 * config state before the probe and re-saving (with power-down) if the
 * probe fails.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only reprobe children no driver has claimed yet. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		/* Probe failed: save state again and power the child down. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
2985 
2986 static void
2987 pci_child_detached(device_t parent __unused, device_t child)
2988 {
2989 	/* Turn child's power off */
2990 	pci_cfg_save(child, device_get_ivars(child), 1);
2991 }
2992 
/*
 * Bus method to set up an interrupt handler for a child.  After the
 * generic setup, direct children get extra PCI handling: rid 0 means
 * legacy INTx (make sure INTx decoding is enabled); any other rid is an
 * MSI or MSI-X message that must be mapped through the parent bridge
 * and programmed into the device, with INTx disabled.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* First handler: map and program the MSI message. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated\n", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated\n", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup\n", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/*
		 * NOTE: the success path also falls through to "bad"; the
		 * teardown only happens when error is non-zero (set by a
		 * failed PCIB_MAP_MSI above).
		 */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3083 
/*
 * Bus method: remove a previously installed interrupt handler.
 *
 * For direct children the hardware side is torn down too: legacy INTx
 * (rid 0) is masked via INTxDIS; for MSI the per-device handler count
 * is decremented and the message disabled when it reaches zero; for
 * MSI-X the vector is masked and its cached address/data cleared.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	int rid, error;

	/* Refuse a missing or inactive IRQ resource. */
	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);

		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			KASSERT(rid <= msi->msi_alloc,
			    ("MSI-X index too high\n"));
			KASSERT(msi->msi_handlers > 0,
			    ("MSI rid %d is not setup\n", rid));

			msi->msi_handlers--;
			if (msi->msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			struct msix_vector *mv;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated\n", rid));
			KASSERT(mv->mv_address != 0,
			    ("MSI-X rid %d has not been setup\n", rid));

			/* Mask the vector and forget its mapping. */
			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
			mv->mv_address = 0;
			mv->mv_data = 0;
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	/* Generic teardown is not expected to fail for MSI/MSI-X rids. */
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3144 
3145 int
3146 pci_print_child(device_t dev, device_t child)
3147 {
3148 	struct pci_devinfo *dinfo;
3149 	struct resource_list *rl;
3150 	int retval = 0;
3151 
3152 	dinfo = device_get_ivars(child);
3153 	rl = &dinfo->resources;
3154 
3155 	retval += bus_print_child_header(dev, child);
3156 
3157 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3158 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3159 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3160 	if (device_get_flags(dev))
3161 		retval += kprintf(" flags %#x", device_get_flags(dev));
3162 
3163 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3164 	    pci_get_function(child));
3165 
3166 	retval += bus_print_child_footer(dev, child);
3167 
3168 	return (retval);
3169 }
3170 
/*
 * Generic class/subclass description table used by pci_probe_nomatch()
 * when no driver attaches and the vendor database has no entry.  A
 * subclass of -1 provides the fallback description for the whole class;
 * the table is terminated by a NULL desc.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3262 
3263 void
3264 pci_probe_nomatch(device_t dev, device_t child)
3265 {
3266 	int	i;
3267 	char	*cp, *scp, *device;
3268 
3269 	/*
3270 	 * Look for a listing for this device in a loaded device database.
3271 	 */
3272 	if ((device = pci_describe_device(child)) != NULL) {
3273 		device_printf(dev, "<%s>", device);
3274 		kfree(device, M_DEVBUF);
3275 	} else {
3276 		/*
3277 		 * Scan the class/subclass descriptions for a general
3278 		 * description.
3279 		 */
3280 		cp = "unknown";
3281 		scp = NULL;
3282 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3283 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3284 				if (pci_nomatch_tab[i].subclass == -1) {
3285 					cp = pci_nomatch_tab[i].desc;
3286 				} else if (pci_nomatch_tab[i].subclass ==
3287 				    pci_get_subclass(child)) {
3288 					scp = pci_nomatch_tab[i].desc;
3289 				}
3290 			}
3291 		}
3292 		device_printf(dev, "<%s%s%s>",
3293 		    cp ? cp : "",
3294 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3295 		    scp ? scp : "");
3296 	}
3297 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3298 		pci_get_vendor(child), pci_get_device(child),
3299 		pci_get_slot(child), pci_get_function(child));
3300 	if (pci_get_intpin(child) > 0) {
3301 		int irq;
3302 
3303 		irq = pci_get_irq(child);
3304 		if (PCI_INTERRUPT_VALID(irq))
3305 			kprintf(" irq %d", irq);
3306 	}
3307 	kprintf("\n");
3308 
3309 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3310 }
3311 
3312 /*
3313  * Parse the PCI device database, if loaded, and return a pointer to a
3314  * description of the device.
3315  *
3316  * The database is flat text formatted as follows:
3317  *
3318  * Any line not in a valid format is ignored.
3319  * Lines are terminated with newline '\n' characters.
3320  *
3321  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3322  * the vendor name.
3323  *
3324  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3325  * - devices cannot be listed without a corresponding VENDOR line.
3326  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3327  * another TAB, then the device name.
3328  */
3329 
3330 /*
3331  * Assuming (ptr) points to the beginning of a line in the database,
3332  * return the vendor or device and description of the next entry.
3333  * The value of (vendor) or (device) inappropriate for the entry type
3334  * is set to -1.  Returns nonzero at the end of the database.
3335  *
3336  * Note that this is slightly unrobust in the face of corrupt data;
3337  * we attempt to safeguard against this by spamming the end of the
3338  * database with a newline when we initialise.
3339  */
3340 static int
3341 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3342 {
3343 	char	*cp = *ptr;
3344 	int	left;
3345 
3346 	*device = -1;
3347 	*vendor = -1;
3348 	**desc = '\0';
3349 	for (;;) {
3350 		left = pci_vendordata_size - (cp - pci_vendordata);
3351 		if (left <= 0) {
3352 			*ptr = cp;
3353 			return(1);
3354 		}
3355 
3356 		/* vendor entry? */
3357 		if (*cp != '\t' &&
3358 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3359 			break;
3360 		/* device entry? */
3361 		if (*cp == '\t' &&
3362 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3363 			break;
3364 
3365 		/* skip to next line */
3366 		while (*cp != '\n' && left > 0) {
3367 			cp++;
3368 			left--;
3369 		}
3370 		if (*cp == '\n') {
3371 			cp++;
3372 			left--;
3373 		}
3374 	}
3375 	/* skip to next line */
3376 	while (*cp != '\n' && left > 0) {
3377 		cp++;
3378 		left--;
3379 	}
3380 	if (*cp == '\n' && left > 0)
3381 		cp++;
3382 	*ptr = cp;
3383 	return(0);
3384 }
3385 
/*
 * Return a kmalloc'ed (M_DEVBUF) "vendor, device" description of (dev)
 * built from the loaded vendor database, or NULL when the database is
 * not loaded, no vendor entry matches, or memory is unavailable.  The
 * caller is responsible for kfree()ing the result.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* Scratch buffer for the vendor name parsed by parse_line. */
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database without a vendor match: give up. */
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	/* Scratch buffer for the device name. */
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/*
		 * Stop at end of database or at the next vendor record
		 * (vendor != -1); either way this vendor has no entry
		 * for the device and dp is left empty.
		 */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No device name found: fall back to the raw device id. */
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3438 
/*
 * Bus method: read a PCI instance variable for (child).  Values are
 * served from the config registers cached in the child's pci_devinfo;
 * no config-space access happens here.  Returns ENOENT for unknown
 * ivars and EINVAL for PCI_IVAR_ETHADDR, which PCI cannot supply.
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3530 
/*
 * Bus method: write a PCI instance variable.  Only the interrupt pin
 * is currently writable; the identity ivars are explicitly read-only
 * (EINVAL) and anything else is unknown (ENOENT).
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
#ifdef notyet
/* NOTE: this whole DDB block is currently compiled out ("notyet"). */
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices with no attached driver print as "none<N>". */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
#endif
3614 
/*
 * Lazily size and allocate the resource behind a BAR that was never
 * entered into the child's resource list.  The BAR is probed for its
 * size (write all-ones, read back), the original value restored, and
 * the allocation recorded in the resource list.  Returns NULL if the
 * BAR is unimplemented, of the wrong type, or allocation fails.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit BAR keeps the upper half in the next dword register. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested type must match what the BAR says it decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* Hardware decodes on natural boundaries: force the alignment. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the (possibly relocated) address back into the BAR. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3713 
3714 
/*
 * Bus method: allocate a resource for (child).  For direct children
 * this performs lazy setup: a legacy interrupt may be routed on first
 * use (unless MSI/MSI-X is active), BAR-backed I/O and memory ranges
 * are sized and allocated on demand via pci_alloc_map(), and already
 * reserved entries are handed back (activated if RF_ACTIVE is asked
 * for).  Everything else falls through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* Only rids that correspond to real BARs qualify. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
3791 
/*
 * Bus method: delete a resource from a direct child's resource list.
 * Refuses when the resource is still active or owned by the child.
 * The corresponding config register is zeroed and the deletion is
 * forwarded to the parent bus.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	/* Only resources of direct children are managed here. */
	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
3827 
3828 struct resource_list *
3829 pci_get_resource_list (device_t dev, device_t child)
3830 {
3831 	struct pci_devinfo *dinfo = device_get_ivars(child);
3832 
3833 	if (dinfo == NULL)
3834 		return (NULL);
3835 
3836 	return (&dinfo->resources);
3837 }
3838 
3839 uint32_t
3840 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3841 {
3842 	struct pci_devinfo *dinfo = device_get_ivars(child);
3843 	pcicfgregs *cfg = &dinfo->cfg;
3844 
3845 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3846 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3847 }
3848 
3849 void
3850 pci_write_config_method(device_t dev, device_t child, int reg,
3851     uint32_t val, int width)
3852 {
3853 	struct pci_devinfo *dinfo = device_get_ivars(child);
3854 	pcicfgregs *cfg = &dinfo->cfg;
3855 
3856 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3857 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3858 }
3859 
3860 int
3861 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3862     size_t buflen)
3863 {
3864 
3865 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3866 	    pci_get_function(child));
3867 	return (0);
3868 }
3869 
3870 int
3871 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3872     size_t buflen)
3873 {
3874 	struct pci_devinfo *dinfo;
3875 	pcicfgregs *cfg;
3876 
3877 	dinfo = device_get_ivars(child);
3878 	cfg = &dinfo->cfg;
3879 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3880 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3881 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3882 	    cfg->progif);
3883 	return (0);
3884 }
3885 
3886 int
3887 pci_assign_interrupt_method(device_t dev, device_t child)
3888 {
3889 	struct pci_devinfo *dinfo = device_get_ivars(child);
3890 	pcicfgregs *cfg = &dinfo->cfg;
3891 
3892 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3893 	    cfg->intpin));
3894 }
3895 
3896 static int
3897 pci_modevent(module_t mod, int what, void *arg)
3898 {
3899 	static struct cdev *pci_cdev;
3900 
3901 	switch (what) {
3902 	case MOD_LOAD:
3903 		STAILQ_INIT(&pci_devq);
3904 		pci_generation = 0;
3905 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3906 				    "pci");
3907 		pci_load_vendor_data();
3908 		break;
3909 
3910 	case MOD_UNLOAD:
3911 		destroy_dev(pci_cdev);
3912 		break;
3913 	}
3914 
3915 	return (0);
3916 }
3917 
/*
 * Restore a type 0 device's config state (BARs, expansion ROM, command
 * register, interrupt routing, timers) from the copy cached in (dinfo),
 * typically after a power transition, and re-enable any saved MSI or
 * MSI-X state.  Bridges and unknown header types are left untouched.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
3963 
3964 void
3965 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3966 {
3967 	int i;
3968 	uint32_t cls;
3969 	int ps;
3970 
3971 	/*
3972 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3973 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3974 	 * which also require special treatment.  Other types are unknown, and
3975 	 * we err on the side of safety by ignoring them.  Powering down
3976 	 * bridges should not be undertaken lightly.
3977 	 */
3978 	if (dinfo->cfg.hdrtype != 0)
3979 		return;
3980 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3981 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3982 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3983 
3984 	/*
3985 	 * Some drivers apparently write to these registers w/o updating our
3986 	 * cached copy.  No harm happens if we update the copy, so do so here
3987 	 * so we can restore them.  The COMMAND register is modified by the
3988 	 * bus w/o updating the cache.  This should represent the normally
3989 	 * writable portion of the 'defined' part of type 0 headers.  In
3990 	 * theory we also need to save/restore the PCI capability structures
3991 	 * we know about, but apart from power we don't know any that are
3992 	 * writable.
3993 	 */
3994 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
3995 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
3996 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
3997 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
3998 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
3999 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4000 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4001 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4002 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4003 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4004 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4005 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4006 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4007 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4008 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4009 
4010 	/*
4011 	 * don't set the state for display devices, base peripherals and
4012 	 * memory devices since bad things happen when they are powered down.
4013 	 * We should (a) have drivers that can easily detach and (b) use
4014 	 * generic drivers for these devices so that some device actually
4015 	 * attaches.  We need to make sure that when we implement (a) we don't
4016 	 * power the device down on a reattach.
4017 	 */
4018 	cls = pci_get_class(dev);
4019 	if (!setstate)
4020 		return;
4021 	switch (pci_do_power_nodriver)
4022 	{
4023 		case 0:		/* NO powerdown at all */
4024 			return;
4025 		case 1:		/* Conservative about what to power down */
4026 			if (cls == PCIC_STORAGE)
4027 				return;
4028 			/*FALLTHROUGH*/
4029 		case 2:		/* Agressive about what to power down */
4030 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4031 			    cls == PCIC_BASEPERIPH)
4032 				return;
4033 			/*FALLTHROUGH*/
4034 		case 3:		/* Power down everything */
4035 			break;
4036 	}
4037 	/*
4038 	 * PCI spec says we can only go into D3 state from D0 state.
4039 	 * Transition from D[12] into D0 before going to D3 state.
4040 	 */
4041 	ps = pci_get_powerstate(dev);
4042 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4043 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4044 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4045 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4046 }
4047 
4048 #ifdef COMPAT_OLDPCI
4049 
4050 /*
4051  * Locate the parent of a PCI device by scanning the PCI devlist
4052  * and return the entry for the parent.
4053  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4054  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4055  */
4056 pcicfgregs *
4057 pci_devlist_get_parent(pcicfgregs *cfg)
4058 {
4059 	struct devlist *devlist_head;
4060 	struct pci_devinfo *dinfo;
4061 	pcicfgregs *bridge_cfg;
4062 	int i;
4063 
4064 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4065 
4066 	/* If the device is on PCI bus 0, look for the host */
4067 	if (cfg->bus == 0) {
4068 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4069 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4070 			bridge_cfg = &dinfo->cfg;
4071 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4072 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4073 		    		&& bridge_cfg->bus == cfg->bus) {
4074 				return bridge_cfg;
4075 			}
4076 		}
4077 	}
4078 
4079 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4080 	if (cfg->bus > 0) {
4081 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4082 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4083 			bridge_cfg = &dinfo->cfg;
4084 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4085 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4086 				&& bridge_cfg->secondarybus == cfg->bus) {
4087 				return bridge_cfg;
4088 			}
4089 		}
4090 	}
4091 
4092 	return NULL;
4093 }
4094 
4095 #endif	/* COMPAT_OLDPCI */
4096 
4097 int
4098 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4099 {
4100 	int rid, type;
4101 	u_int flags;
4102 	char env[64];
4103 
4104 	rid = 0;
4105 	type = PCI_INTR_TYPE_LEGACY;
4106 	flags = RF_SHAREABLE | RF_ACTIVE;
4107 
4108 	ksnprintf(env, sizeof(env), "hw.%s.msi.enable",
4109 	    device_get_nameunit(dev));
4110 	kgetenv_int(env, &msi_enable);
4111 
4112 	if (msi_enable) {
4113 		int cpu = -1;
4114 
4115 		ksnprintf(env, sizeof(env), "hw.%s.msi.cpu",
4116 		    device_get_nameunit(dev));
4117 		kgetenv_int(env, &cpu);
4118 		if (cpu >= ncpus)
4119 			cpu = ncpus - 1;
4120 
4121 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4122 			flags &= ~RF_SHAREABLE;
4123 			type = PCI_INTR_TYPE_MSI;
4124 		}
4125 	}
4126 
4127 	*rid0 = rid;
4128 	*flags0 = flags;
4129 
4130 	return type;
4131 }
4132