xref: /dragonfly/sys/bus/pci/pci.c (revision e96fb831)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_bus.h"
32 #include "opt_acpi.h"
33 #include "opt_compat_oldpci.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 #include <sys/machintr.h>
47 
48 #include <machine/msi_machdep.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 
54 #include <sys/bus.h>
55 #include <sys/rman.h>
56 #include <sys/device.h>
57 
58 #include <sys/pciio.h>
59 #include <bus/pci/pcireg.h>
60 #include <bus/pci/pcivar.h>
61 #include <bus/pci/pci_private.h>
62 
63 #include "pcib_if.h"
64 #include "pci_if.h"
65 
66 #ifdef __HAVE_ACPI
67 #include <contrib/dev/acpica/acpi.h>
68 #include "acpi_if.h"
69 #else
70 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
71 #endif
72 
73 extern struct dev_ops pcic_ops;	/* XXX */
74 
75 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
76 
77 static uint32_t		pci_mapbase(unsigned mapreg);
78 static const char	*pci_maptype(unsigned mapreg);
79 static int		pci_mapsize(unsigned testval);
80 static int		pci_maprange(unsigned mapreg);
81 static void		pci_fixancient(pcicfgregs *cfg);
82 
83 static int		pci_porten(device_t pcib, int b, int s, int f);
84 static int		pci_memen(device_t pcib, int b, int s, int f);
85 static void		pci_assign_interrupt(device_t bus, device_t dev,
86 			    int force_route);
87 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
88 			    int b, int s, int f, int reg,
89 			    struct resource_list *rl, int force, int prefetch);
90 static int		pci_probe(device_t dev);
91 static int		pci_attach(device_t dev);
92 static void		pci_child_detached(device_t, device_t);
93 static void		pci_load_vendor_data(void);
94 static int		pci_describe_parse_line(char **ptr, int *vendor,
95 			    int *device, char **desc);
96 static char		*pci_describe_device(device_t dev);
97 static int		pci_modevent(module_t mod, int what, void *arg);
98 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
99 			    pcicfgregs *cfg);
100 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
101 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t *data);
103 #if 0
104 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
105 			    int reg, uint32_t data);
106 #endif
107 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
108 static void		pci_disable_msi(device_t dev);
109 static void		pci_enable_msi(device_t dev, uint64_t address,
110 			    uint16_t data);
111 static void		pci_enable_msix(device_t dev, u_int index,
112 			    uint64_t address, uint32_t data);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static void		pci_resume_msi(device_t dev);
117 static void		pci_resume_msix(device_t dev);
118 static int		pcie_slotimpl(const pcicfgregs *);
119 static void		pci_print_verbose_expr(const pcicfgregs *);
120 
121 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
122 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_subvendor(device_t, int, int,
127 			    pcicfgregs *);
128 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
130 
131 static device_method_t pci_methods[] = {
132 	/* Device interface */
133 	DEVMETHOD(device_probe,		pci_probe),
134 	DEVMETHOD(device_attach,	pci_attach),
135 	DEVMETHOD(device_detach,	bus_generic_detach),
136 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
137 	DEVMETHOD(device_suspend,	pci_suspend),
138 	DEVMETHOD(device_resume,	pci_resume),
139 
140 	/* Bus interface */
141 	DEVMETHOD(bus_print_child,	pci_print_child),
142 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
143 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
144 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
145 	DEVMETHOD(bus_driver_added,	pci_driver_added),
146 	DEVMETHOD(bus_child_detached,	pci_child_detached),
147 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
148 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
149 
150 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
151 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
152 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
153 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
154 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
155 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
156 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
157 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
158 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
159 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
160 
161 	/* PCI interface */
162 	DEVMETHOD(pci_read_config,	pci_read_config_method),
163 	DEVMETHOD(pci_write_config,	pci_write_config_method),
164 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
165 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
166 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
167 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
168 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
169 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
170 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
171 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
172 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
173 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
174 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
175 	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
176 	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
177 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
178 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
179 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
180 
181 	{ 0, 0 }
182 };
183 
184 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
185 
186 static devclass_t pci_devclass;
187 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
188 MODULE_VERSION(pci, 1);
189 
190 static char	*pci_vendordata;
191 static size_t	pci_vendordata_size;
192 
193 
194 static const struct pci_read_cap {
195 	int		cap;
196 	pci_read_cap_t	read_cap;
197 } pci_read_caps[] = {
198 	{ PCIY_PMG,		pci_read_cap_pmgt },
199 	{ PCIY_HT,		pci_read_cap_ht },
200 	{ PCIY_MSI,		pci_read_cap_msi },
201 	{ PCIY_MSIX,		pci_read_cap_msix },
202 	{ PCIY_VPD,		pci_read_cap_vpd },
203 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
204 	{ PCIY_PCIX,		pci_read_cap_pcix },
205 	{ PCIY_EXPRESS,		pci_read_cap_express },
206 	{ 0, NULL } /* required last entry */
207 };
208 
209 struct pci_quirk {
210 	uint32_t devid;	/* Vendor/device of the card */
211 	int	type;
212 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
213 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
214 	int	arg1;
215 	int	arg2;
216 };
217 
218 struct pci_quirk pci_quirks[] = {
219 	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
220 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
221 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
222 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
223 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
224 
225 	/*
226 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
227 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
228 	 */
229 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
230 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
231 
232 	/*
233 	 * MSI doesn't work on earlier Intel chipsets including
234 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
235 	 */
236 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
237 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
238 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
239 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 
244 	/*
245 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
246 	 * bridge.
247 	 */
248 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
249 
250 	{ 0 }
251 };
252 
253 /* map register information */
254 #define	PCI_MAPMEM	0x01	/* memory map */
255 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
256 #define	PCI_MAPPORT	0x04	/* port map */
257 
258 struct devlist pci_devq;
259 uint32_t pci_generation;
260 uint32_t pci_numdevs = 0;
261 static int pcie_chipset, pcix_chipset;
262 
263 /* sysctl vars */
264 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
265 
266 static int pci_enable_io_modes = 1;
267 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
268 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
269     &pci_enable_io_modes, 1,
270     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
271 enable these bits correctly.  We'd like to do this all the time, but there\n\
272 are some peripherals that this causes problems with.");
273 
274 static int pci_do_power_nodriver = 0;
275 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
276 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
277     &pci_do_power_nodriver, 0,
278   "Place a function into D3 state when no driver attaches to it.  0 means\n\
279 disable.  1 means conservatively place devices into D3 state.  2 means\n\
280 aggressively place devices into D3 state.  3 means put absolutely everything\n\
281 in D3 state.");
282 
283 static int pci_do_power_resume = 1;
284 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
285 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
286     &pci_do_power_resume, 1,
287   "Transition from D3 -> D0 on resume.");
288 
289 static int pci_do_msi = 1;
290 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
291 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
292     "Enable support for MSI interrupts");
293 
294 static int pci_do_msix = 0;
295 #if 0
296 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
297 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
298     "Enable support for MSI-X interrupts");
299 #endif
300 
301 static int pci_honor_msi_blacklist = 1;
302 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
303 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
304     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
305 
306 static int pci_msi_cpuid;
307 
308 /* Find a device_t by bus/slot/function in domain 0 */
309 
310 device_t
311 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
312 {
313 
314 	return (pci_find_dbsf(0, bus, slot, func));
315 }
316 
317 /* Find a device_t by domain/bus/slot/function */
318 
319 device_t
320 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
321 {
322 	struct pci_devinfo *dinfo;
323 
324 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
325 		if ((dinfo->cfg.domain == domain) &&
326 		    (dinfo->cfg.bus == bus) &&
327 		    (dinfo->cfg.slot == slot) &&
328 		    (dinfo->cfg.func == func)) {
329 			return (dinfo->cfg.dev);
330 		}
331 	}
332 
333 	return (NULL);
334 }
335 
336 /* Find a device_t by vendor/device ID */
337 
338 device_t
339 pci_find_device(uint16_t vendor, uint16_t device)
340 {
341 	struct pci_devinfo *dinfo;
342 
343 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
344 		if ((dinfo->cfg.vendor == vendor) &&
345 		    (dinfo->cfg.device == device)) {
346 			return (dinfo->cfg.dev);
347 		}
348 	}
349 
350 	return (NULL);
351 }
352 
353 /* return base address of memory or port map */
354 
355 static uint32_t
356 pci_mapbase(uint32_t mapreg)
357 {
358 
359 	if (PCI_BAR_MEM(mapreg))
360 		return (mapreg & PCIM_BAR_MEM_BASE);
361 	else
362 		return (mapreg & PCIM_BAR_IO_BASE);
363 }
364 
365 /* return map type of memory or port map */
366 
367 static const char *
368 pci_maptype(unsigned mapreg)
369 {
370 
371 	if (PCI_BAR_IO(mapreg))
372 		return ("I/O Port");
373 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
374 		return ("Prefetchable Memory");
375 	return ("Memory");
376 }
377 
378 /* return log2 of map size decoded for memory or port map */
379 
380 static int
381 pci_mapsize(uint32_t testval)
382 {
383 	int ln2size;
384 
385 	testval = pci_mapbase(testval);
386 	ln2size = 0;
387 	if (testval != 0) {
388 		while ((testval & 1) == 0)
389 		{
390 			ln2size++;
391 			testval >>= 1;
392 		}
393 	}
394 	return (ln2size);
395 }
396 
397 /* return log2 of address range supported by map register */
398 
399 static int
400 pci_maprange(unsigned mapreg)
401 {
402 	int ln2range = 0;
403 
404 	if (PCI_BAR_IO(mapreg))
405 		ln2range = 32;
406 	else
407 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
408 		case PCIM_BAR_MEM_32:
409 			ln2range = 32;
410 			break;
411 		case PCIM_BAR_MEM_1MB:
412 			ln2range = 20;
413 			break;
414 		case PCIM_BAR_MEM_64:
415 			ln2range = 64;
416 			break;
417 		}
418 	return (ln2range);
419 }
420 
421 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
422 
423 static void
424 pci_fixancient(pcicfgregs *cfg)
425 {
426 	if (cfg->hdrtype != 0)
427 		return;
428 
429 	/* PCI to PCI bridges use header type 1 */
430 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
431 		cfg->hdrtype = 1;
432 }
433 
434 /* extract header type specific config data */
435 
436 static void
437 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
438 {
439 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
440 	switch (cfg->hdrtype) {
441 	case 0:
442 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
443 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
444 		cfg->nummaps	    = PCI_MAXMAPS_0;
445 		break;
446 	case 1:
447 		cfg->nummaps	    = PCI_MAXMAPS_1;
448 #ifdef COMPAT_OLDPCI
449 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
450 #endif
451 		break;
452 	case 2:
453 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
454 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
455 		cfg->nummaps	    = PCI_MAXMAPS_2;
456 #ifdef COMPAT_OLDPCI
457 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
458 #endif
459 		break;
460 	}
461 #undef REG
462 }
463 
464 /* read configuration header into pcicfgregs structure */
465 struct pci_devinfo *
466 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
467 {
468 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
469 	pcicfgregs *cfg = NULL;
470 	struct pci_devinfo *devlist_entry;
471 	struct devlist *devlist_head;
472 
473 	devlist_head = &pci_devq;
474 
475 	devlist_entry = NULL;
476 
477 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
478 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
479 
480 		cfg = &devlist_entry->cfg;
481 
482 		cfg->domain		= d;
483 		cfg->bus		= b;
484 		cfg->slot		= s;
485 		cfg->func		= f;
486 		cfg->vendor		= REG(PCIR_VENDOR, 2);
487 		cfg->device		= REG(PCIR_DEVICE, 2);
488 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
489 		cfg->statreg		= REG(PCIR_STATUS, 2);
490 		cfg->baseclass		= REG(PCIR_CLASS, 1);
491 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
492 		cfg->progif		= REG(PCIR_PROGIF, 1);
493 		cfg->revid		= REG(PCIR_REVID, 1);
494 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
495 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
496 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
497 		cfg->intpin		= REG(PCIR_INTPIN, 1);
498 		cfg->intline		= REG(PCIR_INTLINE, 1);
499 
500 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
501 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
502 
503 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
504 		cfg->hdrtype		&= ~PCIM_MFDEV;
505 
506 		pci_fixancient(cfg);
507 		pci_hdrtypedata(pcib, b, s, f, cfg);
508 
509 		pci_read_capabilities(pcib, cfg);
510 
511 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
512 
513 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
514 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
515 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
516 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
517 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
518 
519 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
520 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
521 		devlist_entry->conf.pc_vendor = cfg->vendor;
522 		devlist_entry->conf.pc_device = cfg->device;
523 
524 		devlist_entry->conf.pc_class = cfg->baseclass;
525 		devlist_entry->conf.pc_subclass = cfg->subclass;
526 		devlist_entry->conf.pc_progif = cfg->progif;
527 		devlist_entry->conf.pc_revid = cfg->revid;
528 
529 		pci_numdevs++;
530 		pci_generation++;
531 	}
532 	return (devlist_entry);
533 #undef REG
534 }
535 
536 static int
537 pci_fixup_nextptr(int *nextptr0)
538 {
539 	int nextptr = *nextptr0;
540 
541 	/* "Next pointer" is only one byte */
542 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d\n", nextptr));
543 
544 	if (nextptr & 0x3) {
545 		/*
546 		 * PCI local bus spec 3.0:
547 		 *
548 		 * "... The bottom two bits of all pointers are reserved
549 		 *  and must be implemented as 00b although software must
550 		 *  mask them to allow for future uses of these bits ..."
551 		 */
552 		if (bootverbose) {
553 			kprintf("Illegal PCI extended capability "
554 				"offset, fixup 0x%02x -> 0x%02x\n",
555 				nextptr, nextptr & ~0x3);
556 		}
557 		nextptr &= ~0x3;
558 	}
559 	*nextptr0 = nextptr;
560 
561 	if (nextptr < 0x40) {
562 		if (nextptr != 0) {
563 			kprintf("Illegal PCI extended capability "
564 				"offset 0x%02x", nextptr);
565 		}
566 		return 0;
567 	}
568 	return 1;
569 }
570 
571 static void
572 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
573 {
574 #define REG(n, w)	\
575 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
576 
577 	struct pcicfg_pp *pp = &cfg->pp;
578 
579 	if (pp->pp_cap)
580 		return;
581 
582 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
583 	pp->pp_status = ptr + PCIR_POWER_STATUS;
584 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
585 
586 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
587 		/*
588 		 * XXX
589 		 * We should write to data_select and read back from
590 		 * data_scale to determine whether data register is
591 		 * implemented.
592 		 */
593 #ifdef foo
594 		pp->pp_data = ptr + PCIR_POWER_DATA;
595 #else
596 		pp->pp_data = 0;
597 #endif
598 	}
599 
600 #undef REG
601 }
602 
603 static void
604 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
605 {
606 #if defined(__i386__) || defined(__x86_64__)
607 
608 #define REG(n, w)	\
609 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
610 
611 	struct pcicfg_ht *ht = &cfg->ht;
612 	uint64_t addr;
613 	uint32_t val;
614 
615 	/* Determine HT-specific capability type. */
616 	val = REG(ptr + PCIR_HT_COMMAND, 2);
617 
618 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
619 		cfg->ht.ht_slave = ptr;
620 
621 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
622 		return;
623 
624 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
625 		/* Sanity check the mapping window. */
626 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
627 		addr <<= 32;
628 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
629 		if (addr != MSI_X86_ADDR_BASE) {
630 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
631 				"has non-default MSI window 0x%llx\n",
632 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
633 				(long long)addr);
634 		}
635 	} else {
636 		addr = MSI_X86_ADDR_BASE;
637 	}
638 
639 	ht->ht_msimap = ptr;
640 	ht->ht_msictrl = val;
641 	ht->ht_msiaddr = addr;
642 
643 #undef REG
644 
645 #endif	/* __i386__ || __x86_64__ */
646 }
647 
648 static void
649 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
650 {
651 #define REG(n, w)	\
652 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
653 
654 	struct pcicfg_msi *msi = &cfg->msi;
655 
656 	msi->msi_location = ptr;
657 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
658 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
659 
660 #undef REG
661 }
662 
663 static void
664 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
665 {
666 #define REG(n, w)	\
667 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
668 
669 	struct pcicfg_msix *msix = &cfg->msix;
670 	uint32_t val;
671 
672 	msix->msix_location = ptr;
673 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
674 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
675 
676 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
677 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
678 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
679 
680 	val = REG(ptr + PCIR_MSIX_PBA, 4);
681 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
682 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
683 
684 #undef REG
685 }
686 
687 static void
688 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
689 {
690 	cfg->vpd.vpd_reg = ptr;
691 }
692 
693 static void
694 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
695 {
696 #define REG(n, w)	\
697 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
698 
699 	/* Should always be true. */
700 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
701 		uint32_t val;
702 
703 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
704 		cfg->subvendor = val & 0xffff;
705 		cfg->subdevice = val >> 16;
706 	}
707 
708 #undef REG
709 }
710 
711 static void
712 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
713 {
714 	/*
715 	 * Assume we have a PCI-X chipset if we have
716 	 * at least one PCI-PCI bridge with a PCI-X
717 	 * capability.  Note that some systems with
718 	 * PCI-express or HT chipsets might match on
719 	 * this check as well.
720 	 */
721 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
722 		pcix_chipset = 1;
723 
724 	cfg->pcix.pcix_ptr = ptr;
725 }
726 
727 static int
728 pcie_slotimpl(const pcicfgregs *cfg)
729 {
730 	const struct pcicfg_expr *expr = &cfg->expr;
731 	uint16_t port_type;
732 
733 	/*
734 	 * Only version 1 can be parsed currently
735 	 */
736 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
737 		return 0;
738 
739 	/*
740 	 * - Slot implemented bit is meaningful iff current port is
741 	 *   root port or down stream port.
742 	 * - Testing for root port or down stream port is meanningful
743 	 *   iff PCI configure has type 1 header.
744 	 */
745 
746 	if (cfg->hdrtype != 1)
747 		return 0;
748 
749 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
750 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
751 		return 0;
752 
753 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
754 		return 0;
755 
756 	return 1;
757 }
758 
759 static void
760 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
761 {
762 #define REG(n, w)	\
763 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
764 
765 	struct pcicfg_expr *expr = &cfg->expr;
766 
767 	/*
768 	 * Assume we have a PCI-express chipset if we have
769 	 * at least one PCI-express device.
770 	 */
771 	pcie_chipset = 1;
772 
773 	expr->expr_ptr = ptr;
774 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
775 
776 	/*
777 	 * Only version 1 can be parsed currently
778 	 */
779 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
780 		return;
781 
782 	/*
783 	 * Read slot capabilities.  Slot capabilities exists iff
784 	 * current port's slot is implemented
785 	 */
786 	if (pcie_slotimpl(cfg))
787 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
788 
789 #undef REG
790 }
791 
792 static void
793 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
794 {
795 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
796 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
797 
798 	uint32_t val;
799 	int nextptr, ptrptr;
800 
801 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
802 		/* No capabilities */
803 		return;
804 	}
805 
806 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
807 	case 0:
808 	case 1:
809 		ptrptr = PCIR_CAP_PTR;
810 		break;
811 	case 2:
812 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
813 		break;
814 	default:
815 		return;				/* no capabilities support */
816 	}
817 	nextptr = REG(ptrptr, 1);	/* sanity check? */
818 
819 	/*
820 	 * Read capability entries.
821 	 */
822 	while (pci_fixup_nextptr(&nextptr)) {
823 		const struct pci_read_cap *rc;
824 		int ptr = nextptr;
825 
826 		/* Find the next entry */
827 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
828 
829 		/* Process this entry */
830 		val = REG(ptr + PCICAP_ID, 1);
831 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
832 			if (rc->cap == val) {
833 				rc->read_cap(pcib, ptr, nextptr, cfg);
834 				break;
835 			}
836 		}
837 	}
838 
839 #if defined(__i386__) || defined(__x86_64__)
840 	/*
841 	 * Enable the MSI mapping window for all HyperTransport
842 	 * slaves.  PCI-PCI bridges have their windows enabled via
843 	 * PCIB_MAP_MSI().
844 	 */
845 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
846 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
847 		device_printf(pcib,
848 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
849 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
850 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
851 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
852 		     2);
853 	}
854 #endif
855 
856 /* REG and WREG use carry through to next functions */
857 }
858 
859 /*
860  * PCI Vital Product Data
861  */
862 
863 #define	PCI_VPD_TIMEOUT		1000000
864 
865 static int
866 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
867 {
868 	int count = PCI_VPD_TIMEOUT;
869 
870 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
871 
872 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
873 
874 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
875 		if (--count < 0)
876 			return (ENXIO);
877 		DELAY(1);	/* limit looping */
878 	}
879 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
880 
881 	return (0);
882 }
883 
884 #if 0
885 static int
886 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
887 {
888 	int count = PCI_VPD_TIMEOUT;
889 
890 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
891 
892 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
893 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
894 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
895 		if (--count < 0)
896 			return (ENXIO);
897 		DELAY(1);	/* limit looping */
898 	}
899 
900 	return (0);
901 }
902 #endif
903 
904 #undef PCI_VPD_TIMEOUT
905 
906 struct vpd_readstate {
907 	device_t	pcib;
908 	pcicfgregs	*cfg;
909 	uint32_t	val;
910 	int		bytesinval;
911 	int		off;
912 	uint8_t		cksum;
913 };
914 
915 static int
916 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
917 {
918 	uint32_t reg;
919 	uint8_t byte;
920 
921 	if (vrs->bytesinval == 0) {
922 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
923 			return (ENXIO);
924 		vrs->val = le32toh(reg);
925 		vrs->off += 4;
926 		byte = vrs->val & 0xff;
927 		vrs->bytesinval = 3;
928 	} else {
929 		vrs->val = vrs->val >> 8;
930 		byte = vrs->val & 0xff;
931 		vrs->bytesinval--;
932 	}
933 
934 	vrs->cksum += byte;
935 	*data = byte;
936 	return (0);
937 }
938 
939 int
940 pcie_slot_implemented(device_t dev)
941 {
942 	struct pci_devinfo *dinfo = device_get_ivars(dev);
943 
944 	return pcie_slotimpl(&dinfo->cfg);
945 }
946 
947 void
948 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
949 {
950 	uint8_t expr_ptr;
951 	uint16_t val;
952 
953 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
954 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
955 		panic("%s: invalid max read request size 0x%02x\n",
956 		      device_get_nameunit(dev), rqsize);
957 	}
958 
959 	expr_ptr = pci_get_pciecap_ptr(dev);
960 	if (!expr_ptr)
961 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
962 
963 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
964 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
965 		if (bootverbose)
966 			device_printf(dev, "adjust device control 0x%04x", val);
967 
968 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
969 		val |= rqsize;
970 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
971 
972 		if (bootverbose)
973 			kprintf(" -> 0x%04x\n", val);
974 	}
975 }
976 
977 uint16_t
978 pcie_get_max_readrq(device_t dev)
979 {
980 	uint8_t expr_ptr;
981 	uint16_t val;
982 
983 	expr_ptr = pci_get_pciecap_ptr(dev);
984 	if (!expr_ptr)
985 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
986 
987 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
988 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
989 }
990 
991 static void
992 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
993 {
994 	struct vpd_readstate vrs;
995 	int state;
996 	int name;
997 	int remain;
998 	int i;
999 	int alloc, off;		/* alloc/off for RO/W arrays */
1000 	int cksumvalid;
1001 	int dflen;
1002 	uint8_t byte;
1003 	uint8_t byte2;
1004 
1005 	/* init vpd reader */
1006 	vrs.bytesinval = 0;
1007 	vrs.off = 0;
1008 	vrs.pcib = pcib;
1009 	vrs.cfg = cfg;
1010 	vrs.cksum = 0;
1011 
1012 	state = 0;
1013 	name = remain = i = 0;	/* shut up stupid gcc */
1014 	alloc = off = 0;	/* shut up stupid gcc */
1015 	dflen = 0;		/* shut up stupid gcc */
1016 	cksumvalid = -1;
1017 	while (state >= 0) {
1018 		if (vpd_nextbyte(&vrs, &byte)) {
1019 			state = -2;
1020 			break;
1021 		}
1022 #if 0
1023 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1024 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1025 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1026 #endif
1027 		switch (state) {
1028 		case 0:		/* item name */
1029 			if (byte & 0x80) {
1030 				if (vpd_nextbyte(&vrs, &byte2)) {
1031 					state = -2;
1032 					break;
1033 				}
1034 				remain = byte2;
1035 				if (vpd_nextbyte(&vrs, &byte2)) {
1036 					state = -2;
1037 					break;
1038 				}
1039 				remain |= byte2 << 8;
1040 				if (remain > (0x7f*4 - vrs.off)) {
1041 					state = -1;
1042 					kprintf(
1043 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1044 					    cfg->domain, cfg->bus, cfg->slot,
1045 					    cfg->func, remain);
1046 				}
1047 				name = byte & 0x7f;
1048 			} else {
1049 				remain = byte & 0x7;
1050 				name = (byte >> 3) & 0xf;
1051 			}
1052 			switch (name) {
1053 			case 0x2:	/* String */
1054 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1055 				    M_DEVBUF, M_WAITOK);
1056 				i = 0;
1057 				state = 1;
1058 				break;
1059 			case 0xf:	/* End */
1060 				state = -1;
1061 				break;
1062 			case 0x10:	/* VPD-R */
1063 				alloc = 8;
1064 				off = 0;
1065 				cfg->vpd.vpd_ros = kmalloc(alloc *
1066 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1067 				    M_WAITOK | M_ZERO);
1068 				state = 2;
1069 				break;
1070 			case 0x11:	/* VPD-W */
1071 				alloc = 8;
1072 				off = 0;
1073 				cfg->vpd.vpd_w = kmalloc(alloc *
1074 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1075 				    M_WAITOK | M_ZERO);
1076 				state = 5;
1077 				break;
1078 			default:	/* Invalid data, abort */
1079 				state = -1;
1080 				break;
1081 			}
1082 			break;
1083 
1084 		case 1:	/* Identifier String */
1085 			cfg->vpd.vpd_ident[i++] = byte;
1086 			remain--;
1087 			if (remain == 0)  {
1088 				cfg->vpd.vpd_ident[i] = '\0';
1089 				state = 0;
1090 			}
1091 			break;
1092 
1093 		case 2:	/* VPD-R Keyword Header */
1094 			if (off == alloc) {
1095 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1096 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1097 				    M_DEVBUF, M_WAITOK | M_ZERO);
1098 			}
1099 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1100 			if (vpd_nextbyte(&vrs, &byte2)) {
1101 				state = -2;
1102 				break;
1103 			}
1104 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1105 			if (vpd_nextbyte(&vrs, &byte2)) {
1106 				state = -2;
1107 				break;
1108 			}
1109 			dflen = byte2;
1110 			if (dflen == 0 &&
1111 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1112 			    2) == 0) {
1113 				/*
1114 				 * if this happens, we can't trust the rest
1115 				 * of the VPD.
1116 				 */
1117 				kprintf(
1118 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1119 				    cfg->domain, cfg->bus, cfg->slot,
1120 				    cfg->func, dflen);
1121 				cksumvalid = 0;
1122 				state = -1;
1123 				break;
1124 			} else if (dflen == 0) {
1125 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1126 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1127 				    M_DEVBUF, M_WAITOK);
1128 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1129 			} else
1130 				cfg->vpd.vpd_ros[off].value = kmalloc(
1131 				    (dflen + 1) *
1132 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1133 				    M_DEVBUF, M_WAITOK);
1134 			remain -= 3;
1135 			i = 0;
1136 			/* keep in sync w/ state 3's transistions */
1137 			if (dflen == 0 && remain == 0)
1138 				state = 0;
1139 			else if (dflen == 0)
1140 				state = 2;
1141 			else
1142 				state = 3;
1143 			break;
1144 
1145 		case 3:	/* VPD-R Keyword Value */
1146 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1147 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1148 			    "RV", 2) == 0 && cksumvalid == -1) {
1149 				if (vrs.cksum == 0)
1150 					cksumvalid = 1;
1151 				else {
1152 					if (bootverbose)
1153 						kprintf(
1154 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1155 						    cfg->domain, cfg->bus,
1156 						    cfg->slot, cfg->func,
1157 						    vrs.cksum);
1158 					cksumvalid = 0;
1159 					state = -1;
1160 					break;
1161 				}
1162 			}
1163 			dflen--;
1164 			remain--;
1165 			/* keep in sync w/ state 2's transistions */
1166 			if (dflen == 0)
1167 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1168 			if (dflen == 0 && remain == 0) {
1169 				cfg->vpd.vpd_rocnt = off;
1170 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1171 				    off * sizeof(*cfg->vpd.vpd_ros),
1172 				    M_DEVBUF, M_WAITOK | M_ZERO);
1173 				state = 0;
1174 			} else if (dflen == 0)
1175 				state = 2;
1176 			break;
1177 
1178 		case 4:
1179 			remain--;
1180 			if (remain == 0)
1181 				state = 0;
1182 			break;
1183 
1184 		case 5:	/* VPD-W Keyword Header */
1185 			if (off == alloc) {
1186 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1187 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1188 				    M_DEVBUF, M_WAITOK | M_ZERO);
1189 			}
1190 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1191 			if (vpd_nextbyte(&vrs, &byte2)) {
1192 				state = -2;
1193 				break;
1194 			}
1195 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1196 			if (vpd_nextbyte(&vrs, &byte2)) {
1197 				state = -2;
1198 				break;
1199 			}
1200 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1201 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1202 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1203 			    sizeof(*cfg->vpd.vpd_w[off].value),
1204 			    M_DEVBUF, M_WAITOK);
1205 			remain -= 3;
1206 			i = 0;
1207 			/* keep in sync w/ state 6's transistions */
1208 			if (dflen == 0 && remain == 0)
1209 				state = 0;
1210 			else if (dflen == 0)
1211 				state = 5;
1212 			else
1213 				state = 6;
1214 			break;
1215 
1216 		case 6:	/* VPD-W Keyword Value */
1217 			cfg->vpd.vpd_w[off].value[i++] = byte;
1218 			dflen--;
1219 			remain--;
1220 			/* keep in sync w/ state 5's transistions */
1221 			if (dflen == 0)
1222 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1223 			if (dflen == 0 && remain == 0) {
1224 				cfg->vpd.vpd_wcnt = off;
1225 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1226 				    off * sizeof(*cfg->vpd.vpd_w),
1227 				    M_DEVBUF, M_WAITOK | M_ZERO);
1228 				state = 0;
1229 			} else if (dflen == 0)
1230 				state = 5;
1231 			break;
1232 
1233 		default:
1234 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1235 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1236 			    state);
1237 			state = -1;
1238 			break;
1239 		}
1240 	}
1241 
1242 	if (cksumvalid == 0 || state < -1) {
1243 		/* read-only data bad, clean up */
1244 		if (cfg->vpd.vpd_ros != NULL) {
1245 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1246 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1247 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1248 			cfg->vpd.vpd_ros = NULL;
1249 		}
1250 	}
1251 	if (state < -1) {
1252 		/* I/O error, clean up */
1253 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1254 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1255 		if (cfg->vpd.vpd_ident != NULL) {
1256 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1257 			cfg->vpd.vpd_ident = NULL;
1258 		}
1259 		if (cfg->vpd.vpd_w != NULL) {
1260 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1261 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1262 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1263 			cfg->vpd.vpd_w = NULL;
1264 		}
1265 	}
1266 	cfg->vpd.vpd_cached = 1;
1267 #undef REG
1268 #undef WREG
1269 }
1270 
1271 int
1272 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1273 {
1274 	struct pci_devinfo *dinfo = device_get_ivars(child);
1275 	pcicfgregs *cfg = &dinfo->cfg;
1276 
1277 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1278 		pci_read_vpd(device_get_parent(dev), cfg);
1279 
1280 	*identptr = cfg->vpd.vpd_ident;
1281 
1282 	if (*identptr == NULL)
1283 		return (ENXIO);
1284 
1285 	return (0);
1286 }
1287 
1288 int
1289 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1290 	const char **vptr)
1291 {
1292 	struct pci_devinfo *dinfo = device_get_ivars(child);
1293 	pcicfgregs *cfg = &dinfo->cfg;
1294 	int i;
1295 
1296 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1297 		pci_read_vpd(device_get_parent(dev), cfg);
1298 
1299 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1300 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1301 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1302 			*vptr = cfg->vpd.vpd_ros[i].value;
1303 		}
1304 
1305 	if (i != cfg->vpd.vpd_rocnt)
1306 		return (0);
1307 
1308 	*vptr = NULL;
1309 	return (ENXIO);
1310 }
1311 
1312 /*
1313  * Return the offset in configuration space of the requested extended
1314  * capability entry or 0 if the specified capability was not found.
1315  */
1316 int
1317 pci_find_extcap_method(device_t dev, device_t child, int capability,
1318     int *capreg)
1319 {
1320 	struct pci_devinfo *dinfo = device_get_ivars(child);
1321 	pcicfgregs *cfg = &dinfo->cfg;
1322 	u_int32_t status;
1323 	u_int8_t ptr;
1324 
1325 	/*
1326 	 * Check the CAP_LIST bit of the PCI status register first.
1327 	 */
1328 	status = pci_read_config(child, PCIR_STATUS, 2);
1329 	if (!(status & PCIM_STATUS_CAPPRESENT))
1330 		return (ENXIO);
1331 
1332 	/*
1333 	 * Determine the start pointer of the capabilities list.
1334 	 */
1335 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1336 	case 0:
1337 	case 1:
1338 		ptr = PCIR_CAP_PTR;
1339 		break;
1340 	case 2:
1341 		ptr = PCIR_CAP_PTR_2;
1342 		break;
1343 	default:
1344 		/* XXX: panic? */
1345 		return (ENXIO);		/* no extended capabilities support */
1346 	}
1347 	ptr = pci_read_config(child, ptr, 1);
1348 
1349 	/*
1350 	 * Traverse the capabilities list.
1351 	 */
1352 	while (ptr != 0) {
1353 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1354 			if (capreg != NULL)
1355 				*capreg = ptr;
1356 			return (0);
1357 		}
1358 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1359 	}
1360 
1361 	return (ENOENT);
1362 }
1363 
1364 /*
1365  * Support for MSI-X message interrupts.
1366  */
1367 void
1368 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1369 {
1370 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1371 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1372 	uint32_t offset;
1373 
1374 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1375 	offset = msix->msix_table_offset + index * 16;
1376 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1377 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1378 	bus_write_4(msix->msix_table_res, offset + 8, data);
1379 
1380 	/* Enable MSI -> HT mapping. */
1381 	pci_ht_map_msi(dev, address);
1382 }
1383 
1384 void
1385 pci_mask_msix(device_t dev, u_int index)
1386 {
1387 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1388 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1389 	uint32_t offset, val;
1390 
1391 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1392 	offset = msix->msix_table_offset + index * 16 + 12;
1393 	val = bus_read_4(msix->msix_table_res, offset);
1394 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1395 		val |= PCIM_MSIX_VCTRL_MASK;
1396 		bus_write_4(msix->msix_table_res, offset, val);
1397 	}
1398 }
1399 
1400 void
1401 pci_unmask_msix(device_t dev, u_int index)
1402 {
1403 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1404 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1405 	uint32_t offset, val;
1406 
1407 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1408 	offset = msix->msix_table_offset + index * 16 + 12;
1409 	val = bus_read_4(msix->msix_table_res, offset);
1410 	if (val & PCIM_MSIX_VCTRL_MASK) {
1411 		val &= ~PCIM_MSIX_VCTRL_MASK;
1412 		bus_write_4(msix->msix_table_res, offset, val);
1413 	}
1414 }
1415 
1416 int
1417 pci_pending_msix(device_t dev, u_int index)
1418 {
1419 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1420 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1421 	uint32_t offset, bit;
1422 
1423 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1424 	offset = msix->msix_pba_offset + (index / 32) * 4;
1425 	bit = 1 << index % 32;
1426 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1427 }
1428 
1429 /*
1430  * Restore MSI-X registers and table during resume.  If MSI-X is
1431  * enabled then walk the virtual table to restore the actual MSI-X
1432  * table.
1433  */
1434 static void
1435 pci_resume_msix(device_t dev)
1436 {
1437 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1438 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1439 	struct msix_table_entry *mte;
1440 	struct msix_vector *mv;
1441 	int i;
1442 
1443 	if (msix->msix_alloc > 0) {
1444 		/* First, mask all vectors. */
1445 		for (i = 0; i < msix->msix_msgnum; i++)
1446 			pci_mask_msix(dev, i);
1447 
1448 		/* Second, program any messages with at least one handler. */
1449 		for (i = 0; i < msix->msix_table_len; i++) {
1450 			mte = &msix->msix_table[i];
1451 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1452 				continue;
1453 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1454 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1455 			pci_unmask_msix(dev, i);
1456 		}
1457 	}
1458 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1459 	    msix->msix_ctrl, 2);
1460 }
1461 
1462 /*
1463  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1464  * returned in *count.  After this function returns, each message will be
1465  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1466  */
1467 int
1468 pci_alloc_msix_method(device_t dev, device_t child, int *count)
1469 {
1470 	struct pci_devinfo *dinfo = device_get_ivars(child);
1471 	pcicfgregs *cfg = &dinfo->cfg;
1472 	struct resource_list_entry *rle;
1473 	int actual, error, i, irq, max;
1474 
1475 	/* Don't let count == 0 get us into trouble. */
1476 	if (*count == 0)
1477 		return (EINVAL);
1478 
1479 	/* If rid 0 is allocated, then fail. */
1480 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1481 	if (rle != NULL && rle->res != NULL)
1482 		return (ENXIO);
1483 
1484 	/* Already have allocated messages? */
1485 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
1486 		return (ENXIO);
1487 
1488 	/* If MSI is blacklisted for this system, fail. */
1489 	if (pci_msi_blacklisted())
1490 		return (ENXIO);
1491 
1492 	/* MSI-X capability present? */
1493 	if (cfg->msix.msix_location == 0 || !pci_do_msix)
1494 		return (ENODEV);
1495 
1496 	/* Make sure the appropriate BARs are mapped. */
1497 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1498 	    cfg->msix.msix_table_bar);
1499 	if (rle == NULL || rle->res == NULL ||
1500 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1501 		return (ENXIO);
1502 	cfg->msix.msix_table_res = rle->res;
1503 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1504 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1505 		    cfg->msix.msix_pba_bar);
1506 		if (rle == NULL || rle->res == NULL ||
1507 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1508 			return (ENXIO);
1509 	}
1510 	cfg->msix.msix_pba_res = rle->res;
1511 
1512 	if (bootverbose)
1513 		device_printf(child,
1514 		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
1515 		    *count, cfg->msix.msix_msgnum);
1516 	max = min(*count, cfg->msix.msix_msgnum);
1517 	for (i = 0; i < max; i++) {
1518 		/* Allocate a message. */
1519 		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
1520 		if (error)
1521 			break;
1522 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1523 		    irq, 1, -1);
1524 	}
1525 	actual = i;
1526 
1527 	if (actual == 0) {
1528 		if (bootverbose) {
1529 			device_printf(child,
1530 			    "could not allocate any MSI-X vectors\n");
1531 		}
1532 		return  (ENXIO);
1533 	}
1534 
1535 	if (bootverbose) {
1536 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
1537 		if (actual == 1)
1538 			device_printf(child, "using IRQ %lu for MSI-X\n",
1539 			    rle->start);
1540 		else {
1541 			int run;
1542 
1543 			/*
1544 			 * Be fancy and try to print contiguous runs of
1545 			 * IRQ values as ranges.  'irq' is the previous IRQ.
1546 			 * 'run' is true if we are in a range.
1547 			 */
1548 			device_printf(child, "using IRQs %lu", rle->start);
1549 			irq = rle->start;
1550 			run = 0;
1551 			for (i = 1; i < actual; i++) {
1552 				rle = resource_list_find(&dinfo->resources,
1553 				    SYS_RES_IRQ, i + 1);
1554 
1555 				/* Still in a run? */
1556 				if (rle->start == irq + 1) {
1557 					run = 1;
1558 					irq++;
1559 					continue;
1560 				}
1561 
1562 				/* Finish previous range. */
1563 				if (run) {
1564 					kprintf("-%d", irq);
1565 					run = 0;
1566 				}
1567 
1568 				/* Start new range. */
1569 				kprintf(",%lu", rle->start);
1570 				irq = rle->start;
1571 			}
1572 
1573 			/* Unfinished range? */
1574 			if (run)
1575 				kprintf("-%d", irq);
1576 			kprintf(" for MSI-X\n");
1577 		}
1578 	}
1579 
1580 	/* Mask all vectors. */
1581 	for (i = 0; i < cfg->msix.msix_msgnum; i++)
1582 		pci_mask_msix(child, i);
1583 
1584 	/* Allocate and initialize vector data and virtual table. */
1585 	cfg->msix.msix_vectors = kmalloc(sizeof(struct msix_vector) * actual,
1586 	    M_DEVBUF, M_WAITOK | M_ZERO);
1587 	cfg->msix.msix_table = kmalloc(sizeof(struct msix_table_entry) * actual,
1588 	    M_DEVBUF, M_WAITOK | M_ZERO);
1589 	for (i = 0; i < actual; i++) {
1590 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1591 		cfg->msix.msix_vectors[i].mv_irq = rle->start;
1592 		cfg->msix.msix_table[i].mte_vector = i + 1;
1593 	}
1594 
1595 	/* Update control register to enable MSI-X. */
1596 	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1597 	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
1598 	    cfg->msix.msix_ctrl, 2);
1599 
1600 	/* Update counts of alloc'd messages. */
1601 	cfg->msix.msix_alloc = actual;
1602 	cfg->msix.msix_table_len = actual;
1603 	*count = actual;
1604 	return (0);
1605 }
1606 
1607 /*
1608  * By default, pci_alloc_msix() will assign the allocated IRQ
1609  * resources consecutively to the first N messages in the MSI-X table.
1610  * However, device drivers may want to use different layouts if they
1611  * either receive fewer messages than they asked for, or they wish to
1612  * populate the MSI-X table sparsely.  This method allows the driver
1613  * to specify what layout it wants.  It must be called after a
1614  * successful pci_alloc_msix() but before any of the associated
1615  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1616  *
1617  * The 'vectors' array contains 'count' message vectors.  The array
1618  * maps directly to the MSI-X table in that index 0 in the array
1619  * specifies the vector for the first message in the MSI-X table, etc.
1620  * The vector value in each array index can either be 0 to indicate
1621  * that no vector should be assigned to a message slot, or it can be a
1622  * number from 1 to N (where N is the count returned from a
1623  * succcessful call to pci_alloc_msix()) to indicate which message
1624  * vector (IRQ) to be used for the corresponding message.
1625  *
1626  * On successful return, each message with a non-zero vector will have
1627  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1628  * 1.  Additionally, if any of the IRQs allocated via the previous
1629  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1630  * will be kfreed back to the system automatically.
1631  *
1632  * For example, suppose a driver has a MSI-X table with 6 messages and
1633  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1634  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1635  * C.  After the call to pci_alloc_msix(), the device will be setup to
1636  * have an MSI-X table of ABC--- (where - means no vector assigned).
1637  * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 },
1638  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1639  * be kfreed back to the system.  This device will also have valid
1640  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1641  *
1642  * In any case, the SYS_RES_IRQ rid X will always map to the message
1643  * at MSI-X table index X - 1 and will only be valid if a vector is
1644  * assigned to that table entry.
1645  */
1646 int
1647 pci_remap_msix_method(device_t dev, device_t child, int count,
1648     const u_int *vectors)
1649 {
1650 	struct pci_devinfo *dinfo = device_get_ivars(child);
1651 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1652 	struct resource_list_entry *rle;
1653 	int i, irq, j, *used;
1654 
1655 	/*
1656 	 * Have to have at least one message in the table but the
1657 	 * table can't be bigger than the actual MSI-X table in the
1658 	 * device.
1659 	 */
1660 	if (count == 0 || count > msix->msix_msgnum)
1661 		return (EINVAL);
1662 
1663 	/* Sanity check the vectors. */
1664 	for (i = 0; i < count; i++)
1665 		if (vectors[i] > msix->msix_alloc)
1666 			return (EINVAL);
1667 
1668 	/*
1669 	 * Make sure there aren't any holes in the vectors to be used.
1670 	 * It's a big pain to support it, and it doesn't really make
1671 	 * sense anyway.  Also, at least one vector must be used.
1672 	 */
1673 	used = kmalloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1674 	    M_ZERO);
1675 	for (i = 0; i < count; i++)
1676 		if (vectors[i] != 0)
1677 			used[vectors[i] - 1] = 1;
1678 	for (i = 0; i < msix->msix_alloc - 1; i++)
1679 		if (used[i] == 0 && used[i + 1] == 1) {
1680 			kfree(used, M_DEVBUF);
1681 			return (EINVAL);
1682 		}
1683 	if (used[0] != 1) {
1684 		kfree(used, M_DEVBUF);
1685 		return (EINVAL);
1686 	}
1687 
1688 	/* Make sure none of the resources are allocated. */
1689 	for (i = 0; i < msix->msix_table_len; i++) {
1690 		if (msix->msix_table[i].mte_vector == 0)
1691 			continue;
1692 		if (msix->msix_table[i].mte_handlers > 0)
1693 			return (EBUSY);
1694 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1695 		KASSERT(rle != NULL, ("missing resource"));
1696 		if (rle->res != NULL)
1697 			return (EBUSY);
1698 	}
1699 
1700 	/* Free the existing resource list entries. */
1701 	for (i = 0; i < msix->msix_table_len; i++) {
1702 		if (msix->msix_table[i].mte_vector == 0)
1703 			continue;
1704 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1705 	}
1706 
1707 	/*
1708 	 * Build the new virtual table keeping track of which vectors are
1709 	 * used.
1710 	 */
1711 	kfree(msix->msix_table, M_DEVBUF);
1712 	msix->msix_table = kmalloc(sizeof(struct msix_table_entry) * count,
1713 	    M_DEVBUF, M_WAITOK | M_ZERO);
1714 	for (i = 0; i < count; i++)
1715 		msix->msix_table[i].mte_vector = vectors[i];
1716 	msix->msix_table_len = count;
1717 
1718 	/* Free any unused IRQs and resize the vectors array if necessary. */
1719 	j = msix->msix_alloc - 1;
1720 	if (used[j] == 0) {
1721 		struct msix_vector *vec;
1722 
1723 		while (used[j] == 0) {
1724 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1725 			    msix->msix_vectors[j].mv_irq);
1726 			j--;
1727 		}
1728 		vec = kmalloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1729 		    M_WAITOK);
1730 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1731 		    (j + 1));
1732 		kfree(msix->msix_vectors, M_DEVBUF);
1733 		msix->msix_vectors = vec;
1734 		msix->msix_alloc = j + 1;
1735 	}
1736 	kfree(used, M_DEVBUF);
1737 
1738 	/* Map the IRQs onto the rids. */
1739 	for (i = 0; i < count; i++) {
1740 		if (vectors[i] == 0)
1741 			continue;
1742 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1743 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1744 		    irq, 1, -1);
1745 	}
1746 
1747 	if (bootverbose) {
1748 		device_printf(child, "Remapped MSI-X IRQs as: ");
1749 		for (i = 0; i < count; i++) {
1750 			if (i != 0)
1751 				kprintf(", ");
1752 			if (vectors[i] == 0)
1753 				kprintf("---");
1754 			else
1755 				kprintf("%d",
1756 				    msix->msix_vectors[vectors[i]].mv_irq);
1757 		}
1758 		kprintf("\n");
1759 	}
1760 
1761 	return (0);
1762 }
1763 
1764 static int
1765 pci_release_msix(device_t dev, device_t child)
1766 {
1767 	struct pci_devinfo *dinfo = device_get_ivars(child);
1768 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1769 	struct resource_list_entry *rle;
1770 	int i;
1771 
1772 	/* Do we have any messages to release? */
1773 	if (msix->msix_alloc == 0)
1774 		return (ENODEV);
1775 
1776 	/* Make sure none of the resources are allocated. */
1777 	for (i = 0; i < msix->msix_table_len; i++) {
1778 		if (msix->msix_table[i].mte_vector == 0)
1779 			continue;
1780 		if (msix->msix_table[i].mte_handlers > 0)
1781 			return (EBUSY);
1782 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1783 		KASSERT(rle != NULL, ("missing resource"));
1784 		if (rle->res != NULL)
1785 			return (EBUSY);
1786 	}
1787 
1788 	/* Update control register to disable MSI-X. */
1789 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1790 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1791 	    msix->msix_ctrl, 2);
1792 
1793 	/* Free the resource list entries. */
1794 	for (i = 0; i < msix->msix_table_len; i++) {
1795 		if (msix->msix_table[i].mte_vector == 0)
1796 			continue;
1797 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1798 	}
1799 	kfree(msix->msix_table, M_DEVBUF);
1800 	msix->msix_table_len = 0;
1801 
1802 	/* Release the IRQs. */
1803 	for (i = 0; i < msix->msix_alloc; i++)
1804 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1805 		    msix->msix_vectors[i].mv_irq);
1806 	kfree(msix->msix_vectors, M_DEVBUF);
1807 	msix->msix_alloc = 0;
1808 	return (0);
1809 }
1810 
1811 /*
1812  * Return the max supported MSI-X messages this device supports.
1813  * Basically, assuming the MD code can alloc messages, this function
1814  * should return the maximum value that pci_alloc_msix() can return.
1815  * Thus, it is subject to the tunables, etc.
1816  */
1817 int
1818 pci_msix_count_method(device_t dev, device_t child)
1819 {
1820 	struct pci_devinfo *dinfo = device_get_ivars(child);
1821 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1822 
1823 	if (pci_do_msix && msix->msix_location != 0)
1824 		return (msix->msix_msgnum);
1825 	return (0);
1826 }
1827 
1828 /*
1829  * HyperTransport MSI mapping control
1830  */
1831 void
1832 pci_ht_map_msi(device_t dev, uint64_t addr)
1833 {
1834 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1835 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1836 
1837 	if (!ht->ht_msimap)
1838 		return;
1839 
1840 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1841 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1842 		/* Enable MSI -> HT mapping. */
1843 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1844 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1845 		    ht->ht_msictrl, 2);
1846 	}
1847 
1848 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1849 		/* Disable MSI -> HT mapping. */
1850 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1851 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1852 		    ht->ht_msictrl, 2);
1853 	}
1854 }
1855 
1856 /*
1857  * Support for MSI message signalled interrupts.
1858  */
1859 void
1860 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1861 {
1862 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1863 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1864 
1865 	/* Write data and address values. */
1866 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1867 	    address & 0xffffffff, 4);
1868 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1869 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1870 		    address >> 32, 4);
1871 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1872 		    data, 2);
1873 	} else
1874 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1875 		    2);
1876 
1877 	/* Enable MSI in the control register. */
1878 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1879 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1880 	    2);
1881 
1882 	/* Enable MSI -> HT mapping. */
1883 	pci_ht_map_msi(dev, address);
1884 }
1885 
1886 void
1887 pci_disable_msi(device_t dev)
1888 {
1889 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1890 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1891 
1892 	/* Disable MSI -> HT mapping. */
1893 	pci_ht_map_msi(dev, 0);
1894 
1895 	/* Disable MSI in the control register. */
1896 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1897 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1898 	    2);
1899 }
1900 
1901 /*
1902  * Restore MSI registers during resume.  If MSI is enabled then
1903  * restore the data and address registers in addition to the control
1904  * register.
1905  */
1906 static void
1907 pci_resume_msi(device_t dev)
1908 {
1909 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1910 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1911 	uint64_t address;
1912 	uint16_t data;
1913 
1914 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1915 		address = msi->msi_addr;
1916 		data = msi->msi_data;
1917 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1918 		    address & 0xffffffff, 4);
1919 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1920 			pci_write_config(dev, msi->msi_location +
1921 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1922 			pci_write_config(dev, msi->msi_location +
1923 			    PCIR_MSI_DATA_64BIT, data, 2);
1924 		} else
1925 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1926 			    data, 2);
1927 	}
1928 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1929 	    2);
1930 }
1931 
1932 int
1933 pci_remap_msi_irq(device_t dev, u_int irq)
1934 {
1935 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1936 	pcicfgregs *cfg = &dinfo->cfg;
1937 	struct resource_list_entry *rle;
1938 	struct msix_table_entry *mte;
1939 	struct msix_vector *mv;
1940 	device_t bus;
1941 	uint64_t addr;
1942 	uint32_t data;
1943 	int error, i, j;
1944 
1945 	bus = device_get_parent(dev);
1946 
1947 	/*
1948 	 * Handle MSI first.  We try to find this IRQ among our list
1949 	 * of MSI IRQs.  If we find it, we request updated address and
1950 	 * data registers and apply the results.
1951 	 */
1952 	if (cfg->msi.msi_alloc > 0) {
1953 
1954 		/* If we don't have any active handlers, nothing to do. */
1955 		if (cfg->msi.msi_handlers == 0)
1956 			return (0);
1957 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1958 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1959 			    i + 1);
1960 			if (rle->start == irq) {
1961 				error = PCIB_MAP_MSI(device_get_parent(bus),
1962 				    dev, irq, &addr, &data, -1 /* XXX */);
1963 				if (error)
1964 					return (error);
1965 				pci_disable_msi(dev);
1966 				dinfo->cfg.msi.msi_addr = addr;
1967 				dinfo->cfg.msi.msi_data = data;
1968 				pci_enable_msi(dev, addr, data);
1969 				return (0);
1970 			}
1971 		}
1972 		return (ENOENT);
1973 	}
1974 
1975 	/*
1976 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1977 	 * we request the updated mapping info.  If that works, we go
1978 	 * through all the slots that use this IRQ and update them.
1979 	 */
1980 	if (cfg->msix.msix_alloc > 0) {
1981 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1982 			mv = &cfg->msix.msix_vectors[i];
1983 			if (mv->mv_irq == irq) {
1984 				error = PCIB_MAP_MSI(device_get_parent(bus),
1985 				    dev, irq, &addr, &data, -1 /* XXX */);
1986 				if (error)
1987 					return (error);
1988 				mv->mv_address = addr;
1989 				mv->mv_data = data;
1990 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1991 					mte = &cfg->msix.msix_table[j];
1992 					if (mte->mte_vector != i + 1)
1993 						continue;
1994 					if (mte->mte_handlers == 0)
1995 						continue;
1996 					pci_mask_msix(dev, j);
1997 					pci_enable_msix(dev, j, addr, data);
1998 					pci_unmask_msix(dev, j);
1999 				}
2000 			}
2001 		}
2002 		return (ENOENT);
2003 	}
2004 
2005 	return (ENOENT);
2006 }
2007 
2008 /*
2009  * Returns true if the specified device is blacklisted because MSI
2010  * doesn't work.
2011  */
2012 int
2013 pci_msi_device_blacklisted(device_t dev)
2014 {
2015 	struct pci_quirk *q;
2016 
2017 	if (!pci_honor_msi_blacklist)
2018 		return (0);
2019 
2020 	for (q = &pci_quirks[0]; q->devid; q++) {
2021 		if (q->devid == pci_get_devid(dev) &&
2022 		    q->type == PCI_QUIRK_DISABLE_MSI)
2023 			return (1);
2024 	}
2025 	return (0);
2026 }
2027 
2028 /*
2029  * Determine if MSI is blacklisted globally on this sytem.  Currently,
2030  * we just check for blacklisted chipsets as represented by the
2031  * host-PCI bridge at device 0:0:0.  In the future, it may become
2032  * necessary to check other system attributes, such as the kenv values
2033  * that give the motherboard manufacturer and model number.
2034  */
2035 static int
2036 pci_msi_blacklisted(void)
2037 {
2038 	device_t dev;
2039 
2040 	if (!pci_honor_msi_blacklist)
2041 		return (0);
2042 
2043 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2044 	if (!(pcie_chipset || pcix_chipset))
2045 		return (1);
2046 
2047 	dev = pci_find_bsf(0, 0, 0);
2048 	if (dev != NULL)
2049 		return (pci_msi_device_blacklisted(dev));
2050 	return (0);
2051 }
2052 
2053 /*
2054  * Attempt to allocate count MSI messages on start_cpuid.
2055  *
2056  * If start_cpuid < 0, then the MSI messages' target CPU will be
2057  * selected automaticly.
2058  *
2059  * If the caller explicitly specified the MSI messages' target CPU,
2060  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
2061  * messages on the specified CPU, if the allocation fails due to MD
2062  * does not have enough vectors (EMSGSIZE), then we will try next
2063  * available CPU, until the allocation fails on all CPUs.
2064  *
2065  * EMSGSIZE will be returned, if all available CPUs does not have
2066  * enough vectors for the requested amount of MSI messages.  Caller
2067  * should either reduce the amount of MSI messages to be requested,
2068  * or simply giving up using MSI.
2069  *
2070  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
2071  * returned in 'rid' array, if the allocation succeeds.
2072  */
2073 int
2074 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
2075     int start_cpuid)
2076 {
2077 	struct pci_devinfo *dinfo = device_get_ivars(child);
2078 	pcicfgregs *cfg = &dinfo->cfg;
2079 	struct resource_list_entry *rle;
2080 	int error, i, irqs[32], cpuid = 0;
2081 	uint16_t ctrl;
2082 
2083 	KASSERT(count != 0 && count <= 32 && powerof2(count),
2084 	    ("invalid MSI count %d\n", count));
2085 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d\n", start_cpuid));
2086 
2087 	/* If rid 0 is allocated, then fail. */
2088 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
2089 	if (rle != NULL && rle->res != NULL)
2090 		return (ENXIO);
2091 
2092 	/* Already have allocated messages? */
2093 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
2094 		return (ENXIO);
2095 
2096 	/* If MSI is blacklisted for this system, fail. */
2097 	if (pci_msi_blacklisted())
2098 		return (ENXIO);
2099 
2100 	/* MSI capability present? */
2101 	if (cfg->msi.msi_location == 0 || !pci_do_msi)
2102 		return (ENODEV);
2103 
2104 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d\n",
2105 	    count, cfg->msi.msi_msgnum));
2106 
2107 	if (bootverbose) {
2108 		device_printf(child,
2109 		    "attempting to allocate %d MSI vectors (%d supported)\n",
2110 		    count, cfg->msi.msi_msgnum);
2111 	}
2112 
2113 	if (start_cpuid < 0)
2114 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
2115 
2116 	error = EINVAL;
2117 	for (i = 0; i < ncpus; ++i) {
2118 		cpuid = (start_cpuid + i) % ncpus;
2119 
2120 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
2121 		    cfg->msi.msi_msgnum, irqs, cpuid);
2122 		if (error == 0)
2123 			break;
2124 		else if (error != EMSGSIZE)
2125 			return error;
2126 	}
2127 	if (error)
2128 		return error;
2129 
2130 	/*
2131 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
2132 	 * the irqs[] array, so add new resources starting at rid 1.
2133 	 */
2134 	for (i = 0; i < count; i++) {
2135 		rid[i] = i + 1;
2136 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2137 		    irqs[i], irqs[i], 1, cpuid);
2138 	}
2139 
2140 	if (bootverbose) {
2141 		if (count == 1) {
2142 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
2143 			    irqs[0], cpuid);
2144 		} else {
2145 			int run;
2146 
2147 			/*
2148 			 * Be fancy and try to print contiguous runs
2149 			 * of IRQ values as ranges.  'run' is true if
2150 			 * we are in a range.
2151 			 */
2152 			device_printf(child, "using IRQs %d", irqs[0]);
2153 			run = 0;
2154 			for (i = 1; i < count; i++) {
2155 
2156 				/* Still in a run? */
2157 				if (irqs[i] == irqs[i - 1] + 1) {
2158 					run = 1;
2159 					continue;
2160 				}
2161 
2162 				/* Finish previous range. */
2163 				if (run) {
2164 					kprintf("-%d", irqs[i - 1]);
2165 					run = 0;
2166 				}
2167 
2168 				/* Start new range. */
2169 				kprintf(",%d", irqs[i]);
2170 			}
2171 
2172 			/* Unfinished range? */
2173 			if (run)
2174 				kprintf("-%d", irqs[count - 1]);
2175 			kprintf(" for MSI on cpu%d\n", cpuid);
2176 		}
2177 	}
2178 
2179 	/* Update control register with count. */
2180 	ctrl = cfg->msi.msi_ctrl;
2181 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2182 	ctrl |= (ffs(count) - 1) << 4;
2183 	cfg->msi.msi_ctrl = ctrl;
2184 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2185 
2186 	/* Update counts of alloc'd messages. */
2187 	cfg->msi.msi_alloc = count;
2188 	cfg->msi.msi_handlers = 0;
2189 	return (0);
2190 }
2191 
2192 /* Release the MSI messages associated with this device. */
2193 int
2194 pci_release_msi_method(device_t dev, device_t child)
2195 {
2196 	struct pci_devinfo *dinfo = device_get_ivars(child);
2197 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2198 	struct resource_list_entry *rle;
2199 	int error, i, irqs[32], cpuid = -1;
2200 
2201 	/* Try MSI-X first. */
2202 	error = pci_release_msix(dev, child);
2203 	if (error != ENODEV)
2204 		return (error);
2205 
2206 	/* Do we have any messages to release? */
2207 	if (msi->msi_alloc == 0)
2208 		return (ENODEV);
2209 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2210 
2211 	/* Make sure none of the resources are allocated. */
2212 	if (msi->msi_handlers > 0)
2213 		return (EBUSY);
2214 	for (i = 0; i < msi->msi_alloc; i++) {
2215 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2216 		KASSERT(rle != NULL, ("missing MSI resource"));
2217 		if (rle->res != NULL)
2218 			return (EBUSY);
2219 		if (i == 0) {
2220 			cpuid = rle->cpuid;
2221 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2222 			    ("invalid MSI target cpuid %d\n", cpuid));
2223 		} else {
2224 			KASSERT(rle->cpuid == cpuid,
2225 			    ("MSI targets different cpus, "
2226 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2227 		}
2228 		irqs[i] = rle->start;
2229 	}
2230 
2231 	/* Update control register with 0 count. */
2232 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2233 	    ("%s: MSI still enabled", __func__));
2234 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2235 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2236 	    msi->msi_ctrl, 2);
2237 
2238 	/* Release the messages. */
2239 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2240 	    cpuid);
2241 	for (i = 0; i < msi->msi_alloc; i++)
2242 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2243 
2244 	/* Update alloc count. */
2245 	msi->msi_alloc = 0;
2246 	msi->msi_addr = 0;
2247 	msi->msi_data = 0;
2248 	return (0);
2249 }
2250 
2251 /*
2252  * Return the max supported MSI messages this device supports.
2253  * Basically, assuming the MD code can alloc messages, this function
2254  * should return the maximum value that pci_alloc_msi() can return.
2255  * Thus, it is subject to the tunables, etc.
2256  */
2257 int
2258 pci_msi_count_method(device_t dev, device_t child)
2259 {
2260 	struct pci_devinfo *dinfo = device_get_ivars(child);
2261 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2262 
2263 	if (pci_do_msi && msi->msi_location != 0)
2264 		return (msi->msi_msgnum);
2265 	return (0);
2266 }
2267 
2268 /* kfree pcicfgregs structure and all depending data structures */
2269 
2270 int
2271 pci_freecfg(struct pci_devinfo *dinfo)
2272 {
2273 	struct devlist *devlist_head;
2274 	int i;
2275 
2276 	devlist_head = &pci_devq;
2277 
2278 	if (dinfo->cfg.vpd.vpd_reg) {
2279 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2280 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2281 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2282 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2283 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2284 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2285 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2286 	}
2287 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2288 	kfree(dinfo, M_DEVBUF);
2289 
2290 	/* increment the generation count */
2291 	pci_generation++;
2292 
2293 	/* we're losing one device */
2294 	pci_numdevs--;
2295 	return (0);
2296 }
2297 
2298 /*
2299  * PCI power manangement
2300  */
2301 int
2302 pci_set_powerstate_method(device_t dev, device_t child, int state)
2303 {
2304 	struct pci_devinfo *dinfo = device_get_ivars(child);
2305 	pcicfgregs *cfg = &dinfo->cfg;
2306 	uint16_t status;
2307 	int result, oldstate, highest, delay;
2308 
2309 	if (cfg->pp.pp_cap == 0)
2310 		return (EOPNOTSUPP);
2311 
2312 	/*
2313 	 * Optimize a no state change request away.  While it would be OK to
2314 	 * write to the hardware in theory, some devices have shown odd
2315 	 * behavior when going from D3 -> D3.
2316 	 */
2317 	oldstate = pci_get_powerstate(child);
2318 	if (oldstate == state)
2319 		return (0);
2320 
2321 	/*
2322 	 * The PCI power management specification states that after a state
2323 	 * transition between PCI power states, system software must
2324 	 * guarantee a minimal delay before the function accesses the device.
2325 	 * Compute the worst case delay that we need to guarantee before we
2326 	 * access the device.  Many devices will be responsive much more
2327 	 * quickly than this delay, but there are some that don't respond
2328 	 * instantly to state changes.  Transitions to/from D3 state require
2329 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2330 	 * is done below with DELAY rather than a sleeper function because
2331 	 * this function can be called from contexts where we cannot sleep.
2332 	 */
2333 	highest = (oldstate > state) ? oldstate : state;
2334 	if (highest == PCI_POWERSTATE_D3)
2335 	    delay = 10000;
2336 	else if (highest == PCI_POWERSTATE_D2)
2337 	    delay = 200;
2338 	else
2339 	    delay = 0;
2340 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2341 	    & ~PCIM_PSTAT_DMASK;
2342 	result = 0;
2343 	switch (state) {
2344 	case PCI_POWERSTATE_D0:
2345 		status |= PCIM_PSTAT_D0;
2346 		break;
2347 	case PCI_POWERSTATE_D1:
2348 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2349 			return (EOPNOTSUPP);
2350 		status |= PCIM_PSTAT_D1;
2351 		break;
2352 	case PCI_POWERSTATE_D2:
2353 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2354 			return (EOPNOTSUPP);
2355 		status |= PCIM_PSTAT_D2;
2356 		break;
2357 	case PCI_POWERSTATE_D3:
2358 		status |= PCIM_PSTAT_D3;
2359 		break;
2360 	default:
2361 		return (EINVAL);
2362 	}
2363 
2364 	if (bootverbose)
2365 		kprintf(
2366 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2367 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2368 		    dinfo->cfg.func, oldstate, state);
2369 
2370 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2371 	if (delay)
2372 		DELAY(delay);
2373 	return (0);
2374 }
2375 
2376 int
2377 pci_get_powerstate_method(device_t dev, device_t child)
2378 {
2379 	struct pci_devinfo *dinfo = device_get_ivars(child);
2380 	pcicfgregs *cfg = &dinfo->cfg;
2381 	uint16_t status;
2382 	int result;
2383 
2384 	if (cfg->pp.pp_cap != 0) {
2385 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2386 		switch (status & PCIM_PSTAT_DMASK) {
2387 		case PCIM_PSTAT_D0:
2388 			result = PCI_POWERSTATE_D0;
2389 			break;
2390 		case PCIM_PSTAT_D1:
2391 			result = PCI_POWERSTATE_D1;
2392 			break;
2393 		case PCIM_PSTAT_D2:
2394 			result = PCI_POWERSTATE_D2;
2395 			break;
2396 		case PCIM_PSTAT_D3:
2397 			result = PCI_POWERSTATE_D3;
2398 			break;
2399 		default:
2400 			result = PCI_POWERSTATE_UNKNOWN;
2401 			break;
2402 		}
2403 	} else {
2404 		/* No support, device is always at D0 */
2405 		result = PCI_POWERSTATE_D0;
2406 	}
2407 	return (result);
2408 }
2409 
2410 /*
2411  * Some convenience functions for PCI device drivers.
2412  */
2413 
2414 static __inline void
2415 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2416 {
2417 	uint16_t	command;
2418 
2419 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2420 	command |= bit;
2421 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2422 }
2423 
2424 static __inline void
2425 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2426 {
2427 	uint16_t	command;
2428 
2429 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2430 	command &= ~bit;
2431 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2432 }
2433 
2434 int
2435 pci_enable_busmaster_method(device_t dev, device_t child)
2436 {
2437 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2438 	return (0);
2439 }
2440 
2441 int
2442 pci_disable_busmaster_method(device_t dev, device_t child)
2443 {
2444 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2445 	return (0);
2446 }
2447 
2448 int
2449 pci_enable_io_method(device_t dev, device_t child, int space)
2450 {
2451 	uint16_t command;
2452 	uint16_t bit;
2453 	char *error;
2454 
2455 	bit = 0;
2456 	error = NULL;
2457 
2458 	switch(space) {
2459 	case SYS_RES_IOPORT:
2460 		bit = PCIM_CMD_PORTEN;
2461 		error = "port";
2462 		break;
2463 	case SYS_RES_MEMORY:
2464 		bit = PCIM_CMD_MEMEN;
2465 		error = "memory";
2466 		break;
2467 	default:
2468 		return (EINVAL);
2469 	}
2470 	pci_set_command_bit(dev, child, bit);
2471 	/* Some devices seem to need a brief stall here, what do to? */
2472 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2473 	if (command & bit)
2474 		return (0);
2475 	device_printf(child, "failed to enable %s mapping!\n", error);
2476 	return (ENXIO);
2477 }
2478 
2479 int
2480 pci_disable_io_method(device_t dev, device_t child, int space)
2481 {
2482 	uint16_t command;
2483 	uint16_t bit;
2484 	char *error;
2485 
2486 	bit = 0;
2487 	error = NULL;
2488 
2489 	switch(space) {
2490 	case SYS_RES_IOPORT:
2491 		bit = PCIM_CMD_PORTEN;
2492 		error = "port";
2493 		break;
2494 	case SYS_RES_MEMORY:
2495 		bit = PCIM_CMD_MEMEN;
2496 		error = "memory";
2497 		break;
2498 	default:
2499 		return (EINVAL);
2500 	}
2501 	pci_clear_command_bit(dev, child, bit);
2502 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2503 	if (command & bit) {
2504 		device_printf(child, "failed to disable %s mapping!\n", error);
2505 		return (ENXIO);
2506 	}
2507 	return (0);
2508 }
2509 
2510 /*
2511  * New style pci driver.  Parent device is either a pci-host-bridge or a
2512  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2513  */
2514 
2515 void
2516 pci_print_verbose(struct pci_devinfo *dinfo)
2517 {
2518 
2519 	if (bootverbose) {
2520 		pcicfgregs *cfg = &dinfo->cfg;
2521 
2522 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2523 		    cfg->vendor, cfg->device, cfg->revid);
2524 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2525 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2526 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2527 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2528 		    cfg->mfdev);
2529 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2530 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2531 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2532 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2533 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2534 		if (cfg->intpin > 0)
2535 			kprintf("\tintpin=%c, irq=%d\n",
2536 			    cfg->intpin +'a' -1, cfg->intline);
2537 		if (cfg->pp.pp_cap) {
2538 			uint16_t status;
2539 
2540 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2541 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2542 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2543 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2544 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2545 			    status & PCIM_PSTAT_DMASK);
2546 		}
2547 		if (cfg->msi.msi_location) {
2548 			int ctrl;
2549 
2550 			ctrl = cfg->msi.msi_ctrl;
2551 			kprintf("\tMSI supports %d message%s%s%s\n",
2552 			    cfg->msi.msi_msgnum,
2553 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2554 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2555 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2556 		}
2557 		if (cfg->msix.msix_location) {
2558 			kprintf("\tMSI-X supports %d message%s ",
2559 			    cfg->msix.msix_msgnum,
2560 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2561 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2562 				kprintf("in map 0x%x\n",
2563 				    cfg->msix.msix_table_bar);
2564 			else
2565 				kprintf("in maps 0x%x and 0x%x\n",
2566 				    cfg->msix.msix_table_bar,
2567 				    cfg->msix.msix_pba_bar);
2568 		}
2569 		pci_print_verbose_expr(cfg);
2570 	}
2571 }
2572 
2573 static void
2574 pci_print_verbose_expr(const pcicfgregs *cfg)
2575 {
2576 	const struct pcicfg_expr *expr = &cfg->expr;
2577 	const char *port_name;
2578 	uint16_t port_type;
2579 
2580 	if (!bootverbose)
2581 		return;
2582 
2583 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2584 		return;
2585 
2586 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2587 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2588 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
2589 		goto back;
2590 
2591 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2592 
2593 	switch (port_type) {
2594 	case PCIE_END_POINT:
2595 		port_name = "DEVICE";
2596 		break;
2597 	case PCIE_LEG_END_POINT:
2598 		port_name = "LEGDEV";
2599 		break;
2600 	case PCIE_ROOT_PORT:
2601 		port_name = "ROOT";
2602 		break;
2603 	case PCIE_UP_STREAM_PORT:
2604 		port_name = "UPSTREAM";
2605 		break;
2606 	case PCIE_DOWN_STREAM_PORT:
2607 		port_name = "DOWNSTRM";
2608 		break;
2609 	case PCIE_PCIE2PCI_BRIDGE:
2610 		port_name = "PCIE2PCI";
2611 		break;
2612 	case PCIE_PCI2PCIE_BRIDGE:
2613 		port_name = "PCI2PCIE";
2614 		break;
2615 	default:
2616 		port_name = NULL;
2617 		break;
2618 	}
2619 	if ((port_type == PCIE_ROOT_PORT ||
2620 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2621 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2622 		port_name = NULL;
2623 	if (port_name != NULL)
2624 		kprintf("[%s]", port_name);
2625 
2626 	if (pcie_slotimpl(cfg)) {
2627 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2628 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2629 			kprintf("[HOTPLUG]");
2630 	}
2631 back:
2632 	kprintf("\n");
2633 }
2634 
2635 static int
2636 pci_porten(device_t pcib, int b, int s, int f)
2637 {
2638 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2639 		& PCIM_CMD_PORTEN) != 0;
2640 }
2641 
2642 static int
2643 pci_memen(device_t pcib, int b, int s, int f)
2644 {
2645 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2646 		& PCIM_CMD_MEMEN) != 0;
2647 }
2648 
2649 /*
2650  * Add a resource based on a pci map register. Return 1 if the map
2651  * register is a 32bit map register or 2 if it is a 64bit register.
2652  */
2653 static int
2654 pci_add_map(device_t pcib, device_t bus, device_t dev,
2655     int b, int s, int f, int reg, struct resource_list *rl, int force,
2656     int prefetch)
2657 {
2658 	uint32_t map;
2659 	pci_addr_t base;
2660 	pci_addr_t start, end, count;
2661 	uint8_t ln2size;
2662 	uint8_t ln2range;
2663 	uint32_t testval;
2664 	uint16_t cmd;
2665 	int type;
2666 	int barlen;
2667 	struct resource *res;
2668 
2669 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2670 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2671 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2672 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2673 
2674 	if (PCI_BAR_MEM(map)) {
2675 		type = SYS_RES_MEMORY;
2676 		if (map & PCIM_BAR_MEM_PREFETCH)
2677 			prefetch = 1;
2678 	} else
2679 		type = SYS_RES_IOPORT;
2680 	ln2size = pci_mapsize(testval);
2681 	ln2range = pci_maprange(testval);
2682 	base = pci_mapbase(map);
2683 	barlen = ln2range == 64 ? 2 : 1;
2684 
2685 	/*
2686 	 * For I/O registers, if bottom bit is set, and the next bit up
2687 	 * isn't clear, we know we have a BAR that doesn't conform to the
2688 	 * spec, so ignore it.  Also, sanity check the size of the data
2689 	 * areas to the type of memory involved.  Memory must be at least
2690 	 * 16 bytes in size, while I/O ranges must be at least 4.
2691 	 */
2692 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2693 		return (barlen);
2694 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2695 	    (type == SYS_RES_IOPORT && ln2size < 2))
2696 		return (barlen);
2697 
2698 	if (ln2range == 64)
2699 		/* Read the other half of a 64bit map register */
2700 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2701 	if (bootverbose) {
2702 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2703 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2704 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2705 			kprintf(", port disabled\n");
2706 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2707 			kprintf(", memory disabled\n");
2708 		else
2709 			kprintf(", enabled\n");
2710 	}
2711 
2712 	/*
2713 	 * If base is 0, then we have problems.  It is best to ignore
2714 	 * such entries for the moment.  These will be allocated later if
2715 	 * the driver specifically requests them.  However, some
2716 	 * removable busses look better when all resources are allocated,
2717 	 * so allow '0' to be overriden.
2718 	 *
2719 	 * Similarly treat maps whose values is the same as the test value
2720 	 * read back.  These maps have had all f's written to them by the
2721 	 * BIOS in an attempt to disable the resources.
2722 	 */
2723 	if (!force && (base == 0 || map == testval))
2724 		return (barlen);
2725 	if ((u_long)base != base) {
2726 		device_printf(bus,
2727 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2728 		    pci_get_domain(dev), b, s, f, reg);
2729 		return (barlen);
2730 	}
2731 
2732 	/*
2733 	 * This code theoretically does the right thing, but has
2734 	 * undesirable side effects in some cases where peripherals
2735 	 * respond oddly to having these bits enabled.  Let the user
2736 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2737 	 * default).
2738 	 */
2739 	if (pci_enable_io_modes) {
2740 		/* Turn on resources that have been left off by a lazy BIOS */
2741 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2742 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2743 			cmd |= PCIM_CMD_PORTEN;
2744 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2745 		}
2746 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2747 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2748 			cmd |= PCIM_CMD_MEMEN;
2749 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2750 		}
2751 	} else {
2752 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2753 			return (barlen);
2754 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2755 			return (barlen);
2756 	}
2757 
2758 	count = 1 << ln2size;
2759 	if (base == 0 || base == pci_mapbase(testval)) {
2760 		start = 0;	/* Let the parent decide. */
2761 		end = ~0ULL;
2762 	} else {
2763 		start = base;
2764 		end = base + (1 << ln2size) - 1;
2765 	}
2766 	resource_list_add(rl, type, reg, start, end, count, -1);
2767 
2768 	/*
2769 	 * Try to allocate the resource for this BAR from our parent
2770 	 * so that this resource range is already reserved.  The
2771 	 * driver for this device will later inherit this resource in
2772 	 * pci_alloc_resource().
2773 	 */
2774 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2775 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2776 	if (res == NULL) {
2777 		/*
2778 		 * If the allocation fails, delete the resource list
2779 		 * entry to force pci_alloc_resource() to allocate
2780 		 * resources from the parent.
2781 		 */
2782 		resource_list_delete(rl, type, reg);
2783 #ifdef PCI_BAR_CLEAR
2784 		/* Clear the BAR */
2785 		start = 0;
2786 #else	/* !PCI_BAR_CLEAR */
2787 		/*
2788 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2789 		 * PCI function, clearing the BAR causes HPET timer
2790 		 * stop ticking.
2791 		 */
2792 		if (bootverbose) {
2793 			kprintf("pci:%d:%d:%d: resource reservation failed "
2794 				"%#jx - %#jx\n", b, s, f,
2795 				(intmax_t)start, (intmax_t)end);
2796 		}
2797 		return (barlen);
2798 #endif	/* PCI_BAR_CLEAR */
2799 	} else {
2800 		start = rman_get_start(res);
2801 	}
2802 	pci_write_config(dev, reg, start, 4);
2803 	if (ln2range == 64)
2804 		pci_write_config(dev, reg + 4, start >> 32, 4);
2805 	return (barlen);
2806 }
2807 
2808 /*
2809  * For ATA devices we need to decide early what addressing mode to use.
2810  * Legacy demands that the primary and secondary ATA ports sits on the
2811  * same addresses that old ISA hardware did. This dictates that we use
2812  * those addresses and ignore the BAR's if we cannot set PCI native
2813  * addressing mode.
2814  */
2815 static void
2816 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2817     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2818 {
2819 	int rid, type, progif;
2820 #if 0
2821 	/* if this device supports PCI native addressing use it */
2822 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2823 	if ((progif & 0x8a) == 0x8a) {
2824 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2825 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2826 			kprintf("Trying ATA native PCI addressing mode\n");
2827 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2828 		}
2829 	}
2830 #endif
2831 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2832 	type = SYS_RES_IOPORT;
2833 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2834 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2835 		    prefetchmask & (1 << 0));
2836 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2837 		    prefetchmask & (1 << 1));
2838 	} else {
2839 		rid = PCIR_BAR(0);
2840 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2841 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2842 		    0, -1);
2843 		rid = PCIR_BAR(1);
2844 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2845 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2846 		    0, -1);
2847 	}
2848 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2849 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2850 		    prefetchmask & (1 << 2));
2851 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2852 		    prefetchmask & (1 << 3));
2853 	} else {
2854 		rid = PCIR_BAR(2);
2855 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2856 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2857 		    0, -1);
2858 		rid = PCIR_BAR(3);
2859 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2860 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2861 		    0, -1);
2862 	}
2863 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2864 	    prefetchmask & (1 << 4));
2865 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2866 	    prefetchmask & (1 << 5));
2867 }
2868 
2869 static void
2870 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2871 {
2872 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2873 	pcicfgregs *cfg = &dinfo->cfg;
2874 	char tunable_name[64];
2875 	int irq;
2876 
2877 	/* Has to have an intpin to have an interrupt. */
2878 	if (cfg->intpin == 0)
2879 		return;
2880 
2881 	/* Let the user override the IRQ with a tunable. */
2882 	irq = PCI_INVALID_IRQ;
2883 	ksnprintf(tunable_name, sizeof(tunable_name),
2884 	    "hw.pci%d.%d.%d.INT%c.irq",
2885 	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
2886 	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
2887 		irq = PCI_INVALID_IRQ;
2888 
2889 	/*
2890 	 * If we didn't get an IRQ via the tunable, then we either use the
2891 	 * IRQ value in the intline register or we ask the bus to route an
2892 	 * interrupt for us.  If force_route is true, then we only use the
2893 	 * value in the intline register if the bus was unable to assign an
2894 	 * IRQ.
2895 	 */
2896 	if (!PCI_INTERRUPT_VALID(irq)) {
2897 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2898 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2899 		if (!PCI_INTERRUPT_VALID(irq))
2900 			irq = cfg->intline;
2901 	}
2902 
2903 	/* If after all that we don't have an IRQ, just bail. */
2904 	if (!PCI_INTERRUPT_VALID(irq))
2905 		return;
2906 
2907 	/* Update the config register if it changed. */
2908 	if (irq != cfg->intline) {
2909 		cfg->intline = irq;
2910 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2911 	}
2912 
2913 	/* Add this IRQ as rid 0 interrupt resource. */
2914 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2915 	    machintr_intr_cpuid(irq));
2916 }
2917 
2918 void
2919 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
2920 {
2921 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2922 	pcicfgregs *cfg = &dinfo->cfg;
2923 	struct resource_list *rl = &dinfo->resources;
2924 	struct pci_quirk *q;
2925 	int b, i, f, s;
2926 
2927 	b = cfg->bus;
2928 	s = cfg->slot;
2929 	f = cfg->func;
2930 
2931 	/* ATA devices needs special map treatment */
2932 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2933 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2934 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2935 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2936 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2937 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2938 	else
2939 		for (i = 0; i < cfg->nummaps;)
2940 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2941 			    rl, force, prefetchmask & (1 << i));
2942 
2943 	/*
2944 	 * Add additional, quirked resources.
2945 	 */
2946 	for (q = &pci_quirks[0]; q->devid; q++) {
2947 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2948 		    && q->type == PCI_QUIRK_MAP_REG)
2949 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2950 			  force, 0);
2951 	}
2952 
2953 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2954 		/*
2955 		 * Try to re-route interrupts. Sometimes the BIOS or
2956 		 * firmware may leave bogus values in these registers.
2957 		 * If the re-route fails, then just stick with what we
2958 		 * have.
2959 		 */
2960 		pci_assign_interrupt(bus, dev, 1);
2961 	}
2962 }
2963 
2964 void
2965 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2966 {
2967 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2968 	device_t pcib = device_get_parent(dev);
2969 	struct pci_devinfo *dinfo;
2970 	int maxslots;
2971 	int s, f, pcifunchigh;
2972 	uint8_t hdrtype;
2973 
2974 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2975 	    ("dinfo_size too small"));
2976 	maxslots = PCIB_MAXSLOTS(pcib);
2977 	for (s = 0; s <= maxslots; s++) {
2978 		pcifunchigh = 0;
2979 		f = 0;
2980 		DELAY(1);
2981 		hdrtype = REG(PCIR_HDRTYPE, 1);
2982 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2983 			continue;
2984 		if (hdrtype & PCIM_MFDEV)
2985 			pcifunchigh = PCI_FUNCMAX;
2986 		for (f = 0; f <= pcifunchigh; f++) {
2987 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2988 			    dinfo_size);
2989 			if (dinfo != NULL) {
2990 				pci_add_child(dev, dinfo);
2991 			}
2992 		}
2993 	}
2994 #undef REG
2995 }
2996 
2997 void
2998 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2999 {
3000 	device_t pcib;
3001 
3002 	pcib = device_get_parent(bus);
3003 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3004 	device_set_ivars(dinfo->cfg.dev, dinfo);
3005 	resource_list_init(&dinfo->resources);
3006 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3007 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3008 	pci_print_verbose(dinfo);
3009 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
3010 }
3011 
3012 static int
3013 pci_probe(device_t dev)
3014 {
3015 	device_set_desc(dev, "PCI bus");
3016 
3017 	/* Allow other subclasses to override this driver. */
3018 	return (-1000);
3019 }
3020 
3021 static int
3022 pci_attach(device_t dev)
3023 {
3024 	int busno, domain;
3025 
3026 	/*
3027 	 * Since there can be multiple independantly numbered PCI
3028 	 * busses on systems with multiple PCI domains, we can't use
3029 	 * the unit number to decide which bus we are probing. We ask
3030 	 * the parent pcib what our domain and bus numbers are.
3031 	 */
3032 	domain = pcib_get_domain(dev);
3033 	busno = pcib_get_bus(dev);
3034 	if (bootverbose)
3035 		device_printf(dev, "domain=%d, physical bus=%d\n",
3036 		    domain, busno);
3037 
3038 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3039 
3040 	return (bus_generic_attach(dev));
3041 }
3042 
3043 int
3044 pci_suspend(device_t dev)
3045 {
3046 	int dstate, error, i, numdevs;
3047 	device_t acpi_dev, child, *devlist;
3048 	struct pci_devinfo *dinfo;
3049 
3050 	/*
3051 	 * Save the PCI configuration space for each child and set the
3052 	 * device in the appropriate power state for this sleep state.
3053 	 */
3054 	acpi_dev = NULL;
3055 	if (pci_do_power_resume)
3056 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3057 	device_get_children(dev, &devlist, &numdevs);
3058 	for (i = 0; i < numdevs; i++) {
3059 		child = devlist[i];
3060 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3061 		pci_cfg_save(child, dinfo, 0);
3062 	}
3063 
3064 	/* Suspend devices before potentially powering them down. */
3065 	error = bus_generic_suspend(dev);
3066 	if (error) {
3067 		kfree(devlist, M_TEMP);
3068 		return (error);
3069 	}
3070 
3071 	/*
3072 	 * Always set the device to D3.  If ACPI suggests a different
3073 	 * power state, use it instead.  If ACPI is not present, the
3074 	 * firmware is responsible for managing device power.  Skip
3075 	 * children who aren't attached since they are powered down
3076 	 * separately.  Only manage type 0 devices for now.
3077 	 */
3078 	for (i = 0; acpi_dev && i < numdevs; i++) {
3079 		child = devlist[i];
3080 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3081 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3082 			dstate = PCI_POWERSTATE_D3;
3083 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3084 			pci_set_powerstate(child, dstate);
3085 		}
3086 	}
3087 	kfree(devlist, M_TEMP);
3088 	return (0);
3089 }
3090 
3091 int
3092 pci_resume(device_t dev)
3093 {
3094 	int i, numdevs;
3095 	device_t acpi_dev, child, *devlist;
3096 	struct pci_devinfo *dinfo;
3097 
3098 	/*
3099 	 * Set each child to D0 and restore its PCI configuration space.
3100 	 */
3101 	acpi_dev = NULL;
3102 	if (pci_do_power_resume)
3103 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3104 	device_get_children(dev, &devlist, &numdevs);
3105 	for (i = 0; i < numdevs; i++) {
3106 		/*
3107 		 * Notify ACPI we're going to D0 but ignore the result.  If
3108 		 * ACPI is not present, the firmware is responsible for
3109 		 * managing device power.  Only manage type 0 devices for now.
3110 		 */
3111 		child = devlist[i];
3112 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3113 		if (acpi_dev && device_is_attached(child) &&
3114 		    dinfo->cfg.hdrtype == 0) {
3115 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3116 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3117 		}
3118 
3119 		/* Now the device is powered up, restore its config space. */
3120 		pci_cfg_restore(child, dinfo);
3121 	}
3122 	kfree(devlist, M_TEMP);
3123 	return (bus_generic_resume(dev));
3124 }
3125 
3126 static void
3127 pci_load_vendor_data(void)
3128 {
3129 	caddr_t vendordata, info;
3130 
3131 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3132 		info = preload_search_info(vendordata, MODINFO_ADDR);
3133 		pci_vendordata = *(char **)info;
3134 		info = preload_search_info(vendordata, MODINFO_SIZE);
3135 		pci_vendordata_size = *(size_t *)info;
3136 		/* terminate the database */
3137 		pci_vendordata[pci_vendordata_size] = '\n';
3138 	}
3139 }
3140 
3141 void
3142 pci_driver_added(device_t dev, driver_t *driver)
3143 {
3144 	int numdevs;
3145 	device_t *devlist;
3146 	device_t child;
3147 	struct pci_devinfo *dinfo;
3148 	int i;
3149 
3150 	if (bootverbose)
3151 		device_printf(dev, "driver added\n");
3152 	DEVICE_IDENTIFY(driver, dev);
3153 	device_get_children(dev, &devlist, &numdevs);
3154 	for (i = 0; i < numdevs; i++) {
3155 		child = devlist[i];
3156 		if (device_get_state(child) != DS_NOTPRESENT)
3157 			continue;
3158 		dinfo = device_get_ivars(child);
3159 		pci_print_verbose(dinfo);
3160 		if (bootverbose)
3161 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3162 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3163 			    dinfo->cfg.func);
3164 		pci_cfg_restore(child, dinfo);
3165 		if (device_probe_and_attach(child) != 0)
3166 			pci_cfg_save(child, dinfo, 1);
3167 	}
3168 	kfree(devlist, M_TEMP);
3169 }
3170 
3171 static void
3172 pci_child_detached(device_t parent __unused, device_t child)
3173 {
3174 	/* Turn child's power off */
3175 	pci_cfg_save(child, device_get_ivars(child), 1);
3176 }
3177 
3178 int
3179 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3180     driver_intr_t *intr, void *arg, void **cookiep, lwkt_serialize_t serializer)
3181 {
3182 	struct pci_devinfo *dinfo;
3183 	struct msix_table_entry *mte;
3184 	struct msix_vector *mv;
3185 	uint64_t addr;
3186 	uint32_t data;
3187 	int rid, error;
3188 	void *cookie;
3189 
3190 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3191 	    arg, &cookie, serializer);
3192 	if (error)
3193 		return (error);
3194 
3195 	/* If this is not a direct child, just bail out. */
3196 	if (device_get_parent(child) != dev) {
3197 		*cookiep = cookie;
3198 		return(0);
3199 	}
3200 
3201 	rid = rman_get_rid(irq);
3202 	if (rid == 0) {
3203 		/* Make sure that INTx is enabled */
3204 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3205 	} else {
3206 		/*
3207 		 * Check to see if the interrupt is MSI or MSI-X.
3208 		 * Ask our parent to map the MSI and give
3209 		 * us the address and data register values.
3210 		 * If we fail for some reason, teardown the
3211 		 * interrupt handler.
3212 		 */
3213 		dinfo = device_get_ivars(child);
3214 		if (dinfo->cfg.msi.msi_alloc > 0) {
3215 			if (dinfo->cfg.msi.msi_addr == 0) {
3216 				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
3217 			    ("MSI has handlers, but vectors not mapped"));
3218 				error = PCIB_MAP_MSI(device_get_parent(dev),
3219 				    child, rman_get_start(irq), &addr, &data,
3220 				    rman_get_cpuid(irq));
3221 				if (error)
3222 					goto bad;
3223 				dinfo->cfg.msi.msi_addr = addr;
3224 				dinfo->cfg.msi.msi_data = data;
3225 				pci_enable_msi(child, addr, data);
3226 			}
3227 			dinfo->cfg.msi.msi_handlers++;
3228 		} else {
3229 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3230 			    ("No MSI or MSI-X interrupts allocated"));
3231 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3232 			    ("MSI-X index too high"));
3233 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3234 			KASSERT(mte->mte_vector != 0, ("no message vector"));
3235 			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
3236 			KASSERT(mv->mv_irq == rman_get_start(irq),
3237 			    ("IRQ mismatch"));
3238 			if (mv->mv_address == 0) {
3239 				KASSERT(mte->mte_handlers == 0,
3240 		    ("MSI-X table entry has handlers, but vector not mapped"));
3241 				error = PCIB_MAP_MSI(device_get_parent(dev),
3242 				    child, rman_get_start(irq), &addr, &data,
3243 				    rman_get_cpuid(irq));
3244 				if (error)
3245 					goto bad;
3246 				mv->mv_address = addr;
3247 				mv->mv_data = data;
3248 			}
3249 			if (mte->mte_handlers == 0) {
3250 				pci_enable_msix(child, rid - 1, mv->mv_address,
3251 				    mv->mv_data);
3252 				pci_unmask_msix(child, rid - 1);
3253 			}
3254 			mte->mte_handlers++;
3255 		}
3256 
3257 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3258 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3259 	bad:
3260 		if (error) {
3261 			(void)bus_generic_teardown_intr(dev, child, irq,
3262 			    cookie);
3263 			return (error);
3264 		}
3265 	}
3266 	*cookiep = cookie;
3267 	return (0);
3268 }
3269 
3270 int
3271 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3272     void *cookie)
3273 {
3274 	struct msix_table_entry *mte;
3275 	struct resource_list_entry *rle;
3276 	struct pci_devinfo *dinfo;
3277 	int rid, error;
3278 
3279 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3280 		return (EINVAL);
3281 
3282 	/* If this isn't a direct child, just bail out */
3283 	if (device_get_parent(child) != dev)
3284 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3285 
3286 	rid = rman_get_rid(irq);
3287 	if (rid == 0) {
3288 		/* Mask INTx */
3289 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3290 	} else {
3291 		/*
3292 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3293 		 * decrement the appropriate handlers count and mask the
3294 		 * MSI-X message, or disable MSI messages if the count
3295 		 * drops to 0.
3296 		 */
3297 		dinfo = device_get_ivars(child);
3298 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3299 		if (rle->res != irq)
3300 			return (EINVAL);
3301 		if (dinfo->cfg.msi.msi_alloc > 0) {
3302 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3303 			    ("MSI-X index too high"));
3304 			if (dinfo->cfg.msi.msi_handlers == 0)
3305 				return (EINVAL);
3306 			dinfo->cfg.msi.msi_handlers--;
3307 			if (dinfo->cfg.msi.msi_handlers == 0)
3308 				pci_disable_msi(child);
3309 		} else {
3310 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3311 			    ("No MSI or MSI-X interrupts allocated"));
3312 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3313 			    ("MSI-X index too high"));
3314 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3315 			if (mte->mte_handlers == 0)
3316 				return (EINVAL);
3317 			mte->mte_handlers--;
3318 			if (mte->mte_handlers == 0)
3319 				pci_mask_msix(child, rid - 1);
3320 		}
3321 	}
3322 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3323 	if (rid > 0)
3324 		KASSERT(error == 0,
3325 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3326 	return (error);
3327 }
3328 
3329 int
3330 pci_print_child(device_t dev, device_t child)
3331 {
3332 	struct pci_devinfo *dinfo;
3333 	struct resource_list *rl;
3334 	int retval = 0;
3335 
3336 	dinfo = device_get_ivars(child);
3337 	rl = &dinfo->resources;
3338 
3339 	retval += bus_print_child_header(dev, child);
3340 
3341 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3342 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3343 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3344 	if (device_get_flags(dev))
3345 		retval += kprintf(" flags %#x", device_get_flags(dev));
3346 
3347 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3348 	    pci_get_function(child));
3349 
3350 	retval += bus_print_child_footer(dev, child);
3351 
3352 	return (retval);
3353 }
3354 
3355 static struct
3356 {
3357 	int	class;
3358 	int	subclass;
3359 	char	*desc;
3360 } pci_nomatch_tab[] = {
3361 	{PCIC_OLD,		-1,			"old"},
3362 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3363 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3364 	{PCIC_STORAGE,		-1,			"mass storage"},
3365 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3366 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3367 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3368 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3369 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3370 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3371 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3372 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3373 	{PCIC_NETWORK,		-1,			"network"},
3374 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3375 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3376 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3377 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3378 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3379 	{PCIC_DISPLAY,		-1,			"display"},
3380 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3381 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3382 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3383 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3384 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3385 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3386 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3387 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3388 	{PCIC_MEMORY,		-1,			"memory"},
3389 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3390 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3391 	{PCIC_BRIDGE,		-1,			"bridge"},
3392 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3393 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3394 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3395 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3396 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3397 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3398 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3399 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3400 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3401 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3402 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3403 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3404 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3405 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3406 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3407 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3408 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3409 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3410 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3411 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3412 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3413 	{PCIC_INPUTDEV,		-1,			"input device"},
3414 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3415 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3416 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3417 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3418 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3419 	{PCIC_DOCKING,		-1,			"docking station"},
3420 	{PCIC_PROCESSOR,	-1,			"processor"},
3421 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3422 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3423 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3424 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3425 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3426 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3427 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3428 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3429 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3430 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3431 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3432 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3433 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3434 	{PCIC_SATCOM,		-1,			"satellite communication"},
3435 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3436 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3437 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3438 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3439 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3440 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3441 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3442 	{PCIC_DASP,		-1,			"dasp"},
3443 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3444 	{0, 0,		NULL}
3445 };
3446 
3447 void
3448 pci_probe_nomatch(device_t dev, device_t child)
3449 {
3450 	int	i;
3451 	char	*cp, *scp, *device;
3452 
3453 	/*
3454 	 * Look for a listing for this device in a loaded device database.
3455 	 */
3456 	if ((device = pci_describe_device(child)) != NULL) {
3457 		device_printf(dev, "<%s>", device);
3458 		kfree(device, M_DEVBUF);
3459 	} else {
3460 		/*
3461 		 * Scan the class/subclass descriptions for a general
3462 		 * description.
3463 		 */
3464 		cp = "unknown";
3465 		scp = NULL;
3466 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3467 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3468 				if (pci_nomatch_tab[i].subclass == -1) {
3469 					cp = pci_nomatch_tab[i].desc;
3470 				} else if (pci_nomatch_tab[i].subclass ==
3471 				    pci_get_subclass(child)) {
3472 					scp = pci_nomatch_tab[i].desc;
3473 				}
3474 			}
3475 		}
3476 		device_printf(dev, "<%s%s%s>",
3477 		    cp ? cp : "",
3478 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3479 		    scp ? scp : "");
3480 	}
3481 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3482 		pci_get_vendor(child), pci_get_device(child),
3483 		pci_get_slot(child), pci_get_function(child));
3484 	if (pci_get_intpin(child) > 0) {
3485 		int irq;
3486 
3487 		irq = pci_get_irq(child);
3488 		if (PCI_INTERRUPT_VALID(irq))
3489 			kprintf(" irq %d", irq);
3490 	}
3491 	kprintf("\n");
3492 
3493 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3494 }
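/*
 * For illustration, pci_probe_nomatch() produces a boot line roughly
 * like the following (device and values are hypothetical; the
 * bracketed part comes from the vendor database when loaded, or from
 * pci_nomatch_tab otherwise):
 *
 *	pci0: <serial bus, USB> (vendor 0x1234, dev 0x5678) at device 29.0 irq 11
 */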
3495 
3496 /*
3497  * Parse the PCI device database, if loaded, and return a pointer to a
3498  * description of the device.
3499  *
3500  * The database is flat text formatted as follows:
3501  *
3502  * Any line not in a valid format is ignored.
3503  * Lines are terminated with newline '\n' characters.
3504  *
3505  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3506  * the vendor name.
3507  *
3508  * A DEVICE line is entered immediately below its corresponding VENDOR
3509  * line; a device cannot be listed without one.
3510  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3511  * another TAB, then the device name.
3512  */
3513 
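/*
 * For illustration, a well-formed fragment of the database (entries
 * are hypothetical; fields are separated by single TABs):
 *
 *	1234	Example Vendor Inc
 *		0001	Example Fast Ethernet adapter
 *		0002	Example Gigabit Ethernet adapter
 *	5678	Another Vendor Ltd
 */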
3514 /*
3515  * Assuming (ptr) points to the beginning of a line in the database,
3516  * return the vendor or device and description of the next entry.
3517  * Whichever of (vendor) or (device) does not apply to the entry type
3518  * is set to -1.  Returns nonzero at the end of the database.
3519  *
3520  * Note that this is not entirely robust in the face of corrupt data;
3521  * we guard against that by appending a newline to the end of the
3522  * database when we initialise it.
3523  */
3524 static int
3525 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3526 {
3527 	char	*cp = *ptr;
3528 	int	left;
3529 
3530 	*device = -1;
3531 	*vendor = -1;
3532 	**desc = '\0';
3533 	for (;;) {
3534 		left = pci_vendordata_size - (cp - pci_vendordata);
3535 		if (left <= 0) {
3536 			*ptr = cp;
3537 			return(1);
3538 		}
3539 
3540 		/* vendor entry? */
3541 		if (*cp != '\t' &&
3542 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3543 			break;
3544 		/* device entry? */
3545 		if (*cp == '\t' &&
3546 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3547 			break;
3548 
3549 		/* skip to next line */
3550 		while (left > 0 && *cp != '\n') {
3551 			cp++;
3552 			left--;
3553 		}
3554 		if (left > 0 && *cp == '\n') {
3555 			cp++;
3556 			left--;
3557 		}
3558 	}
3559 	/* skip to next line */
3560 	while (left > 0 && *cp != '\n') {
3561 		cp++;
3562 		left--;
3563 	}
3564 	if (left > 0 && *cp == '\n')
3565 		cp++;
3566 	*ptr = cp;
3567 	return(0);
3568 }
3569 
3570 static char *
3571 pci_describe_device(device_t dev)
3572 {
3573 	int	vendor, device;
3574 	char	*desc, *vp, *dp, *line;
3575 
3576 	desc = vp = dp = NULL;
3577 
3578 	/*
3579 	 * If we have no vendor data, we can't do anything.
3580 	 */
3581 	if (pci_vendordata == NULL)
3582 		goto out;
3583 
3584 	/*
3585 	 * Scan the vendor data looking for this device
3586 	 */
3587 	line = pci_vendordata;
3588 	if ((vp = kmalloc(81, M_DEVBUF, M_NOWAIT)) == NULL)	/* "%80[" scans up to 80 chars + NUL */
3589 		goto out;
3590 	for (;;) {
3591 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3592 			goto out;
3593 		if (vendor == pci_get_vendor(dev))
3594 			break;
3595 	}
3596 	if ((dp = kmalloc(81, M_DEVBUF, M_NOWAIT)) == NULL)	/* "%80[" scans up to 80 chars + NUL */
3597 		goto out;
3598 	for (;;) {
3599 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3600 			*dp = 0;
3601 			break;
3602 		}
3603 		if (vendor != -1) {
3604 			*dp = 0;
3605 			break;
3606 		}
3607 		if (device == pci_get_device(dev))
3608 			break;
3609 	}
3610 	if (dp[0] == '\0')
3611 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3612 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3613 	    NULL)
3614 		ksprintf(desc, "%s, %s", vp, dp);
3615  out:
3616 	if (vp != NULL)
3617 		kfree(vp, M_DEVBUF);
3618 	if (dp != NULL)
3619 		kfree(dp, M_DEVBUF);
3620 	return(desc);
3621 }
3622 
3623 int
3624 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3625 {
3626 	struct pci_devinfo *dinfo;
3627 	pcicfgregs *cfg;
3628 
3629 	dinfo = device_get_ivars(child);
3630 	cfg = &dinfo->cfg;
3631 
3632 	switch (which) {
3633 	case PCI_IVAR_ETHADDR:
3634 		/*
3635 		 * The generic accessor doesn't deal with failure, so
3636 		 * we set the return value, then return an error.
3637 		 */
3638 		*((uint8_t **) result) = NULL;
3639 		return (EINVAL);
3640 	case PCI_IVAR_SUBVENDOR:
3641 		*result = cfg->subvendor;
3642 		break;
3643 	case PCI_IVAR_SUBDEVICE:
3644 		*result = cfg->subdevice;
3645 		break;
3646 	case PCI_IVAR_VENDOR:
3647 		*result = cfg->vendor;
3648 		break;
3649 	case PCI_IVAR_DEVICE:
3650 		*result = cfg->device;
3651 		break;
3652 	case PCI_IVAR_DEVID:
3653 		*result = (cfg->device << 16) | cfg->vendor;
3654 		break;
3655 	case PCI_IVAR_CLASS:
3656 		*result = cfg->baseclass;
3657 		break;
3658 	case PCI_IVAR_SUBCLASS:
3659 		*result = cfg->subclass;
3660 		break;
3661 	case PCI_IVAR_PROGIF:
3662 		*result = cfg->progif;
3663 		break;
3664 	case PCI_IVAR_REVID:
3665 		*result = cfg->revid;
3666 		break;
3667 	case PCI_IVAR_INTPIN:
3668 		*result = cfg->intpin;
3669 		break;
3670 	case PCI_IVAR_IRQ:
3671 		*result = cfg->intline;
3672 		break;
3673 	case PCI_IVAR_DOMAIN:
3674 		*result = cfg->domain;
3675 		break;
3676 	case PCI_IVAR_BUS:
3677 		*result = cfg->bus;
3678 		break;
3679 	case PCI_IVAR_SLOT:
3680 		*result = cfg->slot;
3681 		break;
3682 	case PCI_IVAR_FUNCTION:
3683 		*result = cfg->func;
3684 		break;
3685 	case PCI_IVAR_CMDREG:
3686 		*result = cfg->cmdreg;
3687 		break;
3688 	case PCI_IVAR_CACHELNSZ:
3689 		*result = cfg->cachelnsz;
3690 		break;
3691 	case PCI_IVAR_MINGNT:
3692 		*result = cfg->mingnt;
3693 		break;
3694 	case PCI_IVAR_MAXLAT:
3695 		*result = cfg->maxlat;
3696 		break;
3697 	case PCI_IVAR_LATTIMER:
3698 		*result = cfg->lattimer;
3699 		break;
3700 	case PCI_IVAR_PCIXCAP_PTR:
3701 		*result = cfg->pcix.pcix_ptr;
3702 		break;
3703 	case PCI_IVAR_PCIECAP_PTR:
3704 		*result = cfg->expr.expr_ptr;
3705 		break;
3706 	case PCI_IVAR_VPDCAP_PTR:
3707 		*result = cfg->vpd.vpd_reg;
3708 		break;
3709 	default:
3710 		return (ENOENT);
3711 	}
3712 	return (0);
3713 }
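/*
 * Drivers do not normally call BUS_READ_IVAR() directly; they use the
 * pci_get_*() accessor macros from pcivar.h, which map onto the cases
 * above.  A minimal probe sketch (vendor/device IDs hypothetical):
 *
 *	static int
 *	foo_pci_probe(device_t dev)
 *	{
 *		if (pci_get_vendor(dev) == 0x1234 &&
 *		    pci_get_device(dev) == 0x5678) {
 *			device_set_desc(dev, "Foo controller");
 *			return (0);
 *		}
 *		return (ENXIO);
 *	}
 */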
3714 
3715 int
3716 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3717 {
3718 	struct pci_devinfo *dinfo;
3719 
3720 	dinfo = device_get_ivars(child);
3721 
3722 	switch (which) {
3723 	case PCI_IVAR_INTPIN:
3724 		dinfo->cfg.intpin = value;
3725 		return (0);
3726 	case PCI_IVAR_ETHADDR:
3727 	case PCI_IVAR_SUBVENDOR:
3728 	case PCI_IVAR_SUBDEVICE:
3729 	case PCI_IVAR_VENDOR:
3730 	case PCI_IVAR_DEVICE:
3731 	case PCI_IVAR_DEVID:
3732 	case PCI_IVAR_CLASS:
3733 	case PCI_IVAR_SUBCLASS:
3734 	case PCI_IVAR_PROGIF:
3735 	case PCI_IVAR_REVID:
3736 	case PCI_IVAR_IRQ:
3737 	case PCI_IVAR_DOMAIN:
3738 	case PCI_IVAR_BUS:
3739 	case PCI_IVAR_SLOT:
3740 	case PCI_IVAR_FUNCTION:
3741 		return (EINVAL);	/* disallow for now */
3742 
3743 	default:
3744 		return (ENOENT);
3745 	}
3746 }
3747 #ifdef notyet
3748 #include "opt_ddb.h"
3749 #ifdef DDB
3750 #include <ddb/ddb.h>
3751 #include <sys/cons.h>
3752 
3753 /*
3754  * List resources based on PCI map registers, for use from within ddb
3755  */
3756 
3757 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3758 {
3759 	struct pci_devinfo *dinfo;
3760 	struct devlist *devlist_head;
3761 	struct pci_conf *p;
3762 	const char *name;
3763 	int i, error, none_count;
3764 
3765 	none_count = 0;
3766 	/* get the head of the device queue */
3767 	devlist_head = &pci_devq;
3768 
3769 	/*
3770 	 * Go through the list of devices and print out devices
3771 	 */
3772 	for (error = 0, i = 0,
3773 	     dinfo = STAILQ_FIRST(devlist_head);
3774 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3775 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3776 
3777 		/* Populate pd_name and pd_unit */
3778 		name = NULL;
3779 		if (dinfo->cfg.dev)
3780 			name = device_get_name(dinfo->cfg.dev);
3781 
3782 		p = &dinfo->conf;
3783 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3784 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3785 			(name && *name) ? name : "none",
3786 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3787 			none_count++,
3788 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3789 			p->pc_sel.pc_func, (p->pc_class << 16) |
3790 			(p->pc_subclass << 8) | p->pc_progif,
3791 			(p->pc_subdevice << 16) | p->pc_subvendor,
3792 			(p->pc_device << 16) | p->pc_vendor,
3793 			p->pc_revid, p->pc_hdr);
3794 	}
3795 }
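/*
 * Given the format string above, one line of "show pciregs" output
 * would look roughly like this (values hypothetical):
 *
 *	foo0@pci0:0:25:0:	class=0x020000 card=0x00011234 chip=0x56781234 rev=0x02 hdr=0x00
 */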
3796 #endif /* DDB */
3797 #endif
3798 
3799 static struct resource *
3800 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3801     u_long start, u_long end, u_long count, u_int flags)
3802 {
3803 	struct pci_devinfo *dinfo = device_get_ivars(child);
3804 	struct resource_list *rl = &dinfo->resources;
3805 	struct resource_list_entry *rle;
3806 	struct resource *res;
3807 	pci_addr_t map, testval;
3808 	int mapsize;
3809 
3810 	/*
3811 	 * Weed out the bogons, and figure out how large the BAR/map
3812 	 * is.  BARs that read back 0 here are bogus and unimplemented.
3813 	 * Note: atapci devices in legacy mode are special and handled
3814 	 * elsewhere in the code.  If you have an atapci device in legacy
3815 	 * mode and it fails here, that other code is broken.
3816 	 */
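	/*
	 * As a worked example of the sizing logic below: a 32-bit memory
	 * BAR that reads back 0xfffff000 after the all-ones write has its
	 * low 12 address bits hardwired to zero, so pci_mapsize() returns
	 * 12 and the BAR decodes a 1 << 12 = 4KB window.
	 */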
3817 	res = NULL;
3818 	map = pci_read_config(child, *rid, 4);
3819 	pci_write_config(child, *rid, 0xffffffff, 4);
3820 	testval = pci_read_config(child, *rid, 4);
3821 	if (pci_maprange(testval) == 64)
3822 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3823 	if (pci_mapbase(testval) == 0)
3824 		goto out;
3825 
3826 	/*
3827 	 * Restore the original value of the BAR.  We may have reprogrammed
3828 	 * the BAR of the low-level console device and when booting verbose,
3829 	 * we need the console device addressable.
3830 	 */
3831 	pci_write_config(child, *rid, map, 4);
3832 
3833 	if (PCI_BAR_MEM(testval)) {
3834 		if (type != SYS_RES_MEMORY) {
3835 			if (bootverbose)
3836 				device_printf(dev,
3837 				    "child %s requested type %d for rid %#x,"
3838 				    " but the BAR says it is a memio\n",
3839 				    device_get_nameunit(child), type, *rid);
3840 			goto out;
3841 		}
3842 	} else {
3843 		if (type != SYS_RES_IOPORT) {
3844 			if (bootverbose)
3845 				device_printf(dev,
3846 				    "child %s requested type %d for rid %#x,"
3847 				    " but the BAR says it is an ioport\n",
3848 				    device_get_nameunit(child), type, *rid);
3849 			goto out;
3850 		}
3851 	}
3852 	/*
3853 	 * For real BARs, we need to override the size that
3854 	 * the driver requests, because that's what the BAR
3855 	 * actually uses and we would otherwise have a
3856 	 * situation where we might allocate the excess to
3857 	 * another driver, which won't work.
3858 	 */
3859 	mapsize = pci_mapsize(testval);
3860 	count = 1UL << mapsize;
3861 	if (RF_ALIGNMENT(flags) < mapsize)
3862 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3863 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3864 		flags |= RF_PREFETCHABLE;
3865 
3866 	/*
3867 	 * Allocate enough resource, and then write back the
3868 	 * appropriate bar for that resource.
3869 	 */
3870 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3871 	    start, end, count, flags, -1);
3872 	if (res == NULL) {
3873 		device_printf(child,
3874 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3875 		    count, *rid, type, start, end);
3876 		goto out;
3877 	}
3878 	resource_list_add(rl, type, *rid, start, end, count, -1);
3879 	rle = resource_list_find(rl, type, *rid);
3880 	if (rle == NULL)
3881 		panic("pci_alloc_map: unexpectedly can't find resource.");
3882 	rle->res = res;
3883 	rle->start = rman_get_start(res);
3884 	rle->end = rman_get_end(res);
3885 	rle->count = count;
3886 	if (bootverbose)
3887 		device_printf(child,
3888 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3889 		    count, *rid, type, rman_get_start(res));
3890 	map = rman_get_start(res);
3891 out:
3892 	pci_write_config(child, *rid, map, 4);
3893 	if (pci_maprange(testval) == 64)
3894 		pci_write_config(child, *rid + 4, map >> 32, 4);
3895 	return (res);
3896 }
3897 
3899 struct resource *
3900 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3901     u_long start, u_long end, u_long count, u_int flags, int cpuid)
3902 {
3903 	struct pci_devinfo *dinfo = device_get_ivars(child);
3904 	struct resource_list *rl = &dinfo->resources;
3905 	struct resource_list_entry *rle;
3906 	pcicfgregs *cfg = &dinfo->cfg;
3907 
3908 	/*
3909 	 * Perform lazy resource allocation
3910 	 */
3911 	if (device_get_parent(child) == dev) {
3912 		switch (type) {
3913 		case SYS_RES_IRQ:
3914 			/*
3915 			 * Can't alloc legacy interrupt once MSI messages
3916 			 * have been allocated.
3917 			 */
3918 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3919 			    cfg->msix.msix_alloc > 0))
3920 				return (NULL);
3921 			/*
3922 			 * If the child device doesn't have an
3923 			 * interrupt routed and is deserving of an
3924 			 * interrupt, try to assign it one.
3925 			 */
3926 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3927 			    (cfg->intpin != 0))
3928 				pci_assign_interrupt(dev, child, 0);
3929 			break;
3930 		case SYS_RES_IOPORT:
3931 		case SYS_RES_MEMORY:
3932 			if (*rid < PCIR_BAR(cfg->nummaps)) {
3933 				/*
3934 				 * Enable the I/O mode.  We should
3935 				 * also be assigning resources when
3936 				 * none are present.  The
3937 				 * resource_list_alloc code more or
3938 				 * less does this...
3939 				 */
3940 				if (PCI_ENABLE_IO(dev, child, type))
3941 					return (NULL);
3942 			}
3943 			rle = resource_list_find(rl, type, *rid);
3944 			if (rle == NULL)
3945 				return (pci_alloc_map(dev, child, type, rid,
3946 				    start, end, count, flags));
3947 			break;
3948 		}
3949 		/*
3950 		 * If we've already allocated the resource, then
3951 		 * return it now.  But first we may need to activate
3952 		 * it, since we don't allocate the resource as active
3953 		 * above.  Normally this would be done down in the
3954 		 * nexus, but since we short-circuit that path we have
3955 		 * to do its job here.  It is not clear whether we should
3956 		 * release the resource if it fails to activate.
3957 		 */
3958 		rle = resource_list_find(rl, type, *rid);
3959 		if (rle != NULL && rle->res != NULL) {
3960 			if (bootverbose)
3961 				device_printf(child,
3962 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3963 				    rman_get_size(rle->res), *rid, type,
3964 				    rman_get_start(rle->res));
3965 			if ((flags & RF_ACTIVE) &&
3966 			    bus_generic_activate_resource(dev, child, type,
3967 			    *rid, rle->res) != 0)
3968 				return (NULL);
3969 			return (rle->res);
3970 		}
3971 	}
3972 	return (resource_list_alloc(rl, dev, child, type, rid,
3973 	    start, end, count, flags, cpuid));
3974 }
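/*
 * From a driver's point of view the lazy BAR handling above is
 * transparent; a typical attach routine simply allocates its BAR.
 * A minimal sketch, assuming the device's registers sit behind BAR 0:
 *
 *	int rid = PCIR_BAR(0);
 *	struct resource *mem;
 *
 *	mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 *	    RF_ACTIVE);
 *	if (mem == NULL)
 *		return (ENXIO);
 */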
3975 
3976 void
3977 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3978 {
3979 	struct pci_devinfo *dinfo;
3980 	struct resource_list *rl;
3981 	struct resource_list_entry *rle;
3982 
3983 	if (device_get_parent(child) != dev)
3984 		return;
3985 
3986 	dinfo = device_get_ivars(child);
3987 	rl = &dinfo->resources;
3988 	rle = resource_list_find(rl, type, rid);
3989 	if (rle) {
3990 		if (rle->res) {
3991 			if (rman_get_device(rle->res) != dev ||
3992 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3993 				device_printf(dev, "delete_resource: "
3994 				    "Resource still owned by child, oops. "
3995 				    "(type=%d, rid=%d, addr=%lx)\n",
3996 				    rle->type, rle->rid,
3997 				    rman_get_start(rle->res));
3998 				return;
3999 			}
4000 			bus_release_resource(dev, type, rid, rle->res);
4001 		}
4002 		resource_list_delete(rl, type, rid);
4003 	}
4004 	/*
4005 	 * Why do we turn off the PCI configuration BAR when we delete a
4006 	 * resource? -- imp
4007 	 */
4008 	pci_write_config(child, rid, 0, 4);
4009 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4010 }
4011 
4012 struct resource_list *
4013 pci_get_resource_list(device_t dev, device_t child)
4014 {
4015 	struct pci_devinfo *dinfo = device_get_ivars(child);
4016 
4017 	if (dinfo == NULL)
4018 		return (NULL);
4019 
4020 	return (&dinfo->resources);
4021 }
4022 
4023 uint32_t
4024 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4025 {
4026 	struct pci_devinfo *dinfo = device_get_ivars(child);
4027 	pcicfgregs *cfg = &dinfo->cfg;
4028 
4029 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4030 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4031 }
4032 
4033 void
4034 pci_write_config_method(device_t dev, device_t child, int reg,
4035     uint32_t val, int width)
4036 {
4037 	struct pci_devinfo *dinfo = device_get_ivars(child);
4038 	pcicfgregs *cfg = &dinfo->cfg;
4039 
4040 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4041 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4042 }
4043 
4044 int
4045 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4046     size_t buflen)
4047 {
4048 
4049 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4050 	    pci_get_function(child));
4051 	return (0);
4052 }
4053 
4054 int
4055 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4056     size_t buflen)
4057 {
4058 	struct pci_devinfo *dinfo;
4059 	pcicfgregs *cfg;
4060 
4061 	dinfo = device_get_ivars(child);
4062 	cfg = &dinfo->cfg;
4063 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4064 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4065 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4066 	    cfg->progif);
4067 	return (0);
4068 }
4069 
4070 int
4071 pci_assign_interrupt_method(device_t dev, device_t child)
4072 {
4073 	struct pci_devinfo *dinfo = device_get_ivars(child);
4074 	pcicfgregs *cfg = &dinfo->cfg;
4075 
4076 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4077 	    cfg->intpin));
4078 }
4079 
4080 static int
4081 pci_modevent(module_t mod, int what, void *arg)
4082 {
4083 	static struct cdev *pci_cdev;
4084 
4085 	switch (what) {
4086 	case MOD_LOAD:
4087 		STAILQ_INIT(&pci_devq);
4088 		pci_generation = 0;
4089 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4090 				    "pci");
4091 		pci_load_vendor_data();
4092 		break;
4093 
4094 	case MOD_UNLOAD:
4095 		destroy_dev(pci_cdev);
4096 		break;
4097 	}
4098 
4099 	return (0);
4100 }
4101 
4102 void
4103 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4104 {
4105 	int i;
4106 
4107 	/*
4108 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4109 	 * which we know need special treatment.  Type 2 devices are
4110 	 * cardbus bridges which also require special treatment.
4111 	 * Other types are unknown, and we err on the side of safety
4112 	 * by ignoring them.
4113 	 */
4114 	if (dinfo->cfg.hdrtype != 0)
4115 		return;
4116 
4117 	/*
4118 	 * Restore the device to full power mode.  We must do this
4119 	 * before we restore the registers because moving from D3 to
4120 	 * D0 will cause the chip's BARs and some other registers to
4121 	 * be reset to some unknown power-on reset values.  Cut down
4122 	 * the noise on boot by doing nothing if we are already in
4123 	 * state D0.
4124 	 */
4125 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
4126 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4127 	}
4128 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4129 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4130 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4131 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4132 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4133 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4134 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4135 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4136 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4137 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4138 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4139 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4140 
4141 	/* Restore MSI and MSI-X configurations if they are present. */
4142 	if (dinfo->cfg.msi.msi_location != 0)
4143 		pci_resume_msi(dev);
4144 	if (dinfo->cfg.msix.msix_location != 0)
4145 		pci_resume_msix(dev);
4146 }
4147 
4148 void
4149 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4150 {
4151 	int i;
4152 	uint32_t cls;
4153 	int ps;
4154 
4155 	/*
4156 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4157 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4158 	 * which also require special treatment.  Other types are unknown, and
4159 	 * we err on the side of safety by ignoring them.  Powering down
4160 	 * bridges should not be undertaken lightly.
4161 	 */
4162 	if (dinfo->cfg.hdrtype != 0)
4163 		return;
4164 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4165 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4166 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4167 
4168 	/*
4169 	 * Some drivers apparently write to these registers w/o updating our
4170 	 * cached copy.  No harm happens if we update the copy, so do so here
4171 	 * so we can restore them.  The COMMAND register is modified by the
4172 	 * bus w/o updating the cache.  This should represent the normally
4173 	 * writable portion of the 'defined' part of type 0 headers.  In
4174 	 * theory we also need to save/restore the PCI capability structures
4175 	 * we know about, but apart from power we don't know any that are
4176 	 * writable.
4177 	 */
4178 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4179 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4180 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4181 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4182 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4183 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4184 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4185 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4186 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4187 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4188 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4189 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4190 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4191 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4192 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4193 
4194 	/*
4195 	 * Don't set the state for display devices, base peripherals and
4196 	 * memory devices since bad things happen when they are powered down.
4197 	 * We should (a) have drivers that can easily detach and (b) use
4198 	 * generic drivers for these devices so that some device actually
4199 	 * attaches.  We need to make sure that when we implement (a) we don't
4200 	 * power the device down on a reattach.
4201 	 */
4202 	cls = pci_get_class(dev);
4203 	if (!setstate)
4204 		return;
4205 	switch (pci_do_power_nodriver) {
4207 	case 0:		/* NO powerdown at all */
4208 		return;
4209 	case 1:		/* Conservative about what to power down */
4210 		if (cls == PCIC_STORAGE)
4211 			return;
4212 		/*FALLTHROUGH*/
4213 	case 2:		/* Aggressive about what to power down */
4214 		if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4215 		    cls == PCIC_BASEPERIPH)
4216 			return;
4217 		/*FALLTHROUGH*/
4218 	case 3:		/* Power down everything */
4219 		break;
4220 	}
4221 	/*
4222 	 * PCI spec says we can only go into D3 state from D0 state.
4223 	 * Transition from D[12] into D0 before going to D3 state.
4224 	 */
4225 	ps = pci_get_powerstate(dev);
4226 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4227 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4228 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4229 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4230 }
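/*
 * The powerdown policy above is selected through the
 * pci_do_power_nodriver sysctl/tunable (exposed as
 * hw.pci.do_power_nodriver in the FreeBSD lineage of this code);
 * e.g. in /boot/loader.conf:
 *
 *	hw.pci.do_power_nodriver="3"	# power down any driverless device
 */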
4231 
4232 #ifdef COMPAT_OLDPCI
4233 
4234 /*
4235  * Locate the parent of a PCI device by scanning the PCI devlist
4236  * and return the entry for the parent.
4237  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4238  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4239  */
4240 pcicfgregs *
4241 pci_devlist_get_parent(pcicfgregs *cfg)
4242 {
4243 	struct devlist *devlist_head;
4244 	struct pci_devinfo *dinfo;
4245 	pcicfgregs *bridge_cfg;
4246 	int i;
4247 
4248 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4249 
4250 	/* If the device is on PCI bus 0, look for the host */
4251 	if (cfg->bus == 0) {
4252 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4253 		     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4254 			bridge_cfg = &dinfo->cfg;
4255 			if (bridge_cfg->baseclass == PCIC_BRIDGE &&
4256 			    bridge_cfg->subclass == PCIS_BRIDGE_HOST &&
4257 			    bridge_cfg->bus == cfg->bus) {
4258 				return bridge_cfg;
4259 			}
4260 		}
4261 	}
4262 
4263 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4264 	if (cfg->bus > 0) {
4265 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4266 		     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4267 			bridge_cfg = &dinfo->cfg;
4268 			if (bridge_cfg->baseclass == PCIC_BRIDGE &&
4269 			    bridge_cfg->subclass == PCIS_BRIDGE_PCI &&
4270 			    bridge_cfg->secondarybus == cfg->bus) {
4271 				return bridge_cfg;
4272 			}
4273 		}
4274 	}
4275 
4276 	return NULL;
4277 }
4278 
4279 #endif	/* COMPAT_OLDPCI */
4280 
4281 int
4282 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4283 {
4284 	int rid, type;
4285 	u_int flags;
4286 	char env[64];
4287 
4288 	rid = 0;
4289 	type = PCI_INTR_TYPE_LEGACY;
4290 	flags = RF_SHAREABLE | RF_ACTIVE;
4291 
4292 	ksnprintf(env, sizeof(env), "hw.%s.msi.enable",
4293 	    device_get_nameunit(dev));
4294 	kgetenv_int(env, &msi_enable);
4295 
4296 	if (msi_enable) {
4297 		int cpu = -1;
4298 
4299 		ksnprintf(env, sizeof(env), "hw.%s.msi.cpu",
4300 		    device_get_nameunit(dev));
4301 		kgetenv_int(env, &cpu);
4302 		if (cpu >= ncpus)
4303 			cpu = ncpus - 1;
4304 
4305 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4306 			flags &= ~RF_SHAREABLE;
4307 			type = PCI_INTR_TYPE_MSI;
4308 		}
4309 	}
4310 
4311 	*rid0 = rid;
4312 	*flags0 = flags;
4313 
4314 	return type;
4315 }
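/*
 * A sketch of the intended calling sequence for pci_alloc_1intr()
 * (error-path teardown beyond the MSI release is omitted):
 *
 *	u_int irq_flags;
 *	int irq_rid, irq_type;
 *	struct resource *irq;
 *
 *	irq_type = pci_alloc_1intr(dev, 1, &irq_rid, &irq_flags);
 *	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq_rid,
 *	    irq_flags);
 *	if (irq == NULL) {
 *		if (irq_type == PCI_INTR_TYPE_MSI)
 *			pci_release_msi(dev);
 *		return (ENXIO);
 *	}
 */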
4316