xref: /dragonfly/sys/bus/pci/pci.c (revision 4d0c54c1)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include "pcib_if.h"
63 #include "pci_if.h"
64 
65 #ifdef __HAVE_ACPI
66 #include <contrib/dev/acpica/acpi.h>
67 #include "acpi_if.h"
68 #else
69 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
70 #endif
71 
72 extern struct dev_ops pcic_ops;	/* XXX */
73 
74 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
75 
76 static uint32_t		pci_mapbase(unsigned mapreg);
77 static const char	*pci_maptype(unsigned mapreg);
78 static int		pci_mapsize(unsigned testval);
79 static int		pci_maprange(unsigned mapreg);
80 static void		pci_fixancient(pcicfgregs *cfg);
81 
82 static int		pci_porten(device_t pcib, int b, int s, int f);
83 static int		pci_memen(device_t pcib, int b, int s, int f);
84 static void		pci_assign_interrupt(device_t bus, device_t dev,
85 			    int force_route);
86 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
87 			    int b, int s, int f, int reg,
88 			    struct resource_list *rl, int force, int prefetch);
89 static int		pci_probe(device_t dev);
90 static int		pci_attach(device_t dev);
91 static void		pci_child_detached(device_t, device_t);
92 static void		pci_load_vendor_data(void);
93 static int		pci_describe_parse_line(char **ptr, int *vendor,
94 			    int *device, char **desc);
95 static char		*pci_describe_device(device_t dev);
96 static int		pci_modevent(module_t mod, int what, void *arg);
97 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
98 			    pcicfgregs *cfg);
99 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
100 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t *data);
102 #if 0
103 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t data);
105 #endif
106 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
107 static void		pci_disable_msi(device_t dev);
108 static void		pci_enable_msi(device_t dev, uint64_t address,
109 			    uint16_t data);
110 static void		pci_setup_msix_vector(device_t dev, u_int index,
111 			    uint64_t address, uint32_t data);
112 static void		pci_mask_msix_vector(device_t dev, u_int index);
113 static void		pci_unmask_msix_vector(device_t dev, u_int index);
114 static void		pci_mask_msix_allvectors(device_t dev);
115 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pcie_slotimpl(const pcicfgregs *);
120 static void		pci_print_verbose_expr(const pcicfgregs *);
121 
122 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_subvendor(device_t, int, int,
128 			    pcicfgregs *);
129 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
131 
/*
 * Method table for the pci bus driver.  Entries are grouped by the
 * interface they implement (device, bus, PCI); the table is terminated
 * by the all-zero entry.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management and child description */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	/* Table terminator */
	{ 0, 0 }
};
184 
/* Driver class "pci", built from the pci_methods table above. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Register the pci driver under pcib, with pci_modevent as event handler. */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data and its size.
 * NOTE(review): presumably filled in by pci_load_vendor_data(), which is
 * outside this part of the file -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
193 
194 
/*
 * Dispatch table used by pci_read_capabilities(): maps a capability ID
 * to the function that parses that capability.  The scan stops at the
 * entry whose read_cap is NULL, so that entry must stay last.
 */
static const struct pci_read_cap {
	int		cap;		/* capability ID to match */
	pci_read_cap_t	read_cap;	/* parser for that capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
209 
/*
 * One entry of the device quirk table (pci_quirks[]).
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* quirk argument; the MAP_REG entries put 0x90 here */
	int	arg2;	/* second quirk argument; 0 in every entry of the table */
};
218 
/*
 * Table of known device quirks.  devid follows the layout noted on
 * struct pci_quirk ("Vendor/device of the card"); the list is terminated
 * by an all-zero entry.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/* Table terminator */
	{ 0 }
};
253 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* MSI-X resource IDs are the vector number plus one. */
#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

/* List of all devices found so far; pci_read_device() appends to it. */
struct devlist pci_devq;
/* Both counters are incremented each time pci_read_device() adds a device. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/* Set to 1 by the PCI-express / PCI-X capability parsers respectively. */
static int pcie_chipset, pcix_chipset;
266 
/*
 * sysctl vars
 *
 * Each knob below is an int exposed under hw.pci and also settable as a
 * loader tunable of the same name.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/* Unlike the knobs above, this one is exported read-only (CTLFLAG_RD). */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/*
 * NOTE(review): not referenced in the visible part of this file;
 * presumably the CPU used when assigning MSI targets -- confirm.
 */
static int pci_msi_cpuid;
309 
310 /* Find a device_t by bus/slot/function in domain 0 */
311 
312 device_t
313 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
314 {
315 
316 	return (pci_find_dbsf(0, bus, slot, func));
317 }
318 
319 /* Find a device_t by domain/bus/slot/function */
320 
321 device_t
322 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323 {
324 	struct pci_devinfo *dinfo;
325 
326 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327 		if ((dinfo->cfg.domain == domain) &&
328 		    (dinfo->cfg.bus == bus) &&
329 		    (dinfo->cfg.slot == slot) &&
330 		    (dinfo->cfg.func == func)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 /* Find a device_t by vendor/device ID */
339 
340 device_t
341 pci_find_device(uint16_t vendor, uint16_t device)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.vendor == vendor) &&
347 		    (dinfo->cfg.device == device)) {
348 			return (dinfo->cfg.dev);
349 		}
350 	}
351 
352 	return (NULL);
353 }
354 
355 /* return base address of memory or port map */
356 
357 static uint32_t
358 pci_mapbase(uint32_t mapreg)
359 {
360 
361 	if (PCI_BAR_MEM(mapreg))
362 		return (mapreg & PCIM_BAR_MEM_BASE);
363 	else
364 		return (mapreg & PCIM_BAR_IO_BASE);
365 }
366 
367 /* return map type of memory or port map */
368 
369 static const char *
370 pci_maptype(unsigned mapreg)
371 {
372 
373 	if (PCI_BAR_IO(mapreg))
374 		return ("I/O Port");
375 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
376 		return ("Prefetchable Memory");
377 	return ("Memory");
378 }
379 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * The result is the number of trailing zero bits in the base address
 * portion of testval (see pci_mapbase()), or 0 when that portion is 0.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base = pci_mapbase(testval);
	int shift;

	if (base == 0)
		return (0);

	/* Count how far the lowest set bit is from bit 0. */
	for (shift = 0; (base & 1) == 0; shift++)
		base >>= 1;

	return (shift);
}
398 
399 /* return log2 of address range supported by map register */
400 
401 static int
402 pci_maprange(unsigned mapreg)
403 {
404 	int ln2range = 0;
405 
406 	if (PCI_BAR_IO(mapreg))
407 		ln2range = 32;
408 	else
409 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
410 		case PCIM_BAR_MEM_32:
411 			ln2range = 32;
412 			break;
413 		case PCIM_BAR_MEM_1MB:
414 			ln2range = 20;
415 			break;
416 		case PCIM_BAR_MEM_64:
417 			ln2range = 64;
418 			break;
419 		}
420 	return (ln2range);
421 }
422 
423 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
424 
425 static void
426 pci_fixancient(pcicfgregs *cfg)
427 {
428 	if (cfg->hdrtype != 0)
429 		return;
430 
431 	/* PCI to PCI bridges use header type 1 */
432 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
433 		cfg->hdrtype = 1;
434 }
435 
436 /* extract header type specific config data */
437 
438 static void
439 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
440 {
441 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
442 	switch (cfg->hdrtype) {
443 	case 0:
444 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
445 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
446 		cfg->nummaps	    = PCI_MAXMAPS_0;
447 		break;
448 	case 1:
449 		cfg->nummaps	    = PCI_MAXMAPS_1;
450 #ifdef COMPAT_OLDPCI
451 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
452 #endif
453 		break;
454 	case 2:
455 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
456 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
457 		cfg->nummaps	    = PCI_MAXMAPS_2;
458 #ifdef COMPAT_OLDPCI
459 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
460 #endif
461 		break;
462 	}
463 #undef REG
464 }
465 
466 /* read configuration header into pcicfgregs structure */
467 struct pci_devinfo *
468 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
469 {
470 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
471 	pcicfgregs *cfg = NULL;
472 	struct pci_devinfo *devlist_entry;
473 	struct devlist *devlist_head;
474 
475 	devlist_head = &pci_devq;
476 
477 	devlist_entry = NULL;
478 
479 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
480 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
481 
482 		cfg = &devlist_entry->cfg;
483 
484 		cfg->domain		= d;
485 		cfg->bus		= b;
486 		cfg->slot		= s;
487 		cfg->func		= f;
488 		cfg->vendor		= REG(PCIR_VENDOR, 2);
489 		cfg->device		= REG(PCIR_DEVICE, 2);
490 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
491 		cfg->statreg		= REG(PCIR_STATUS, 2);
492 		cfg->baseclass		= REG(PCIR_CLASS, 1);
493 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
494 		cfg->progif		= REG(PCIR_PROGIF, 1);
495 		cfg->revid		= REG(PCIR_REVID, 1);
496 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
497 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
498 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
499 		cfg->intpin		= REG(PCIR_INTPIN, 1);
500 		cfg->intline		= REG(PCIR_INTLINE, 1);
501 
502 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
503 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
504 
505 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
506 		cfg->hdrtype		&= ~PCIM_MFDEV;
507 
508 		pci_fixancient(cfg);
509 		pci_hdrtypedata(pcib, b, s, f, cfg);
510 
511 		pci_read_capabilities(pcib, cfg);
512 
513 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
514 
515 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
516 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
517 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
518 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
519 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
520 
521 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
522 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
523 		devlist_entry->conf.pc_vendor = cfg->vendor;
524 		devlist_entry->conf.pc_device = cfg->device;
525 
526 		devlist_entry->conf.pc_class = cfg->baseclass;
527 		devlist_entry->conf.pc_subclass = cfg->subclass;
528 		devlist_entry->conf.pc_progif = cfg->progif;
529 		devlist_entry->conf.pc_revid = cfg->revid;
530 
531 		pci_numdevs++;
532 		pci_generation++;
533 	}
534 	return (devlist_entry);
535 #undef REG
536 }
537 
538 static int
539 pci_fixup_nextptr(int *nextptr0)
540 {
541 	int nextptr = *nextptr0;
542 
543 	/* "Next pointer" is only one byte */
544 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
545 
546 	if (nextptr & 0x3) {
547 		/*
548 		 * PCI local bus spec 3.0:
549 		 *
550 		 * "... The bottom two bits of all pointers are reserved
551 		 *  and must be implemented as 00b although software must
552 		 *  mask them to allow for future uses of these bits ..."
553 		 */
554 		if (bootverbose) {
555 			kprintf("Illegal PCI extended capability "
556 				"offset, fixup 0x%02x -> 0x%02x\n",
557 				nextptr, nextptr & ~0x3);
558 		}
559 		nextptr &= ~0x3;
560 	}
561 	*nextptr0 = nextptr;
562 
563 	if (nextptr < 0x40) {
564 		if (nextptr != 0) {
565 			kprintf("Illegal PCI extended capability "
566 				"offset 0x%02x", nextptr);
567 		}
568 		return 0;
569 	}
570 	return 1;
571 }
572 
573 static void
574 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
575 {
576 #define REG(n, w)	\
577 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
578 
579 	struct pcicfg_pp *pp = &cfg->pp;
580 
581 	if (pp->pp_cap)
582 		return;
583 
584 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
585 	pp->pp_status = ptr + PCIR_POWER_STATUS;
586 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
587 
588 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
589 		/*
590 		 * XXX
591 		 * We should write to data_select and read back from
592 		 * data_scale to determine whether data register is
593 		 * implemented.
594 		 */
595 #ifdef foo
596 		pp->pp_data = ptr + PCIR_POWER_DATA;
597 #else
598 		pp->pp_data = 0;
599 #endif
600 	}
601 
602 #undef REG
603 }
604 
605 static void
606 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
607 {
608 #if defined(__i386__) || defined(__x86_64__)
609 
610 #define REG(n, w)	\
611 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
612 
613 	struct pcicfg_ht *ht = &cfg->ht;
614 	uint64_t addr;
615 	uint32_t val;
616 
617 	/* Determine HT-specific capability type. */
618 	val = REG(ptr + PCIR_HT_COMMAND, 2);
619 
620 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
621 		cfg->ht.ht_slave = ptr;
622 
623 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
624 		return;
625 
626 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
627 		/* Sanity check the mapping window. */
628 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
629 		addr <<= 32;
630 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
631 		if (addr != MSI_X86_ADDR_BASE) {
632 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
633 				"has non-default MSI window 0x%llx\n",
634 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
635 				(long long)addr);
636 		}
637 	} else {
638 		addr = MSI_X86_ADDR_BASE;
639 	}
640 
641 	ht->ht_msimap = ptr;
642 	ht->ht_msictrl = val;
643 	ht->ht_msiaddr = addr;
644 
645 #undef REG
646 
647 #endif	/* __i386__ || __x86_64__ */
648 }
649 
650 static void
651 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
652 {
653 #define REG(n, w)	\
654 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
655 
656 	struct pcicfg_msi *msi = &cfg->msi;
657 
658 	msi->msi_location = ptr;
659 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
660 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
661 
662 #undef REG
663 }
664 
665 static void
666 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
667 {
668 #define REG(n, w)	\
669 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
670 
671 	struct pcicfg_msix *msix = &cfg->msix;
672 	uint32_t val;
673 
674 	msix->msix_location = ptr;
675 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
676 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
677 
678 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
679 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
680 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
681 
682 	val = REG(ptr + PCIR_MSIX_PBA, 4);
683 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
684 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
685 
686 	TAILQ_INIT(&msix->msix_vectors);
687 
688 #undef REG
689 }
690 
691 static void
692 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
693 {
694 	cfg->vpd.vpd_reg = ptr;
695 }
696 
697 static void
698 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
699 {
700 #define REG(n, w)	\
701 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
702 
703 	/* Should always be true. */
704 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
705 		uint32_t val;
706 
707 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
708 		cfg->subvendor = val & 0xffff;
709 		cfg->subdevice = val >> 16;
710 	}
711 
712 #undef REG
713 }
714 
715 static void
716 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
717 {
718 	/*
719 	 * Assume we have a PCI-X chipset if we have
720 	 * at least one PCI-PCI bridge with a PCI-X
721 	 * capability.  Note that some systems with
722 	 * PCI-express or HT chipsets might match on
723 	 * this check as well.
724 	 */
725 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
726 		pcix_chipset = 1;
727 
728 	cfg->pcix.pcix_ptr = ptr;
729 }
730 
731 static int
732 pcie_slotimpl(const pcicfgregs *cfg)
733 {
734 	const struct pcicfg_expr *expr = &cfg->expr;
735 	uint16_t port_type;
736 
737 	/*
738 	 * - Slot implemented bit is meaningful iff current port is
739 	 *   root port or down stream port.
740 	 * - Testing for root port or down stream port is meanningful
741 	 *   iff PCI configure has type 1 header.
742 	 */
743 
744 	if (cfg->hdrtype != 1)
745 		return 0;
746 
747 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
748 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
749 		return 0;
750 
751 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
752 		return 0;
753 
754 	return 1;
755 }
756 
757 static void
758 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
759 {
760 #define REG(n, w)	\
761 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
762 
763 	struct pcicfg_expr *expr = &cfg->expr;
764 
765 	/*
766 	 * Assume we have a PCI-express chipset if we have
767 	 * at least one PCI-express device.
768 	 */
769 	pcie_chipset = 1;
770 
771 	expr->expr_ptr = ptr;
772 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
773 
774 	/*
775 	 * Read slot capabilities.  Slot capabilities exists iff
776 	 * current port's slot is implemented
777 	 */
778 	if (pcie_slotimpl(cfg))
779 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
780 
781 #undef REG
782 }
783 
784 static void
785 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
786 {
787 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
788 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
789 
790 	uint32_t val;
791 	int nextptr, ptrptr;
792 
793 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
794 		/* No capabilities */
795 		return;
796 	}
797 
798 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
799 	case 0:
800 	case 1:
801 		ptrptr = PCIR_CAP_PTR;
802 		break;
803 	case 2:
804 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
805 		break;
806 	default:
807 		return;				/* no capabilities support */
808 	}
809 	nextptr = REG(ptrptr, 1);	/* sanity check? */
810 
811 	/*
812 	 * Read capability entries.
813 	 */
814 	while (pci_fixup_nextptr(&nextptr)) {
815 		const struct pci_read_cap *rc;
816 		int ptr = nextptr;
817 
818 		/* Find the next entry */
819 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
820 
821 		/* Process this entry */
822 		val = REG(ptr + PCICAP_ID, 1);
823 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
824 			if (rc->cap == val) {
825 				rc->read_cap(pcib, ptr, nextptr, cfg);
826 				break;
827 			}
828 		}
829 	}
830 
831 #if defined(__i386__) || defined(__x86_64__)
832 	/*
833 	 * Enable the MSI mapping window for all HyperTransport
834 	 * slaves.  PCI-PCI bridges have their windows enabled via
835 	 * PCIB_MAP_MSI().
836 	 */
837 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
838 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
839 		device_printf(pcib,
840 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
841 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
842 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
843 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
844 		     2);
845 	}
846 #endif
847 
848 /* REG and WREG use carry through to next functions */
849 }
850 
851 /*
852  * PCI Vital Product Data
853  */
854 
855 #define	PCI_VPD_TIMEOUT		1000000
856 
857 static int
858 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
859 {
860 	int count = PCI_VPD_TIMEOUT;
861 
862 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
863 
864 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
865 
866 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
867 		if (--count < 0)
868 			return (ENXIO);
869 		DELAY(1);	/* limit looping */
870 	}
871 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
872 
873 	return (0);
874 }
875 
/*
 * Currently compiled out (#if 0), like its prototype near the top of the
 * file.
 *
 * Write one 32-bit word of Vital Product Data: the data register is
 * loaded first, then the address is written with bit 0x8000 set and the
 * address register is polled until that bit reads back clear.  Returns 0
 * on success, ENXIO when the bit is still set after PCI_VPD_TIMEOUT
 * retries.
 */
#if 0
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
895 
896 #undef PCI_VPD_TIMEOUT
897 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD data one byte at a time
 * while fetching it from the device one 32-bit word at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current word; next byte in low 8 bits */
	int		bytesinval;	/* unread bytes still held in val */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
906 
907 static int
908 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
909 {
910 	uint32_t reg;
911 	uint8_t byte;
912 
913 	if (vrs->bytesinval == 0) {
914 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
915 			return (ENXIO);
916 		vrs->val = le32toh(reg);
917 		vrs->off += 4;
918 		byte = vrs->val & 0xff;
919 		vrs->bytesinval = 3;
920 	} else {
921 		vrs->val = vrs->val >> 8;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval--;
924 	}
925 
926 	vrs->cksum += byte;
927 	*data = byte;
928 	return (0);
929 }
930 
931 int
932 pcie_slot_implemented(device_t dev)
933 {
934 	struct pci_devinfo *dinfo = device_get_ivars(dev);
935 
936 	return pcie_slotimpl(&dinfo->cfg);
937 }
938 
939 void
940 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
941 {
942 	uint8_t expr_ptr;
943 	uint16_t val;
944 
945 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
946 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
947 		panic("%s: invalid max read request size 0x%02x",
948 		      device_get_nameunit(dev), rqsize);
949 	}
950 
951 	expr_ptr = pci_get_pciecap_ptr(dev);
952 	if (!expr_ptr)
953 		panic("%s: not PCIe device", device_get_nameunit(dev));
954 
955 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
956 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
957 		if (bootverbose)
958 			device_printf(dev, "adjust device control 0x%04x", val);
959 
960 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
961 		val |= rqsize;
962 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
963 
964 		if (bootverbose)
965 			kprintf(" -> 0x%04x\n", val);
966 	}
967 }
968 
969 uint16_t
970 pcie_get_max_readrq(device_t dev)
971 {
972 	uint8_t expr_ptr;
973 	uint16_t val;
974 
975 	expr_ptr = pci_get_pciecap_ptr(dev);
976 	if (!expr_ptr)
977 		panic("%s: not PCIe device", device_get_nameunit(dev));
978 
979 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
980 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
981 }
982 
983 static void
984 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
985 {
986 	struct vpd_readstate vrs;
987 	int state;
988 	int name;
989 	int remain;
990 	int i;
991 	int alloc, off;		/* alloc/off for RO/W arrays */
992 	int cksumvalid;
993 	int dflen;
994 	uint8_t byte;
995 	uint8_t byte2;
996 
997 	/* init vpd reader */
998 	vrs.bytesinval = 0;
999 	vrs.off = 0;
1000 	vrs.pcib = pcib;
1001 	vrs.cfg = cfg;
1002 	vrs.cksum = 0;
1003 
1004 	state = 0;
1005 	name = remain = i = 0;	/* shut up stupid gcc */
1006 	alloc = off = 0;	/* shut up stupid gcc */
1007 	dflen = 0;		/* shut up stupid gcc */
1008 	cksumvalid = -1;
1009 	while (state >= 0) {
1010 		if (vpd_nextbyte(&vrs, &byte)) {
1011 			state = -2;
1012 			break;
1013 		}
1014 #if 0
1015 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1016 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1017 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1018 #endif
1019 		switch (state) {
1020 		case 0:		/* item name */
1021 			if (byte & 0x80) {
1022 				if (vpd_nextbyte(&vrs, &byte2)) {
1023 					state = -2;
1024 					break;
1025 				}
1026 				remain = byte2;
1027 				if (vpd_nextbyte(&vrs, &byte2)) {
1028 					state = -2;
1029 					break;
1030 				}
1031 				remain |= byte2 << 8;
1032 				if (remain > (0x7f*4 - vrs.off)) {
1033 					state = -1;
1034 					kprintf(
1035 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1036 					    cfg->domain, cfg->bus, cfg->slot,
1037 					    cfg->func, remain);
1038 				}
1039 				name = byte & 0x7f;
1040 			} else {
1041 				remain = byte & 0x7;
1042 				name = (byte >> 3) & 0xf;
1043 			}
1044 			switch (name) {
1045 			case 0x2:	/* String */
1046 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1047 				    M_DEVBUF, M_WAITOK);
1048 				i = 0;
1049 				state = 1;
1050 				break;
1051 			case 0xf:	/* End */
1052 				state = -1;
1053 				break;
1054 			case 0x10:	/* VPD-R */
1055 				alloc = 8;
1056 				off = 0;
1057 				cfg->vpd.vpd_ros = kmalloc(alloc *
1058 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1059 				    M_WAITOK | M_ZERO);
1060 				state = 2;
1061 				break;
1062 			case 0x11:	/* VPD-W */
1063 				alloc = 8;
1064 				off = 0;
1065 				cfg->vpd.vpd_w = kmalloc(alloc *
1066 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1067 				    M_WAITOK | M_ZERO);
1068 				state = 5;
1069 				break;
1070 			default:	/* Invalid data, abort */
1071 				state = -1;
1072 				break;
1073 			}
1074 			break;
1075 
1076 		case 1:	/* Identifier String */
1077 			cfg->vpd.vpd_ident[i++] = byte;
1078 			remain--;
1079 			if (remain == 0)  {
1080 				cfg->vpd.vpd_ident[i] = '\0';
1081 				state = 0;
1082 			}
1083 			break;
1084 
1085 		case 2:	/* VPD-R Keyword Header */
1086 			if (off == alloc) {
1087 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1088 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1089 				    M_DEVBUF, M_WAITOK | M_ZERO);
1090 			}
1091 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1092 			if (vpd_nextbyte(&vrs, &byte2)) {
1093 				state = -2;
1094 				break;
1095 			}
1096 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1097 			if (vpd_nextbyte(&vrs, &byte2)) {
1098 				state = -2;
1099 				break;
1100 			}
1101 			dflen = byte2;
1102 			if (dflen == 0 &&
1103 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1104 			    2) == 0) {
1105 				/*
1106 				 * if this happens, we can't trust the rest
1107 				 * of the VPD.
1108 				 */
1109 				kprintf(
1110 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1111 				    cfg->domain, cfg->bus, cfg->slot,
1112 				    cfg->func, dflen);
1113 				cksumvalid = 0;
1114 				state = -1;
1115 				break;
1116 			} else if (dflen == 0) {
1117 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1118 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1119 				    M_DEVBUF, M_WAITOK);
1120 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1121 			} else
1122 				cfg->vpd.vpd_ros[off].value = kmalloc(
1123 				    (dflen + 1) *
1124 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1125 				    M_DEVBUF, M_WAITOK);
1126 			remain -= 3;
1127 			i = 0;
1128 			/* keep in sync w/ state 3's transistions */
1129 			if (dflen == 0 && remain == 0)
1130 				state = 0;
1131 			else if (dflen == 0)
1132 				state = 2;
1133 			else
1134 				state = 3;
1135 			break;
1136 
1137 		case 3:	/* VPD-R Keyword Value */
1138 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1139 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1140 			    "RV", 2) == 0 && cksumvalid == -1) {
1141 				if (vrs.cksum == 0)
1142 					cksumvalid = 1;
1143 				else {
1144 					if (bootverbose)
1145 						kprintf(
1146 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1147 						    cfg->domain, cfg->bus,
1148 						    cfg->slot, cfg->func,
1149 						    vrs.cksum);
1150 					cksumvalid = 0;
1151 					state = -1;
1152 					break;
1153 				}
1154 			}
1155 			dflen--;
1156 			remain--;
1157 			/* keep in sync w/ state 2's transistions */
1158 			if (dflen == 0)
1159 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1160 			if (dflen == 0 && remain == 0) {
1161 				cfg->vpd.vpd_rocnt = off;
1162 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1163 				    off * sizeof(*cfg->vpd.vpd_ros),
1164 				    M_DEVBUF, M_WAITOK | M_ZERO);
1165 				state = 0;
1166 			} else if (dflen == 0)
1167 				state = 2;
1168 			break;
1169 
1170 		case 4:
1171 			remain--;
1172 			if (remain == 0)
1173 				state = 0;
1174 			break;
1175 
1176 		case 5:	/* VPD-W Keyword Header */
1177 			if (off == alloc) {
1178 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1179 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1180 				    M_DEVBUF, M_WAITOK | M_ZERO);
1181 			}
1182 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1183 			if (vpd_nextbyte(&vrs, &byte2)) {
1184 				state = -2;
1185 				break;
1186 			}
1187 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1188 			if (vpd_nextbyte(&vrs, &byte2)) {
1189 				state = -2;
1190 				break;
1191 			}
1192 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1193 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1194 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1195 			    sizeof(*cfg->vpd.vpd_w[off].value),
1196 			    M_DEVBUF, M_WAITOK);
1197 			remain -= 3;
1198 			i = 0;
1199 			/* keep in sync w/ state 6's transistions */
1200 			if (dflen == 0 && remain == 0)
1201 				state = 0;
1202 			else if (dflen == 0)
1203 				state = 5;
1204 			else
1205 				state = 6;
1206 			break;
1207 
1208 		case 6:	/* VPD-W Keyword Value */
1209 			cfg->vpd.vpd_w[off].value[i++] = byte;
1210 			dflen--;
1211 			remain--;
1212 			/* keep in sync w/ state 5's transistions */
1213 			if (dflen == 0)
1214 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1215 			if (dflen == 0 && remain == 0) {
1216 				cfg->vpd.vpd_wcnt = off;
1217 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1218 				    off * sizeof(*cfg->vpd.vpd_w),
1219 				    M_DEVBUF, M_WAITOK | M_ZERO);
1220 				state = 0;
1221 			} else if (dflen == 0)
1222 				state = 5;
1223 			break;
1224 
1225 		default:
1226 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1227 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1228 			    state);
1229 			state = -1;
1230 			break;
1231 		}
1232 	}
1233 
1234 	if (cksumvalid == 0 || state < -1) {
1235 		/* read-only data bad, clean up */
1236 		if (cfg->vpd.vpd_ros != NULL) {
1237 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1238 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1239 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1240 			cfg->vpd.vpd_ros = NULL;
1241 		}
1242 	}
1243 	if (state < -1) {
1244 		/* I/O error, clean up */
1245 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1246 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1247 		if (cfg->vpd.vpd_ident != NULL) {
1248 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1249 			cfg->vpd.vpd_ident = NULL;
1250 		}
1251 		if (cfg->vpd.vpd_w != NULL) {
1252 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1253 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1254 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1255 			cfg->vpd.vpd_w = NULL;
1256 		}
1257 	}
1258 	cfg->vpd.vpd_cached = 1;
1259 #undef REG
1260 #undef WREG
1261 }
1262 
1263 int
1264 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1265 {
1266 	struct pci_devinfo *dinfo = device_get_ivars(child);
1267 	pcicfgregs *cfg = &dinfo->cfg;
1268 
1269 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1270 		pci_read_vpd(device_get_parent(dev), cfg);
1271 
1272 	*identptr = cfg->vpd.vpd_ident;
1273 
1274 	if (*identptr == NULL)
1275 		return (ENXIO);
1276 
1277 	return (0);
1278 }
1279 
1280 int
1281 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1282 	const char **vptr)
1283 {
1284 	struct pci_devinfo *dinfo = device_get_ivars(child);
1285 	pcicfgregs *cfg = &dinfo->cfg;
1286 	int i;
1287 
1288 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1289 		pci_read_vpd(device_get_parent(dev), cfg);
1290 
1291 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1292 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1293 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1294 			*vptr = cfg->vpd.vpd_ros[i].value;
1295 		}
1296 
1297 	if (i != cfg->vpd.vpd_rocnt)
1298 		return (0);
1299 
1300 	*vptr = NULL;
1301 	return (ENXIO);
1302 }
1303 
1304 /*
1305  * Return the offset in configuration space of the requested extended
1306  * capability entry or 0 if the specified capability was not found.
1307  */
1308 int
1309 pci_find_extcap_method(device_t dev, device_t child, int capability,
1310     int *capreg)
1311 {
1312 	struct pci_devinfo *dinfo = device_get_ivars(child);
1313 	pcicfgregs *cfg = &dinfo->cfg;
1314 	u_int32_t status;
1315 	u_int8_t ptr;
1316 
1317 	/*
1318 	 * Check the CAP_LIST bit of the PCI status register first.
1319 	 */
1320 	status = pci_read_config(child, PCIR_STATUS, 2);
1321 	if (!(status & PCIM_STATUS_CAPPRESENT))
1322 		return (ENXIO);
1323 
1324 	/*
1325 	 * Determine the start pointer of the capabilities list.
1326 	 */
1327 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1328 	case 0:
1329 	case 1:
1330 		ptr = PCIR_CAP_PTR;
1331 		break;
1332 	case 2:
1333 		ptr = PCIR_CAP_PTR_2;
1334 		break;
1335 	default:
1336 		/* XXX: panic? */
1337 		return (ENXIO);		/* no extended capabilities support */
1338 	}
1339 	ptr = pci_read_config(child, ptr, 1);
1340 
1341 	/*
1342 	 * Traverse the capabilities list.
1343 	 */
1344 	while (ptr != 0) {
1345 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1346 			if (capreg != NULL)
1347 				*capreg = ptr;
1348 			return (0);
1349 		}
1350 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1351 	}
1352 
1353 	return (ENOENT);
1354 }
1355 
1356 /*
1357  * Support for MSI-X message interrupts.
1358  */
1359 static void
1360 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1361     uint32_t data)
1362 {
1363 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1364 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365 	uint32_t offset;
1366 
1367 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1368 	offset = msix->msix_table_offset + index * 16;
1369 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1370 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1371 	bus_write_4(msix->msix_table_res, offset + 8, data);
1372 
1373 	/* Enable MSI -> HT mapping. */
1374 	pci_ht_map_msi(dev, address);
1375 }
1376 
1377 static void
1378 pci_mask_msix_vector(device_t dev, u_int index)
1379 {
1380 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1381 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382 	uint32_t offset, val;
1383 
1384 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1385 	offset = msix->msix_table_offset + index * 16 + 12;
1386 	val = bus_read_4(msix->msix_table_res, offset);
1387 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1388 		val |= PCIM_MSIX_VCTRL_MASK;
1389 		bus_write_4(msix->msix_table_res, offset, val);
1390 	}
1391 }
1392 
1393 static void
1394 pci_unmask_msix_vector(device_t dev, u_int index)
1395 {
1396 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1397 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1398 	uint32_t offset, val;
1399 
1400 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1401 	offset = msix->msix_table_offset + index * 16 + 12;
1402 	val = bus_read_4(msix->msix_table_res, offset);
1403 	if (val & PCIM_MSIX_VCTRL_MASK) {
1404 		val &= ~PCIM_MSIX_VCTRL_MASK;
1405 		bus_write_4(msix->msix_table_res, offset, val);
1406 	}
1407 }
1408 
1409 int
1410 pci_pending_msix_vector(device_t dev, u_int index)
1411 {
1412 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1413 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1414 	uint32_t offset, bit;
1415 
1416 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1417 	    ("MSI-X is not setup yet"));
1418 
1419 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1420 	offset = msix->msix_pba_offset + (index / 32) * 4;
1421 	bit = 1 << index % 32;
1422 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1423 }
1424 
1425 /*
1426  * Restore MSI-X registers and table during resume.  If MSI-X is
1427  * enabled then walk the virtual table to restore the actual MSI-X
1428  * table.
1429  */
1430 static void
1431 pci_resume_msix(device_t dev)
1432 {
1433 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1434 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1435 
1436 	if (msix->msix_table_res != NULL) {
1437 		const struct msix_vector *mv;
1438 
1439 		pci_mask_msix_allvectors(dev);
1440 
1441 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1442 			u_int vector;
1443 
1444 			if (mv->mv_address == 0)
1445 				continue;
1446 
1447 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1448 			pci_setup_msix_vector(dev, vector,
1449 			    mv->mv_address, mv->mv_data);
1450 			pci_unmask_msix_vector(dev, vector);
1451 		}
1452 	}
1453 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1454 	    msix->msix_ctrl, 2);
1455 }
1456 
1457 /*
1458  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1459  *
1460  * After this function returns, the MSI-X's rid will be saved in rid0.
1461  */
1462 int
1463 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1464     int *rid0, int cpuid)
1465 {
1466 	struct pci_devinfo *dinfo = device_get_ivars(child);
1467 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1468 	struct msix_vector *mv;
1469 	struct resource_list_entry *rle;
1470 	int error, irq, rid;
1471 
1472 	KASSERT(msix->msix_table_res != NULL &&
1473 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1474 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1475 	KASSERT(vector < msix->msix_msgnum,
1476 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1477 
1478 	if (bootverbose) {
1479 		device_printf(child,
1480 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1481 		    vector, msix->msix_msgnum);
1482 	}
1483 
1484 	/* Set rid according to vector number */
1485 	rid = PCI_MSIX_VEC2RID(vector);
1486 
1487 	/* Vector has already been allocated */
1488 	mv = pci_find_msix_vector(child, rid);
1489 	if (mv != NULL)
1490 		return EBUSY;
1491 
1492 	/* Allocate a message. */
1493 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1494 	if (error)
1495 		return error;
1496 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1497 	    irq, irq, 1, cpuid);
1498 
1499 	if (bootverbose) {
1500 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1501 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1502 		    rle->start, cpuid);
1503 	}
1504 
1505 	/* Update counts of alloc'd messages. */
1506 	msix->msix_alloc++;
1507 
1508 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1509 	mv->mv_rid = rid;
1510 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1511 
1512 	*rid0 = rid;
1513 	return 0;
1514 }
1515 
1516 int
1517 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1518 {
1519 	struct pci_devinfo *dinfo = device_get_ivars(child);
1520 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1521 	struct resource_list_entry *rle;
1522 	struct msix_vector *mv;
1523 	int irq, cpuid;
1524 
1525 	KASSERT(msix->msix_table_res != NULL &&
1526 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1527 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1528 	KASSERT(rid > 0, ("invalid rid %d", rid));
1529 
1530 	mv = pci_find_msix_vector(child, rid);
1531 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1532 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));
1533 
1534 	/* Make sure resource is no longer allocated. */
1535 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1536 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1537 	KASSERT(rle->res == NULL,
1538 	    ("MSI-X resource is still allocated, rid %d", rid));
1539 
1540 	irq = rle->start;
1541 	cpuid = rle->cpuid;
1542 
1543 	/* Free the resource list entries. */
1544 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1545 
1546 	/* Release the IRQ. */
1547 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1548 
1549 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1550 	kfree(mv, M_DEVBUF);
1551 
1552 	msix->msix_alloc--;
1553 	return (0);
1554 }
1555 
1556 /*
1557  * Return the max supported MSI-X messages this device supports.
1558  * Basically, assuming the MD code can alloc messages, this function
1559  * should return the maximum value that pci_alloc_msix() can return.
1560  * Thus, it is subject to the tunables, etc.
1561  */
1562 int
1563 pci_msix_count_method(device_t dev, device_t child)
1564 {
1565 	struct pci_devinfo *dinfo = device_get_ivars(child);
1566 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1567 
1568 	if (pci_do_msix && msix->msix_location != 0)
1569 		return (msix->msix_msgnum);
1570 	return (0);
1571 }
1572 
1573 int
1574 pci_setup_msix(device_t dev)
1575 {
1576 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1577 	pcicfgregs *cfg = &dinfo->cfg;
1578 	struct resource_list_entry *rle;
1579 	struct resource *table_res, *pba_res;
1580 
1581 	KASSERT(cfg->msix.msix_table_res == NULL &&
1582 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));
1583 
1584 	/* If rid 0 is allocated, then fail. */
1585 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1586 	if (rle != NULL && rle->res != NULL)
1587 		return (ENXIO);
1588 
1589 	/* Already have allocated MSIs? */
1590 	if (cfg->msi.msi_alloc != 0)
1591 		return (ENXIO);
1592 
1593 	/* If MSI is blacklisted for this system, fail. */
1594 	if (pci_msi_blacklisted())
1595 		return (ENXIO);
1596 
1597 	/* MSI-X capability present? */
1598 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1599 	    !pci_do_msix)
1600 		return (ENODEV);
1601 
1602 	KASSERT(cfg->msix.msix_alloc == 0 &&
1603 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1604 	    ("MSI-X vector has been allocated"));
1605 
1606 	/* Make sure the appropriate BARs are mapped. */
1607 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1608 	    cfg->msix.msix_table_bar);
1609 	if (rle == NULL || rle->res == NULL ||
1610 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1611 		return (ENXIO);
1612 	table_res = rle->res;
1613 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1614 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1615 		    cfg->msix.msix_pba_bar);
1616 		if (rle == NULL || rle->res == NULL ||
1617 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1618 			return (ENXIO);
1619 	}
1620 	pba_res = rle->res;
1621 
1622 	cfg->msix.msix_table_res = table_res;
1623 	cfg->msix.msix_pba_res = pba_res;
1624 
1625 	pci_mask_msix_allvectors(dev);
1626 
1627 	return 0;
1628 }
1629 
1630 void
1631 pci_teardown_msix(device_t dev)
1632 {
1633 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1634 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635 
1636 	KASSERT(msix->msix_table_res != NULL &&
1637 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1638 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1639 	    ("MSI-X vector is still allocated"));
1640 
1641 	pci_mask_msix_allvectors(dev);
1642 
1643 	msix->msix_table_res = NULL;
1644 	msix->msix_pba_res = NULL;
1645 }
1646 
1647 void
1648 pci_enable_msix(device_t dev)
1649 {
1650 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1651 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1652 
1653 	KASSERT(msix->msix_table_res != NULL &&
1654 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1655 
1656 	/* Update control register to enable MSI-X. */
1657 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1658 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1659 	    msix->msix_ctrl, 2);
1660 }
1661 
1662 void
1663 pci_disable_msix(device_t dev)
1664 {
1665 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1666 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1667 
1668 	KASSERT(msix->msix_table_res != NULL &&
1669 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1670 
1671 	/* Disable MSI -> HT mapping. */
1672 	pci_ht_map_msi(dev, 0);
1673 
1674 	/* Update control register to disable MSI-X. */
1675 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1676 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1677 	    msix->msix_ctrl, 2);
1678 }
1679 
1680 static void
1681 pci_mask_msix_allvectors(device_t dev)
1682 {
1683 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1684 	u_int i;
1685 
1686 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1687 		pci_mask_msix_vector(dev, i);
1688 }
1689 
1690 static struct msix_vector *
1691 pci_find_msix_vector(device_t dev, int rid)
1692 {
1693 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1694 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1695 	struct msix_vector *mv;
1696 
1697 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1698 		if (mv->mv_rid == rid)
1699 			return mv;
1700 	}
1701 	return NULL;
1702 }
1703 
1704 /*
1705  * HyperTransport MSI mapping control
1706  */
1707 void
1708 pci_ht_map_msi(device_t dev, uint64_t addr)
1709 {
1710 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1711 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1712 
1713 	if (!ht->ht_msimap)
1714 		return;
1715 
1716 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1717 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1718 		/* Enable MSI -> HT mapping. */
1719 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1720 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1721 		    ht->ht_msictrl, 2);
1722 	}
1723 
1724 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1725 		/* Disable MSI -> HT mapping. */
1726 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1727 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1728 		    ht->ht_msictrl, 2);
1729 	}
1730 }
1731 
1732 /*
1733  * Support for MSI message signalled interrupts.
1734  */
1735 void
1736 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1737 {
1738 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1739 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1740 
1741 	/* Write data and address values. */
1742 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1743 	    address & 0xffffffff, 4);
1744 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1745 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1746 		    address >> 32, 4);
1747 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1748 		    data, 2);
1749 	} else
1750 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1751 		    2);
1752 
1753 	/* Enable MSI in the control register. */
1754 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1755 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1756 	    2);
1757 
1758 	/* Enable MSI -> HT mapping. */
1759 	pci_ht_map_msi(dev, address);
1760 }
1761 
1762 void
1763 pci_disable_msi(device_t dev)
1764 {
1765 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1766 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1767 
1768 	/* Disable MSI -> HT mapping. */
1769 	pci_ht_map_msi(dev, 0);
1770 
1771 	/* Disable MSI in the control register. */
1772 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1773 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1774 	    2);
1775 }
1776 
1777 /*
1778  * Restore MSI registers during resume.  If MSI is enabled then
1779  * restore the data and address registers in addition to the control
1780  * register.
1781  */
1782 static void
1783 pci_resume_msi(device_t dev)
1784 {
1785 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1786 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1787 	uint64_t address;
1788 	uint16_t data;
1789 
1790 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1791 		address = msi->msi_addr;
1792 		data = msi->msi_data;
1793 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1794 		    address & 0xffffffff, 4);
1795 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1796 			pci_write_config(dev, msi->msi_location +
1797 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1798 			pci_write_config(dev, msi->msi_location +
1799 			    PCIR_MSI_DATA_64BIT, data, 2);
1800 		} else
1801 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1802 			    data, 2);
1803 	}
1804 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1805 	    2);
1806 }
1807 
1808 /*
1809  * Returns true if the specified device is blacklisted because MSI
1810  * doesn't work.
1811  */
1812 int
1813 pci_msi_device_blacklisted(device_t dev)
1814 {
1815 	struct pci_quirk *q;
1816 
1817 	if (!pci_honor_msi_blacklist)
1818 		return (0);
1819 
1820 	for (q = &pci_quirks[0]; q->devid; q++) {
1821 		if (q->devid == pci_get_devid(dev) &&
1822 		    q->type == PCI_QUIRK_DISABLE_MSI)
1823 			return (1);
1824 	}
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1830  * we just check for blacklisted chipsets as represented by the
1831  * host-PCI bridge at device 0:0:0.  In the future, it may become
1832  * necessary to check other system attributes, such as the kenv values
1833  * that give the motherboard manufacturer and model number.
1834  */
1835 static int
1836 pci_msi_blacklisted(void)
1837 {
1838 	device_t dev;
1839 
1840 	if (!pci_honor_msi_blacklist)
1841 		return (0);
1842 
1843 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1844 	if (!(pcie_chipset || pcix_chipset))
1845 		return (1);
1846 
1847 	dev = pci_find_bsf(0, 0, 0);
1848 	if (dev != NULL)
1849 		return (pci_msi_device_blacklisted(dev));
1850 	return (0);
1851 }
1852 
1853 /*
1854  * Attempt to allocate count MSI messages on start_cpuid.
1855  *
1856  * If start_cpuid < 0, then the MSI messages' target CPU will be
1857  * selected automaticly.
1858  *
1859  * If the caller explicitly specified the MSI messages' target CPU,
1860  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1861  * messages on the specified CPU, if the allocation fails due to MD
1862  * does not have enough vectors (EMSGSIZE), then we will try next
1863  * available CPU, until the allocation fails on all CPUs.
1864  *
1865  * EMSGSIZE will be returned, if all available CPUs does not have
1866  * enough vectors for the requested amount of MSI messages.  Caller
1867  * should either reduce the amount of MSI messages to be requested,
1868  * or simply giving up using MSI.
1869  *
1870  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1871  * returned in 'rid' array, if the allocation succeeds.
1872  */
1873 int
1874 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1875     int start_cpuid)
1876 {
1877 	struct pci_devinfo *dinfo = device_get_ivars(child);
1878 	pcicfgregs *cfg = &dinfo->cfg;
1879 	struct resource_list_entry *rle;
1880 	int error, i, irqs[32], cpuid = 0;
1881 	uint16_t ctrl;
1882 
1883 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1884 	    ("invalid MSI count %d", count));
1885 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1886 
1887 	/* If rid 0 is allocated, then fail. */
1888 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1889 	if (rle != NULL && rle->res != NULL)
1890 		return (ENXIO);
1891 
1892 	/* Already have allocated messages? */
1893 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1894 		return (ENXIO);
1895 
1896 	/* If MSI is blacklisted for this system, fail. */
1897 	if (pci_msi_blacklisted())
1898 		return (ENXIO);
1899 
1900 	/* MSI capability present? */
1901 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1902 	    !pci_do_msi)
1903 		return (ENODEV);
1904 
1905 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1906 	    count, cfg->msi.msi_msgnum));
1907 
1908 	if (bootverbose) {
1909 		device_printf(child,
1910 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1911 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1912 	}
1913 
1914 	if (start_cpuid < 0)
1915 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1916 
1917 	error = EINVAL;
1918 	for (i = 0; i < ncpus; ++i) {
1919 		cpuid = (start_cpuid + i) % ncpus;
1920 
1921 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1922 		    cfg->msi.msi_msgnum, irqs, cpuid);
1923 		if (error == 0)
1924 			break;
1925 		else if (error != EMSGSIZE)
1926 			return error;
1927 	}
1928 	if (error)
1929 		return error;
1930 
1931 	/*
1932 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1933 	 * the irqs[] array, so add new resources starting at rid 1.
1934 	 */
1935 	for (i = 0; i < count; i++) {
1936 		rid[i] = i + 1;
1937 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1938 		    irqs[i], irqs[i], 1, cpuid);
1939 	}
1940 
1941 	if (bootverbose) {
1942 		if (count == 1) {
1943 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1944 			    irqs[0], cpuid);
1945 		} else {
1946 			int run;
1947 
1948 			/*
1949 			 * Be fancy and try to print contiguous runs
1950 			 * of IRQ values as ranges.  'run' is true if
1951 			 * we are in a range.
1952 			 */
1953 			device_printf(child, "using IRQs %d", irqs[0]);
1954 			run = 0;
1955 			for (i = 1; i < count; i++) {
1956 
1957 				/* Still in a run? */
1958 				if (irqs[i] == irqs[i - 1] + 1) {
1959 					run = 1;
1960 					continue;
1961 				}
1962 
1963 				/* Finish previous range. */
1964 				if (run) {
1965 					kprintf("-%d", irqs[i - 1]);
1966 					run = 0;
1967 				}
1968 
1969 				/* Start new range. */
1970 				kprintf(",%d", irqs[i]);
1971 			}
1972 
1973 			/* Unfinished range? */
1974 			if (run)
1975 				kprintf("-%d", irqs[count - 1]);
1976 			kprintf(" for MSI on cpu%d\n", cpuid);
1977 		}
1978 	}
1979 
1980 	/* Update control register with count. */
1981 	ctrl = cfg->msi.msi_ctrl;
1982 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1983 	ctrl |= (ffs(count) - 1) << 4;
1984 	cfg->msi.msi_ctrl = ctrl;
1985 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1986 
1987 	/* Update counts of alloc'd messages. */
1988 	cfg->msi.msi_alloc = count;
1989 	cfg->msi.msi_handlers = 0;
1990 	return (0);
1991 }
1992 
1993 /* Release the MSI messages associated with this device. */
1994 int
1995 pci_release_msi_method(device_t dev, device_t child)
1996 {
1997 	struct pci_devinfo *dinfo = device_get_ivars(child);
1998 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1999 	struct resource_list_entry *rle;
2000 	int i, irqs[32], cpuid = -1;
2001 
2002 	/* Do we have any messages to release? */
2003 	if (msi->msi_alloc == 0)
2004 		return (ENODEV);
2005 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2006 
2007 	/* Make sure none of the resources are allocated. */
2008 	if (msi->msi_handlers > 0)
2009 		return (EBUSY);
2010 	for (i = 0; i < msi->msi_alloc; i++) {
2011 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2012 		KASSERT(rle != NULL, ("missing MSI resource"));
2013 		if (rle->res != NULL)
2014 			return (EBUSY);
2015 		if (i == 0) {
2016 			cpuid = rle->cpuid;
2017 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2018 			    ("invalid MSI target cpuid %d", cpuid));
2019 		} else {
2020 			KASSERT(rle->cpuid == cpuid,
2021 			    ("MSI targets different cpus, "
2022 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2023 		}
2024 		irqs[i] = rle->start;
2025 	}
2026 
2027 	/* Update control register with 0 count. */
2028 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2029 	    ("%s: MSI still enabled", __func__));
2030 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2031 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2032 	    msi->msi_ctrl, 2);
2033 
2034 	/* Release the messages. */
2035 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2036 	    cpuid);
2037 	for (i = 0; i < msi->msi_alloc; i++)
2038 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2039 
2040 	/* Update alloc count. */
2041 	msi->msi_alloc = 0;
2042 	msi->msi_addr = 0;
2043 	msi->msi_data = 0;
2044 	return (0);
2045 }
2046 
2047 /*
2048  * Return the max supported MSI messages this device supports.
2049  * Basically, assuming the MD code can alloc messages, this function
2050  * should return the maximum value that pci_alloc_msi() can return.
2051  * Thus, it is subject to the tunables, etc.
2052  */
2053 int
2054 pci_msi_count_method(device_t dev, device_t child)
2055 {
2056 	struct pci_devinfo *dinfo = device_get_ivars(child);
2057 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2058 
2059 	if (pci_do_msi && msi->msi_location != 0)
2060 		return (msi->msi_msgnum);
2061 	return (0);
2062 }
2063 
2064 /* kfree pcicfgregs structure and all depending data structures */
2065 
2066 int
2067 pci_freecfg(struct pci_devinfo *dinfo)
2068 {
2069 	struct devlist *devlist_head;
2070 	int i;
2071 
2072 	devlist_head = &pci_devq;
2073 
2074 	if (dinfo->cfg.vpd.vpd_reg) {
2075 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2076 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2077 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2078 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2079 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2080 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2081 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2082 	}
2083 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2084 	kfree(dinfo, M_DEVBUF);
2085 
2086 	/* increment the generation count */
2087 	pci_generation++;
2088 
2089 	/* we're losing one device */
2090 	pci_numdevs--;
2091 	return (0);
2092 }
2093 
2094 /*
2095  * PCI power manangement
2096  */
2097 int
2098 pci_set_powerstate_method(device_t dev, device_t child, int state)
2099 {
2100 	struct pci_devinfo *dinfo = device_get_ivars(child);
2101 	pcicfgregs *cfg = &dinfo->cfg;
2102 	uint16_t status;
2103 	int result, oldstate, highest, delay;
2104 
2105 	if (cfg->pp.pp_cap == 0)
2106 		return (EOPNOTSUPP);
2107 
2108 	/*
2109 	 * Optimize a no state change request away.  While it would be OK to
2110 	 * write to the hardware in theory, some devices have shown odd
2111 	 * behavior when going from D3 -> D3.
2112 	 */
2113 	oldstate = pci_get_powerstate(child);
2114 	if (oldstate == state)
2115 		return (0);
2116 
2117 	/*
2118 	 * The PCI power management specification states that after a state
2119 	 * transition between PCI power states, system software must
2120 	 * guarantee a minimal delay before the function accesses the device.
2121 	 * Compute the worst case delay that we need to guarantee before we
2122 	 * access the device.  Many devices will be responsive much more
2123 	 * quickly than this delay, but there are some that don't respond
2124 	 * instantly to state changes.  Transitions to/from D3 state require
2125 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2126 	 * is done below with DELAY rather than a sleeper function because
2127 	 * this function can be called from contexts where we cannot sleep.
2128 	 */
2129 	highest = (oldstate > state) ? oldstate : state;
2130 	if (highest == PCI_POWERSTATE_D3)
2131 	    delay = 10000;
2132 	else if (highest == PCI_POWERSTATE_D2)
2133 	    delay = 200;
2134 	else
2135 	    delay = 0;
2136 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2137 	    & ~PCIM_PSTAT_DMASK;
2138 	result = 0;
2139 	switch (state) {
2140 	case PCI_POWERSTATE_D0:
2141 		status |= PCIM_PSTAT_D0;
2142 		break;
2143 	case PCI_POWERSTATE_D1:
2144 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2145 			return (EOPNOTSUPP);
2146 		status |= PCIM_PSTAT_D1;
2147 		break;
2148 	case PCI_POWERSTATE_D2:
2149 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2150 			return (EOPNOTSUPP);
2151 		status |= PCIM_PSTAT_D2;
2152 		break;
2153 	case PCI_POWERSTATE_D3:
2154 		status |= PCIM_PSTAT_D3;
2155 		break;
2156 	default:
2157 		return (EINVAL);
2158 	}
2159 
2160 	if (bootverbose)
2161 		kprintf(
2162 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2163 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2164 		    dinfo->cfg.func, oldstate, state);
2165 
2166 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2167 	if (delay)
2168 		DELAY(delay);
2169 	return (0);
2170 }
2171 
2172 int
2173 pci_get_powerstate_method(device_t dev, device_t child)
2174 {
2175 	struct pci_devinfo *dinfo = device_get_ivars(child);
2176 	pcicfgregs *cfg = &dinfo->cfg;
2177 	uint16_t status;
2178 	int result;
2179 
2180 	if (cfg->pp.pp_cap != 0) {
2181 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2182 		switch (status & PCIM_PSTAT_DMASK) {
2183 		case PCIM_PSTAT_D0:
2184 			result = PCI_POWERSTATE_D0;
2185 			break;
2186 		case PCIM_PSTAT_D1:
2187 			result = PCI_POWERSTATE_D1;
2188 			break;
2189 		case PCIM_PSTAT_D2:
2190 			result = PCI_POWERSTATE_D2;
2191 			break;
2192 		case PCIM_PSTAT_D3:
2193 			result = PCI_POWERSTATE_D3;
2194 			break;
2195 		default:
2196 			result = PCI_POWERSTATE_UNKNOWN;
2197 			break;
2198 		}
2199 	} else {
2200 		/* No support, device is always at D0 */
2201 		result = PCI_POWERSTATE_D0;
2202 	}
2203 	return (result);
2204 }
2205 
2206 /*
2207  * Some convenience functions for PCI device drivers.
2208  */
2209 
2210 static __inline void
2211 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2212 {
2213 	uint16_t	command;
2214 
2215 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2216 	command |= bit;
2217 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2218 }
2219 
2220 static __inline void
2221 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2222 {
2223 	uint16_t	command;
2224 
2225 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2226 	command &= ~bit;
2227 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2228 }
2229 
2230 int
2231 pci_enable_busmaster_method(device_t dev, device_t child)
2232 {
2233 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2234 	return (0);
2235 }
2236 
2237 int
2238 pci_disable_busmaster_method(device_t dev, device_t child)
2239 {
2240 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2241 	return (0);
2242 }
2243 
2244 int
2245 pci_enable_io_method(device_t dev, device_t child, int space)
2246 {
2247 	uint16_t command;
2248 	uint16_t bit;
2249 	char *error;
2250 
2251 	bit = 0;
2252 	error = NULL;
2253 
2254 	switch(space) {
2255 	case SYS_RES_IOPORT:
2256 		bit = PCIM_CMD_PORTEN;
2257 		error = "port";
2258 		break;
2259 	case SYS_RES_MEMORY:
2260 		bit = PCIM_CMD_MEMEN;
2261 		error = "memory";
2262 		break;
2263 	default:
2264 		return (EINVAL);
2265 	}
2266 	pci_set_command_bit(dev, child, bit);
2267 	/* Some devices seem to need a brief stall here, what do to? */
2268 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2269 	if (command & bit)
2270 		return (0);
2271 	device_printf(child, "failed to enable %s mapping!\n", error);
2272 	return (ENXIO);
2273 }
2274 
2275 int
2276 pci_disable_io_method(device_t dev, device_t child, int space)
2277 {
2278 	uint16_t command;
2279 	uint16_t bit;
2280 	char *error;
2281 
2282 	bit = 0;
2283 	error = NULL;
2284 
2285 	switch(space) {
2286 	case SYS_RES_IOPORT:
2287 		bit = PCIM_CMD_PORTEN;
2288 		error = "port";
2289 		break;
2290 	case SYS_RES_MEMORY:
2291 		bit = PCIM_CMD_MEMEN;
2292 		error = "memory";
2293 		break;
2294 	default:
2295 		return (EINVAL);
2296 	}
2297 	pci_clear_command_bit(dev, child, bit);
2298 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2299 	if (command & bit) {
2300 		device_printf(child, "failed to disable %s mapping!\n", error);
2301 		return (ENXIO);
2302 	}
2303 	return (0);
2304 }
2305 
2306 /*
2307  * New style pci driver.  Parent device is either a pci-host-bridge or a
2308  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2309  */
2310 
2311 void
2312 pci_print_verbose(struct pci_devinfo *dinfo)
2313 {
2314 
2315 	if (bootverbose) {
2316 		pcicfgregs *cfg = &dinfo->cfg;
2317 
2318 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2319 		    cfg->vendor, cfg->device, cfg->revid);
2320 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2321 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2322 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2323 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2324 		    cfg->mfdev);
2325 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2326 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2327 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2328 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2329 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2330 		if (cfg->intpin > 0)
2331 			kprintf("\tintpin=%c, irq=%d\n",
2332 			    cfg->intpin +'a' -1, cfg->intline);
2333 		if (cfg->pp.pp_cap) {
2334 			uint16_t status;
2335 
2336 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2337 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2338 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2339 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2340 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2341 			    status & PCIM_PSTAT_DMASK);
2342 		}
2343 		if (cfg->msi.msi_location) {
2344 			int ctrl;
2345 
2346 			ctrl = cfg->msi.msi_ctrl;
2347 			kprintf("\tMSI supports %d message%s%s%s\n",
2348 			    cfg->msi.msi_msgnum,
2349 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2350 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2351 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2352 		}
2353 		if (cfg->msix.msix_location) {
2354 			kprintf("\tMSI-X supports %d message%s ",
2355 			    cfg->msix.msix_msgnum,
2356 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2357 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2358 				kprintf("in map 0x%x\n",
2359 				    cfg->msix.msix_table_bar);
2360 			else
2361 				kprintf("in maps 0x%x and 0x%x\n",
2362 				    cfg->msix.msix_table_bar,
2363 				    cfg->msix.msix_pba_bar);
2364 		}
2365 		pci_print_verbose_expr(cfg);
2366 	}
2367 }
2368 
2369 static void
2370 pci_print_verbose_expr(const pcicfgregs *cfg)
2371 {
2372 	const struct pcicfg_expr *expr = &cfg->expr;
2373 	const char *port_name;
2374 	uint16_t port_type;
2375 
2376 	if (!bootverbose)
2377 		return;
2378 
2379 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2380 		return;
2381 
2382 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2383 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2384 
2385 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2386 
2387 	switch (port_type) {
2388 	case PCIE_END_POINT:
2389 		port_name = "DEVICE";
2390 		break;
2391 	case PCIE_LEG_END_POINT:
2392 		port_name = "LEGDEV";
2393 		break;
2394 	case PCIE_ROOT_PORT:
2395 		port_name = "ROOT";
2396 		break;
2397 	case PCIE_UP_STREAM_PORT:
2398 		port_name = "UPSTREAM";
2399 		break;
2400 	case PCIE_DOWN_STREAM_PORT:
2401 		port_name = "DOWNSTRM";
2402 		break;
2403 	case PCIE_PCIE2PCI_BRIDGE:
2404 		port_name = "PCIE2PCI";
2405 		break;
2406 	case PCIE_PCI2PCIE_BRIDGE:
2407 		port_name = "PCI2PCIE";
2408 		break;
2409 	case PCIE_ROOT_END_POINT:
2410 		port_name = "ROOTDEV";
2411 		break;
2412 	case PCIE_ROOT_EVT_COLL:
2413 		port_name = "ROOTEVTC";
2414 		break;
2415 	default:
2416 		port_name = NULL;
2417 		break;
2418 	}
2419 	if ((port_type == PCIE_ROOT_PORT ||
2420 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2421 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2422 		port_name = NULL;
2423 	if (port_name != NULL)
2424 		kprintf("[%s]", port_name);
2425 
2426 	if (pcie_slotimpl(cfg)) {
2427 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2428 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2429 			kprintf("[HOTPLUG]");
2430 	}
2431 	kprintf("\n");
2432 }
2433 
2434 static int
2435 pci_porten(device_t pcib, int b, int s, int f)
2436 {
2437 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2438 		& PCIM_CMD_PORTEN) != 0;
2439 }
2440 
2441 static int
2442 pci_memen(device_t pcib, int b, int s, int f)
2443 {
2444 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2445 		& PCIM_CMD_MEMEN) != 0;
2446 }
2447 
2448 /*
2449  * Add a resource based on a pci map register. Return 1 if the map
2450  * register is a 32bit map register or 2 if it is a 64bit register.
2451  */
2452 static int
2453 pci_add_map(device_t pcib, device_t bus, device_t dev,
2454     int b, int s, int f, int reg, struct resource_list *rl, int force,
2455     int prefetch)
2456 {
2457 	uint32_t map;
2458 	uint16_t old_cmd;
2459 	pci_addr_t base;
2460 	pci_addr_t start, end, count;
2461 	uint8_t ln2size;
2462 	uint8_t ln2range;
2463 	uint32_t testval;
2464 	uint16_t cmd;
2465 	int type;
2466 	int barlen;
2467 	struct resource *res;
2468 
2469 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2470 
2471         /* Disable access to device memory */
2472 	old_cmd = 0;
2473 	if (PCI_BAR_MEM(map)) {
2474 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2475 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2476 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2477 	}
2478 
2479 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2480 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2481 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2482 
2483         /* Restore memory access mode */
2484 	if (PCI_BAR_MEM(map)) {
2485 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2486 	}
2487 
2488 	if (PCI_BAR_MEM(map)) {
2489 		type = SYS_RES_MEMORY;
2490 		if (map & PCIM_BAR_MEM_PREFETCH)
2491 			prefetch = 1;
2492 	} else
2493 		type = SYS_RES_IOPORT;
2494 	ln2size = pci_mapsize(testval);
2495 	ln2range = pci_maprange(testval);
2496 	base = pci_mapbase(map);
2497 	barlen = ln2range == 64 ? 2 : 1;
2498 
2499 	/*
2500 	 * For I/O registers, if bottom bit is set, and the next bit up
2501 	 * isn't clear, we know we have a BAR that doesn't conform to the
2502 	 * spec, so ignore it.  Also, sanity check the size of the data
2503 	 * areas to the type of memory involved.  Memory must be at least
2504 	 * 16 bytes in size, while I/O ranges must be at least 4.
2505 	 */
2506 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2507 		return (barlen);
2508 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2509 	    (type == SYS_RES_IOPORT && ln2size < 2))
2510 		return (barlen);
2511 
2512 	if (ln2range == 64)
2513 		/* Read the other half of a 64bit map register */
2514 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2515 	if (bootverbose) {
2516 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2517 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2518 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2519 			kprintf(", port disabled\n");
2520 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2521 			kprintf(", memory disabled\n");
2522 		else
2523 			kprintf(", enabled\n");
2524 	}
2525 
2526 	/*
2527 	 * If base is 0, then we have problems.  It is best to ignore
2528 	 * such entries for the moment.  These will be allocated later if
2529 	 * the driver specifically requests them.  However, some
2530 	 * removable busses look better when all resources are allocated,
2531 	 * so allow '0' to be overriden.
2532 	 *
2533 	 * Similarly treat maps whose values is the same as the test value
2534 	 * read back.  These maps have had all f's written to them by the
2535 	 * BIOS in an attempt to disable the resources.
2536 	 */
2537 	if (!force && (base == 0 || map == testval))
2538 		return (barlen);
2539 	if ((u_long)base != base) {
2540 		device_printf(bus,
2541 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2542 		    pci_get_domain(dev), b, s, f, reg);
2543 		return (barlen);
2544 	}
2545 
2546 	/*
2547 	 * This code theoretically does the right thing, but has
2548 	 * undesirable side effects in some cases where peripherals
2549 	 * respond oddly to having these bits enabled.  Let the user
2550 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2551 	 * default).
2552 	 */
2553 	if (pci_enable_io_modes) {
2554 		/* Turn on resources that have been left off by a lazy BIOS */
2555 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2556 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2557 			cmd |= PCIM_CMD_PORTEN;
2558 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2559 		}
2560 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2561 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2562 			cmd |= PCIM_CMD_MEMEN;
2563 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2564 		}
2565 	} else {
2566 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2567 			return (barlen);
2568 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2569 			return (barlen);
2570 	}
2571 
2572 	count = 1 << ln2size;
2573 	if (base == 0 || base == pci_mapbase(testval)) {
2574 		start = 0;	/* Let the parent decide. */
2575 		end = ~0ULL;
2576 	} else {
2577 		start = base;
2578 		end = base + (1 << ln2size) - 1;
2579 	}
2580 	resource_list_add(rl, type, reg, start, end, count, -1);
2581 
2582 	/*
2583 	 * Try to allocate the resource for this BAR from our parent
2584 	 * so that this resource range is already reserved.  The
2585 	 * driver for this device will later inherit this resource in
2586 	 * pci_alloc_resource().
2587 	 */
2588 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2589 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2590 	if (res == NULL) {
2591 		/*
2592 		 * If the allocation fails, delete the resource list
2593 		 * entry to force pci_alloc_resource() to allocate
2594 		 * resources from the parent.
2595 		 */
2596 		resource_list_delete(rl, type, reg);
2597 #ifdef PCI_BAR_CLEAR
2598 		/* Clear the BAR */
2599 		start = 0;
2600 #else	/* !PCI_BAR_CLEAR */
2601 		/*
2602 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2603 		 * PCI function, clearing the BAR causes HPET timer
2604 		 * stop ticking.
2605 		 */
2606 		if (bootverbose) {
2607 			kprintf("pci:%d:%d:%d: resource reservation failed "
2608 				"%#jx - %#jx\n", b, s, f,
2609 				(intmax_t)start, (intmax_t)end);
2610 		}
2611 		return (barlen);
2612 #endif	/* PCI_BAR_CLEAR */
2613 	} else {
2614 		start = rman_get_start(res);
2615 	}
2616 	pci_write_config(dev, reg, start, 4);
2617 	if (ln2range == 64)
2618 		pci_write_config(dev, reg + 4, start >> 32, 4);
2619 	return (barlen);
2620 }
2621 
2622 /*
2623  * For ATA devices we need to decide early what addressing mode to use.
2624  * Legacy demands that the primary and secondary ATA ports sits on the
2625  * same addresses that old ISA hardware did. This dictates that we use
2626  * those addresses and ignore the BAR's if we cannot set PCI native
2627  * addressing mode.
2628  */
2629 static void
2630 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2631     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2632 {
2633 	int rid, type, progif;
2634 #if 0
2635 	/* if this device supports PCI native addressing use it */
2636 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2637 	if ((progif & 0x8a) == 0x8a) {
2638 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2639 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2640 			kprintf("Trying ATA native PCI addressing mode\n");
2641 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2642 		}
2643 	}
2644 #endif
2645 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2646 	type = SYS_RES_IOPORT;
2647 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2648 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2649 		    prefetchmask & (1 << 0));
2650 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2651 		    prefetchmask & (1 << 1));
2652 	} else {
2653 		rid = PCIR_BAR(0);
2654 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2655 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2656 		    0, -1);
2657 		rid = PCIR_BAR(1);
2658 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2659 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2660 		    0, -1);
2661 	}
2662 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2663 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2664 		    prefetchmask & (1 << 2));
2665 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2666 		    prefetchmask & (1 << 3));
2667 	} else {
2668 		rid = PCIR_BAR(2);
2669 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2670 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2671 		    0, -1);
2672 		rid = PCIR_BAR(3);
2673 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2674 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2675 		    0, -1);
2676 	}
2677 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2678 	    prefetchmask & (1 << 4));
2679 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2680 	    prefetchmask & (1 << 5));
2681 }
2682 
2683 static void
2684 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2685 {
2686 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2687 	pcicfgregs *cfg = &dinfo->cfg;
2688 	char tunable_name[64];
2689 	int irq;
2690 
2691 	/* Has to have an intpin to have an interrupt. */
2692 	if (cfg->intpin == 0)
2693 		return;
2694 
2695 	/* Let the user override the IRQ with a tunable. */
2696 	irq = PCI_INVALID_IRQ;
2697 	ksnprintf(tunable_name, sizeof(tunable_name),
2698 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2699 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2700 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2701 		if (irq >= 255 || irq <= 0) {
2702 			irq = PCI_INVALID_IRQ;
2703 		} else {
2704 			if (machintr_legacy_intr_find(irq,
2705 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2706 				device_printf(dev,
2707 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2708 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2709 				    cfg->intpin + 'A' - 1, irq);
2710 				irq = PCI_INVALID_IRQ;
2711 			} else {
2712 				BUS_CONFIG_INTR(bus, dev, irq,
2713 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2714 			}
2715 		}
2716 	}
2717 
2718 	/*
2719 	 * If we didn't get an IRQ via the tunable, then we either use the
2720 	 * IRQ value in the intline register or we ask the bus to route an
2721 	 * interrupt for us.  If force_route is true, then we only use the
2722 	 * value in the intline register if the bus was unable to assign an
2723 	 * IRQ.
2724 	 */
2725 	if (!PCI_INTERRUPT_VALID(irq)) {
2726 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2727 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2728 		if (!PCI_INTERRUPT_VALID(irq))
2729 			irq = cfg->intline;
2730 	}
2731 
2732 	/* If after all that we don't have an IRQ, just bail. */
2733 	if (!PCI_INTERRUPT_VALID(irq))
2734 		return;
2735 
2736 	/* Update the config register if it changed. */
2737 	if (irq != cfg->intline) {
2738 		cfg->intline = irq;
2739 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2740 	}
2741 
2742 	/* Add this IRQ as rid 0 interrupt resource. */
2743 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2744 	    machintr_legacy_intr_cpuid(irq));
2745 }
2746 
2747 void
2748 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
2749 {
2750 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2751 	pcicfgregs *cfg = &dinfo->cfg;
2752 	struct resource_list *rl = &dinfo->resources;
2753 	struct pci_quirk *q;
2754 	int b, i, f, s;
2755 
2756 	b = cfg->bus;
2757 	s = cfg->slot;
2758 	f = cfg->func;
2759 
2760 	/* ATA devices needs special map treatment */
2761 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2762 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2763 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2764 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2765 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2766 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2767 	else
2768 		for (i = 0; i < cfg->nummaps;)
2769 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2770 			    rl, force, prefetchmask & (1 << i));
2771 
2772 	/*
2773 	 * Add additional, quirked resources.
2774 	 */
2775 	for (q = &pci_quirks[0]; q->devid; q++) {
2776 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2777 		    && q->type == PCI_QUIRK_MAP_REG)
2778 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2779 			  force, 0);
2780 	}
2781 
2782 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2783 		/*
2784 		 * Try to re-route interrupts. Sometimes the BIOS or
2785 		 * firmware may leave bogus values in these registers.
2786 		 * If the re-route fails, then just stick with what we
2787 		 * have.
2788 		 */
2789 		pci_assign_interrupt(bus, dev, 1);
2790 	}
2791 }
2792 
2793 void
2794 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2795 {
2796 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2797 	device_t pcib = device_get_parent(dev);
2798 	struct pci_devinfo *dinfo;
2799 	int maxslots;
2800 	int s, f, pcifunchigh;
2801 	uint8_t hdrtype;
2802 
2803 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2804 	    ("dinfo_size too small"));
2805 	maxslots = PCIB_MAXSLOTS(pcib);
2806 	for (s = 0; s <= maxslots; s++) {
2807 		pcifunchigh = 0;
2808 		f = 0;
2809 		DELAY(1);
2810 		hdrtype = REG(PCIR_HDRTYPE, 1);
2811 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2812 			continue;
2813 		if (hdrtype & PCIM_MFDEV)
2814 			pcifunchigh = PCI_FUNCMAX;
2815 		for (f = 0; f <= pcifunchigh; f++) {
2816 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2817 			    dinfo_size);
2818 			if (dinfo != NULL) {
2819 				pci_add_child(dev, dinfo);
2820 			}
2821 		}
2822 	}
2823 #undef REG
2824 }
2825 
2826 void
2827 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2828 {
2829 	device_t pcib;
2830 
2831 	pcib = device_get_parent(bus);
2832 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2833 	device_set_ivars(dinfo->cfg.dev, dinfo);
2834 	resource_list_init(&dinfo->resources);
2835 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2836 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2837 	pci_print_verbose(dinfo);
2838 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
2839 }
2840 
2841 static int
2842 pci_probe(device_t dev)
2843 {
2844 	device_set_desc(dev, "PCI bus");
2845 
2846 	/* Allow other subclasses to override this driver. */
2847 	return (-1000);
2848 }
2849 
2850 static int
2851 pci_attach(device_t dev)
2852 {
2853 	int busno, domain;
2854 
2855 	/*
2856 	 * Since there can be multiple independantly numbered PCI
2857 	 * busses on systems with multiple PCI domains, we can't use
2858 	 * the unit number to decide which bus we are probing. We ask
2859 	 * the parent pcib what our domain and bus numbers are.
2860 	 */
2861 	domain = pcib_get_domain(dev);
2862 	busno = pcib_get_bus(dev);
2863 	if (bootverbose)
2864 		device_printf(dev, "domain=%d, physical bus=%d\n",
2865 		    domain, busno);
2866 
2867 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2868 
2869 	return (bus_generic_attach(dev));
2870 }
2871 
2872 int
2873 pci_suspend(device_t dev)
2874 {
2875 	int dstate, error, i, numdevs;
2876 	device_t acpi_dev, child, *devlist;
2877 	struct pci_devinfo *dinfo;
2878 
2879 	/*
2880 	 * Save the PCI configuration space for each child and set the
2881 	 * device in the appropriate power state for this sleep state.
2882 	 */
2883 	acpi_dev = NULL;
2884 	if (pci_do_power_resume)
2885 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2886 	device_get_children(dev, &devlist, &numdevs);
2887 	for (i = 0; i < numdevs; i++) {
2888 		child = devlist[i];
2889 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2890 		pci_cfg_save(child, dinfo, 0);
2891 	}
2892 
2893 	/* Suspend devices before potentially powering them down. */
2894 	error = bus_generic_suspend(dev);
2895 	if (error) {
2896 		kfree(devlist, M_TEMP);
2897 		return (error);
2898 	}
2899 
2900 	/*
2901 	 * Always set the device to D3.  If ACPI suggests a different
2902 	 * power state, use it instead.  If ACPI is not present, the
2903 	 * firmware is responsible for managing device power.  Skip
2904 	 * children who aren't attached since they are powered down
2905 	 * separately.  Only manage type 0 devices for now.
2906 	 */
2907 	for (i = 0; acpi_dev && i < numdevs; i++) {
2908 		child = devlist[i];
2909 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2910 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2911 			dstate = PCI_POWERSTATE_D3;
2912 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2913 			pci_set_powerstate(child, dstate);
2914 		}
2915 	}
2916 	kfree(devlist, M_TEMP);
2917 	return (0);
2918 }
2919 
2920 int
2921 pci_resume(device_t dev)
2922 {
2923 	int i, numdevs;
2924 	device_t acpi_dev, child, *devlist;
2925 	struct pci_devinfo *dinfo;
2926 
2927 	/*
2928 	 * Set each child to D0 and restore its PCI configuration space.
2929 	 */
2930 	acpi_dev = NULL;
2931 	if (pci_do_power_resume)
2932 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2933 	device_get_children(dev, &devlist, &numdevs);
2934 	for (i = 0; i < numdevs; i++) {
2935 		/*
2936 		 * Notify ACPI we're going to D0 but ignore the result.  If
2937 		 * ACPI is not present, the firmware is responsible for
2938 		 * managing device power.  Only manage type 0 devices for now.
2939 		 */
2940 		child = devlist[i];
2941 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2942 		if (acpi_dev && device_is_attached(child) &&
2943 		    dinfo->cfg.hdrtype == 0) {
2944 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2945 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2946 		}
2947 
2948 		/* Now the device is powered up, restore its config space. */
2949 		pci_cfg_restore(child, dinfo);
2950 	}
2951 	kfree(devlist, M_TEMP);
2952 	return (bus_generic_resume(dev));
2953 }
2954 
2955 static void
2956 pci_load_vendor_data(void)
2957 {
2958 	caddr_t vendordata, info;
2959 
2960 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2961 		info = preload_search_info(vendordata, MODINFO_ADDR);
2962 		pci_vendordata = *(char **)info;
2963 		info = preload_search_info(vendordata, MODINFO_SIZE);
2964 		pci_vendordata_size = *(size_t *)info;
2965 		/* terminate the database */
2966 		pci_vendordata[pci_vendordata_size] = '\n';
2967 	}
2968 }
2969 
2970 void
2971 pci_driver_added(device_t dev, driver_t *driver)
2972 {
2973 	int numdevs;
2974 	device_t *devlist;
2975 	device_t child;
2976 	struct pci_devinfo *dinfo;
2977 	int i;
2978 
2979 	if (bootverbose)
2980 		device_printf(dev, "driver added\n");
2981 	DEVICE_IDENTIFY(driver, dev);
2982 	device_get_children(dev, &devlist, &numdevs);
2983 	for (i = 0; i < numdevs; i++) {
2984 		child = devlist[i];
2985 		if (device_get_state(child) != DS_NOTPRESENT)
2986 			continue;
2987 		dinfo = device_get_ivars(child);
2988 		pci_print_verbose(dinfo);
2989 		if (bootverbose)
2990 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
2991 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2992 			    dinfo->cfg.func);
2993 		pci_cfg_restore(child, dinfo);
2994 		if (device_probe_and_attach(child) != 0)
2995 			pci_cfg_save(child, dinfo, 1);
2996 	}
2997 	kfree(devlist, M_TEMP);
2998 }
2999 
3000 static void
3001 pci_child_detached(device_t parent __unused, device_t child)
3002 {
3003 	/* Turn child's power off */
3004 	pci_cfg_save(child, device_get_ivars(child), 1);
3005 }
3006 
/*
 * Install an interrupt handler for (child) on (irq).
 *
 * The handler is registered through bus_generic_setup_intr() first.  For
 * children that are not directly attached to (dev) nothing else is done.
 * For direct children the device side is then programmed according to the
 * resource id of (irq):
 *
 *  - rid 0: legacy INTx; the INTx-disable command bit is cleared.
 *  - rid > 0: MSI or MSI-X; the parent bridge is asked to map the message
 *    (PCIB_MAP_MSI), the resulting address/data pair is written to the
 *    device, and INTx is disabled.
 *
 * On success 0 is returned and the handler cookie is stored in (*cookiep).
 * If mapping the message fails, the freshly installed handler is torn down
 * again and the error is returned; (*cookiep) is left untouched.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/*
			 * The address/data pair is mapped and MSI enabled
			 * only when no address has been recorded yet; every
			 * successful setup bumps the handler count.
			 */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			/* Each MSI-X rid may only be set up once. */
			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			/* Program the vector's table entry, then unmask it. */
			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Reached on both success (error == 0) and mapping failure. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3097 
3098 int
3099 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3100     void *cookie)
3101 {
3102 	int rid, error;
3103 
3104 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3105 		return (EINVAL);
3106 
3107 	/* If this isn't a direct child, just bail out */
3108 	if (device_get_parent(child) != dev)
3109 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3110 
3111 	rid = rman_get_rid(irq);
3112 	if (rid == 0) {
3113 		/* Mask INTx */
3114 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3115 	} else {
3116 		struct pci_devinfo *dinfo = device_get_ivars(child);
3117 
3118 		/*
3119 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3120 		 * decrement the appropriate handlers count and mask the
3121 		 * MSI-X message, or disable MSI messages if the count
3122 		 * drops to 0.
3123 		 */
3124 		if (dinfo->cfg.msi.msi_alloc > 0) {
3125 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3126 
3127 			KASSERT(rid <= msi->msi_alloc,
3128 			    ("MSI-X index too high"));
3129 			KASSERT(msi->msi_handlers > 0,
3130 			    ("MSI rid %d is not setup", rid));
3131 
3132 			msi->msi_handlers--;
3133 			if (msi->msi_handlers == 0)
3134 				pci_disable_msi(child);
3135 		} else {
3136 			struct msix_vector *mv;
3137 
3138 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3139 			    ("No MSI or MSI-X rid %d allocated", rid));
3140 
3141 			mv = pci_find_msix_vector(child, rid);
3142 			KASSERT(mv != NULL,
3143 			    ("MSI-X rid %d is not allocated", rid));
3144 			KASSERT(mv->mv_address != 0,
3145 			    ("MSI-X rid %d has not been setup", rid));
3146 
3147 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3148 			mv->mv_address = 0;
3149 			mv->mv_data = 0;
3150 		}
3151 	}
3152 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3153 	if (rid > 0)
3154 		KASSERT(error == 0,
3155 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3156 	return (error);
3157 }
3158 
3159 int
3160 pci_print_child(device_t dev, device_t child)
3161 {
3162 	struct pci_devinfo *dinfo;
3163 	struct resource_list *rl;
3164 	int retval = 0;
3165 
3166 	dinfo = device_get_ivars(child);
3167 	rl = &dinfo->resources;
3168 
3169 	retval += bus_print_child_header(dev, child);
3170 
3171 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3172 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3173 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3174 	if (device_get_flags(dev))
3175 		retval += kprintf(" flags %#x", device_get_flags(dev));
3176 
3177 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3178 	    pci_get_function(child));
3179 
3180 	retval += bus_print_child_footer(dev, child);
3181 
3182 	return (retval);
3183 }
3184 
/*
 * Class/subclass description table, scanned by pci_probe_nomatch() when
 * the vendor database has no entry for a device.
 *
 * An entry whose subclass is -1 supplies the generic description of the
 * whole class; any other entry describes one specific subclass of that
 * class.  The table is terminated by an entry whose desc is NULL.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3276 
3277 void
3278 pci_probe_nomatch(device_t dev, device_t child)
3279 {
3280 	int	i;
3281 	char	*cp, *scp, *device;
3282 
3283 	/*
3284 	 * Look for a listing for this device in a loaded device database.
3285 	 */
3286 	if ((device = pci_describe_device(child)) != NULL) {
3287 		device_printf(dev, "<%s>", device);
3288 		kfree(device, M_DEVBUF);
3289 	} else {
3290 		/*
3291 		 * Scan the class/subclass descriptions for a general
3292 		 * description.
3293 		 */
3294 		cp = "unknown";
3295 		scp = NULL;
3296 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3297 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3298 				if (pci_nomatch_tab[i].subclass == -1) {
3299 					cp = pci_nomatch_tab[i].desc;
3300 				} else if (pci_nomatch_tab[i].subclass ==
3301 				    pci_get_subclass(child)) {
3302 					scp = pci_nomatch_tab[i].desc;
3303 				}
3304 			}
3305 		}
3306 		device_printf(dev, "<%s%s%s>",
3307 		    cp ? cp : "",
3308 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3309 		    scp ? scp : "");
3310 	}
3311 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3312 		pci_get_vendor(child), pci_get_device(child),
3313 		pci_get_slot(child), pci_get_function(child));
3314 	if (pci_get_intpin(child) > 0) {
3315 		int irq;
3316 
3317 		irq = pci_get_irq(child);
3318 		if (PCI_INTERRUPT_VALID(irq))
3319 			kprintf(" irq %d", irq);
3320 	}
3321 	kprintf("\n");
3322 
3323 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3324 }
3325 
3326 /*
3327  * Parse the PCI device database, if loaded, and return a pointer to a
3328  * description of the device.
3329  *
3330  * The database is flat text formatted as follows:
3331  *
3332  * Any line not in a valid format is ignored.
3333  * Lines are terminated with newline '\n' characters.
3334  *
3335  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3336  * the vendor name.
3337  *
3338  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3339  * - devices cannot be listed without a corresponding VENDOR line.
3340  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3341  * another TAB, then the device name.
3342  */
3343 
3344 /*
3345  * Assuming (ptr) points to the beginning of a line in the database,
3346  * return the vendor or device and description of the next entry.
3347  * The value of (vendor) or (device) inappropriate for the entry type
3348  * is set to -1.  Returns nonzero at the end of the database.
3349  *
3350  * Note that this is slightly unrobust in the face of corrupt data;
3351  * we attempt to safeguard against this by spamming the end of the
3352  * database with a newline when we initialise.
3353  */
3354 static int
3355 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3356 {
3357 	char	*cp = *ptr;
3358 	int	left;
3359 
3360 	*device = -1;
3361 	*vendor = -1;
3362 	**desc = '\0';
3363 	for (;;) {
3364 		left = pci_vendordata_size - (cp - pci_vendordata);
3365 		if (left <= 0) {
3366 			*ptr = cp;
3367 			return(1);
3368 		}
3369 
3370 		/* vendor entry? */
3371 		if (*cp != '\t' &&
3372 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3373 			break;
3374 		/* device entry? */
3375 		if (*cp == '\t' &&
3376 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3377 			break;
3378 
3379 		/* skip to next line */
3380 		while (*cp != '\n' && left > 0) {
3381 			cp++;
3382 			left--;
3383 		}
3384 		if (*cp == '\n') {
3385 			cp++;
3386 			left--;
3387 		}
3388 	}
3389 	/* skip to next line */
3390 	while (*cp != '\n' && left > 0) {
3391 		cp++;
3392 		left--;
3393 	}
3394 	if (*cp == '\n' && left > 0)
3395 		cp++;
3396 	*ptr = cp;
3397 	return(0);
3398 }
3399 
3400 static char *
3401 pci_describe_device(device_t dev)
3402 {
3403 	int	vendor, device;
3404 	char	*desc, *vp, *dp, *line;
3405 
3406 	desc = vp = dp = NULL;
3407 
3408 	/*
3409 	 * If we have no vendor data, we can't do anything.
3410 	 */
3411 	if (pci_vendordata == NULL)
3412 		goto out;
3413 
3414 	/*
3415 	 * Scan the vendor data looking for this device
3416 	 */
3417 	line = pci_vendordata;
3418 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3419 		goto out;
3420 	for (;;) {
3421 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3422 			goto out;
3423 		if (vendor == pci_get_vendor(dev))
3424 			break;
3425 	}
3426 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3427 		goto out;
3428 	for (;;) {
3429 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3430 			*dp = 0;
3431 			break;
3432 		}
3433 		if (vendor != -1) {
3434 			*dp = 0;
3435 			break;
3436 		}
3437 		if (device == pci_get_device(dev))
3438 			break;
3439 	}
3440 	if (dp[0] == '\0')
3441 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3442 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3443 	    NULL)
3444 		ksprintf(desc, "%s, %s", vp, dp);
3445  out:
3446 	if (vp != NULL)
3447 		kfree(vp, M_DEVBUF);
3448 	if (dp != NULL)
3449 		kfree(dp, M_DEVBUF);
3450 	return(desc);
3451 }
3452 
3453 int
3454 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3455 {
3456 	struct pci_devinfo *dinfo;
3457 	pcicfgregs *cfg;
3458 
3459 	dinfo = device_get_ivars(child);
3460 	cfg = &dinfo->cfg;
3461 
3462 	switch (which) {
3463 	case PCI_IVAR_ETHADDR:
3464 		/*
3465 		 * The generic accessor doesn't deal with failure, so
3466 		 * we set the return value, then return an error.
3467 		 */
3468 		*((uint8_t **) result) = NULL;
3469 		return (EINVAL);
3470 	case PCI_IVAR_SUBVENDOR:
3471 		*result = cfg->subvendor;
3472 		break;
3473 	case PCI_IVAR_SUBDEVICE:
3474 		*result = cfg->subdevice;
3475 		break;
3476 	case PCI_IVAR_VENDOR:
3477 		*result = cfg->vendor;
3478 		break;
3479 	case PCI_IVAR_DEVICE:
3480 		*result = cfg->device;
3481 		break;
3482 	case PCI_IVAR_DEVID:
3483 		*result = (cfg->device << 16) | cfg->vendor;
3484 		break;
3485 	case PCI_IVAR_CLASS:
3486 		*result = cfg->baseclass;
3487 		break;
3488 	case PCI_IVAR_SUBCLASS:
3489 		*result = cfg->subclass;
3490 		break;
3491 	case PCI_IVAR_PROGIF:
3492 		*result = cfg->progif;
3493 		break;
3494 	case PCI_IVAR_REVID:
3495 		*result = cfg->revid;
3496 		break;
3497 	case PCI_IVAR_INTPIN:
3498 		*result = cfg->intpin;
3499 		break;
3500 	case PCI_IVAR_IRQ:
3501 		*result = cfg->intline;
3502 		break;
3503 	case PCI_IVAR_DOMAIN:
3504 		*result = cfg->domain;
3505 		break;
3506 	case PCI_IVAR_BUS:
3507 		*result = cfg->bus;
3508 		break;
3509 	case PCI_IVAR_SLOT:
3510 		*result = cfg->slot;
3511 		break;
3512 	case PCI_IVAR_FUNCTION:
3513 		*result = cfg->func;
3514 		break;
3515 	case PCI_IVAR_CMDREG:
3516 		*result = cfg->cmdreg;
3517 		break;
3518 	case PCI_IVAR_CACHELNSZ:
3519 		*result = cfg->cachelnsz;
3520 		break;
3521 	case PCI_IVAR_MINGNT:
3522 		*result = cfg->mingnt;
3523 		break;
3524 	case PCI_IVAR_MAXLAT:
3525 		*result = cfg->maxlat;
3526 		break;
3527 	case PCI_IVAR_LATTIMER:
3528 		*result = cfg->lattimer;
3529 		break;
3530 	case PCI_IVAR_PCIXCAP_PTR:
3531 		*result = cfg->pcix.pcix_ptr;
3532 		break;
3533 	case PCI_IVAR_PCIECAP_PTR:
3534 		*result = cfg->expr.expr_ptr;
3535 		break;
3536 	case PCI_IVAR_VPDCAP_PTR:
3537 		*result = cfg->vpd.vpd_reg;
3538 		break;
3539 	default:
3540 		return (ENOENT);
3541 	}
3542 	return (0);
3543 }
3544 
3545 int
3546 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3547 {
3548 	struct pci_devinfo *dinfo;
3549 
3550 	dinfo = device_get_ivars(child);
3551 
3552 	switch (which) {
3553 	case PCI_IVAR_INTPIN:
3554 		dinfo->cfg.intpin = value;
3555 		return (0);
3556 	case PCI_IVAR_ETHADDR:
3557 	case PCI_IVAR_SUBVENDOR:
3558 	case PCI_IVAR_SUBDEVICE:
3559 	case PCI_IVAR_VENDOR:
3560 	case PCI_IVAR_DEVICE:
3561 	case PCI_IVAR_DEVID:
3562 	case PCI_IVAR_CLASS:
3563 	case PCI_IVAR_SUBCLASS:
3564 	case PCI_IVAR_PROGIF:
3565 	case PCI_IVAR_REVID:
3566 	case PCI_IVAR_IRQ:
3567 	case PCI_IVAR_DOMAIN:
3568 	case PCI_IVAR_BUS:
3569 	case PCI_IVAR_SLOT:
3570 	case PCI_IVAR_FUNCTION:
3571 		return (EINVAL);	/* disallow for now */
3572 
3573 	default:
3574 		return (ENOENT);
3575 	}
3576 }
3577 #ifdef notyet
3578 #include "opt_ddb.h"
3579 #ifdef DDB
3580 #include <ddb/ddb.h>
3581 #include <sys/cons.h>
3582 
3583 /*
3584  * List resources based on pci map registers, used for within ddb
3585  */
3586 
/*
 * ddb "show pciregs" command (only compiled when both "notyet" and DDB
 * are defined).  Walks the global pci_devq list and prints one line per
 * device with its driver name and unit, selector, class, card, chip,
 * revision and header type.
 */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	/* Running unit number handed out to devices without a driver name. */
	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 *
	 * The walk stops at the end of the list, after pci_numdevs
	 * entries, or when the ddb pager asks to quit.  Nothing in this
	 * loop modifies error after its initialisation to 0.
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/*
		 * Devices without a (non-empty) driver name are shown as
		 * "none" followed by the none_count counter.
		 */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3626 #endif /* DDB */
3627 #endif
3628 
/*
 * Lazily allocate the resource behind the BAR at config offset (*rid) of
 * (child), for which no resource list entry exists yet.
 *
 * The BAR is sized by writing all ones to it and reading the result back;
 * the original value is restored afterwards.  The BAR's own size and
 * alignment override the (count) and alignment requested by the caller.
 * On success the new resource is recorded in the child's resource list,
 * the BAR is programmed with the start of the allocated range, and the
 * resource is returned.  NULL is returned when the BAR reads back as
 * unimplemented, when (type) does not match the kind of BAR, or when the
 * parent cannot satisfy the allocation; in those cases the BAR's original
 * value is written back.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/* Remember the current BAR value, then probe with all ones. */
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* For a 64-bit BAR also remember the upper half of the old value. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested resource type must agree with the BAR's type. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* Raise the requested alignment to at least the BAR size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/*
	 * map holds either the newly allocated start address or the
	 * original BAR contents; write it (both halves for 64-bit BARs).
	 */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3727 
3728 
3729 struct resource *
3730 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3731     u_long start, u_long end, u_long count, u_int flags, int cpuid)
3732 {
3733 	struct pci_devinfo *dinfo = device_get_ivars(child);
3734 	struct resource_list *rl = &dinfo->resources;
3735 	struct resource_list_entry *rle;
3736 	pcicfgregs *cfg = &dinfo->cfg;
3737 
3738 	/*
3739 	 * Perform lazy resource allocation
3740 	 */
3741 	if (device_get_parent(child) == dev) {
3742 		switch (type) {
3743 		case SYS_RES_IRQ:
3744 			/*
3745 			 * Can't alloc legacy interrupt once MSI messages
3746 			 * have been allocated.
3747 			 */
3748 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3749 			    cfg->msix.msix_alloc > 0))
3750 				return (NULL);
3751 			/*
3752 			 * If the child device doesn't have an
3753 			 * interrupt routed and is deserving of an
3754 			 * interrupt, try to assign it one.
3755 			 */
3756 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3757 			    (cfg->intpin != 0))
3758 				pci_assign_interrupt(dev, child, 0);
3759 			break;
3760 		case SYS_RES_IOPORT:
3761 		case SYS_RES_MEMORY:
3762 			if (*rid < PCIR_BAR(cfg->nummaps)) {
3763 				/*
3764 				 * Enable the I/O mode.  We should
3765 				 * also be assigning resources too
3766 				 * when none are present.  The
3767 				 * resource_list_alloc kind of sorta does
3768 				 * this...
3769 				 */
3770 				if (PCI_ENABLE_IO(dev, child, type))
3771 					return (NULL);
3772 			}
3773 			rle = resource_list_find(rl, type, *rid);
3774 			if (rle == NULL)
3775 				return (pci_alloc_map(dev, child, type, rid,
3776 				    start, end, count, flags));
3777 			break;
3778 		}
3779 		/*
3780 		 * If we've already allocated the resource, then
3781 		 * return it now.  But first we may need to activate
3782 		 * it, since we don't allocate the resource as active
3783 		 * above.  Normally this would be done down in the
3784 		 * nexus, but since we short-circuit that path we have
3785 		 * to do its job here.  Not sure if we should kfree the
3786 		 * resource if it fails to activate.
3787 		 */
3788 		rle = resource_list_find(rl, type, *rid);
3789 		if (rle != NULL && rle->res != NULL) {
3790 			if (bootverbose)
3791 				device_printf(child,
3792 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3793 				    rman_get_size(rle->res), *rid, type,
3794 				    rman_get_start(rle->res));
3795 			if ((flags & RF_ACTIVE) &&
3796 			    bus_generic_activate_resource(dev, child, type,
3797 			    *rid, rle->res) != 0)
3798 				return (NULL);
3799 			return (rle->res);
3800 		}
3801 	}
3802 	return (resource_list_alloc(rl, dev, child, type, rid,
3803 	    start, end, count, flags, cpuid));
3804 }
3805 
3806 void
3807 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3808 {
3809 	struct pci_devinfo *dinfo;
3810 	struct resource_list *rl;
3811 	struct resource_list_entry *rle;
3812 
3813 	if (device_get_parent(child) != dev)
3814 		return;
3815 
3816 	dinfo = device_get_ivars(child);
3817 	rl = &dinfo->resources;
3818 	rle = resource_list_find(rl, type, rid);
3819 	if (rle) {
3820 		if (rle->res) {
3821 			if (rman_get_device(rle->res) != dev ||
3822 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3823 				device_printf(dev, "delete_resource: "
3824 				    "Resource still owned by child, oops. "
3825 				    "(type=%d, rid=%d, addr=%lx)\n",
3826 				    rle->type, rle->rid,
3827 				    rman_get_start(rle->res));
3828 				return;
3829 			}
3830 			bus_release_resource(dev, type, rid, rle->res);
3831 		}
3832 		resource_list_delete(rl, type, rid);
3833 	}
3834 	/*
3835 	 * Why do we turn off the PCI configuration BAR when we delete a
3836 	 * resource? -- imp
3837 	 */
3838 	pci_write_config(child, rid, 0, 4);
3839 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3840 }
3841 
3842 struct resource_list *
3843 pci_get_resource_list (device_t dev, device_t child)
3844 {
3845 	struct pci_devinfo *dinfo = device_get_ivars(child);
3846 
3847 	if (dinfo == NULL)
3848 		return (NULL);
3849 
3850 	return (&dinfo->resources);
3851 }
3852 
3853 uint32_t
3854 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3855 {
3856 	struct pci_devinfo *dinfo = device_get_ivars(child);
3857 	pcicfgregs *cfg = &dinfo->cfg;
3858 
3859 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3860 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3861 }
3862 
3863 void
3864 pci_write_config_method(device_t dev, device_t child, int reg,
3865     uint32_t val, int width)
3866 {
3867 	struct pci_devinfo *dinfo = device_get_ivars(child);
3868 	pcicfgregs *cfg = &dinfo->cfg;
3869 
3870 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3871 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3872 }
3873 
3874 int
3875 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3876     size_t buflen)
3877 {
3878 
3879 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3880 	    pci_get_function(child));
3881 	return (0);
3882 }
3883 
3884 int
3885 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3886     size_t buflen)
3887 {
3888 	struct pci_devinfo *dinfo;
3889 	pcicfgregs *cfg;
3890 
3891 	dinfo = device_get_ivars(child);
3892 	cfg = &dinfo->cfg;
3893 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3894 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3895 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3896 	    cfg->progif);
3897 	return (0);
3898 }
3899 
3900 int
3901 pci_assign_interrupt_method(device_t dev, device_t child)
3902 {
3903 	struct pci_devinfo *dinfo = device_get_ivars(child);
3904 	pcicfgregs *cfg = &dinfo->cfg;
3905 
3906 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3907 	    cfg->intpin));
3908 }
3909 
3910 static int
3911 pci_modevent(module_t mod, int what, void *arg)
3912 {
3913 	static struct cdev *pci_cdev;
3914 
3915 	switch (what) {
3916 	case MOD_LOAD:
3917 		STAILQ_INIT(&pci_devq);
3918 		pci_generation = 0;
3919 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3920 				    "pci");
3921 		pci_load_vendor_data();
3922 		break;
3923 
3924 	case MOD_UNLOAD:
3925 		destroy_dev(pci_cdev);
3926 		break;
3927 	}
3928 
3929 	return (0);
3930 }
3931 
3932 void
3933 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3934 {
3935 	int i;
3936 
3937 	/*
3938 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3939 	 * which we know need special treatment.  Type 2 devices are
3940 	 * cardbus bridges which also require special treatment.
3941 	 * Other types are unknown, and we err on the side of safety
3942 	 * by ignoring them.
3943 	 */
3944 	if (dinfo->cfg.hdrtype != 0)
3945 		return;
3946 
3947 	/*
3948 	 * Restore the device to full power mode.  We must do this
3949 	 * before we restore the registers because moving from D3 to
3950 	 * D0 will cause the chip's BARs and some other registers to
3951 	 * be reset to some unknown power on reset values.  Cut down
3952 	 * the noise on boot by doing nothing if we are already in
3953 	 * state D0.
3954 	 */
3955 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3956 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3957 	}
3958 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3959 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3960 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3961 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3962 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3963 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3964 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3965 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3966 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3967 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3968 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3969 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3970 
3971 	/* Restore MSI and MSI-X configurations if they are present. */
3972 	if (dinfo->cfg.msi.msi_location != 0)
3973 		pci_resume_msi(dev);
3974 	if (dinfo->cfg.msix.msix_location != 0)
3975 		pci_resume_msix(dev);
3976 }
3977 
3978 void
3979 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3980 {
3981 	int i;
3982 	uint32_t cls;
3983 	int ps;
3984 
3985 	/*
3986 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3987 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3988 	 * which also require special treatment.  Other types are unknown, and
3989 	 * we err on the side of safety by ignoring them.  Powering down
3990 	 * bridges should not be undertaken lightly.
3991 	 */
3992 	if (dinfo->cfg.hdrtype != 0)
3993 		return;
3994 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3995 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3996 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3997 
3998 	/*
3999 	 * Some drivers apparently write to these registers w/o updating our
4000 	 * cached copy.  No harm happens if we update the copy, so do so here
4001 	 * so we can restore them.  The COMMAND register is modified by the
4002 	 * bus w/o updating the cache.  This should represent the normally
4003 	 * writable portion of the 'defined' part of type 0 headers.  In
4004 	 * theory we also need to save/restore the PCI capability structures
4005 	 * we know about, but apart from power we don't know any that are
4006 	 * writable.
4007 	 */
4008 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4009 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4010 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4011 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4012 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4013 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4014 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4015 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4016 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4017 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4018 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4019 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4020 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4021 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4022 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4023 
4024 	/*
4025 	 * don't set the state for display devices, base peripherals and
4026 	 * memory devices since bad things happen when they are powered down.
4027 	 * We should (a) have drivers that can easily detach and (b) use
4028 	 * generic drivers for these devices so that some device actually
4029 	 * attaches.  We need to make sure that when we implement (a) we don't
4030 	 * power the device down on a reattach.
4031 	 */
4032 	cls = pci_get_class(dev);
4033 	if (!setstate)
4034 		return;
4035 	switch (pci_do_power_nodriver)
4036 	{
4037 		case 0:		/* NO powerdown at all */
4038 			return;
4039 		case 1:		/* Conservative about what to power down */
4040 			if (cls == PCIC_STORAGE)
4041 				return;
4042 			/*FALLTHROUGH*/
4043 		case 2:		/* Agressive about what to power down */
4044 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4045 			    cls == PCIC_BASEPERIPH)
4046 				return;
4047 			/*FALLTHROUGH*/
4048 		case 3:		/* Power down everything */
4049 			break;
4050 	}
4051 	/*
4052 	 * PCI spec says we can only go into D3 state from D0 state.
4053 	 * Transition from D[12] into D0 before going to D3 state.
4054 	 */
4055 	ps = pci_get_powerstate(dev);
4056 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4057 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4058 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4059 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4060 }
4061 
4062 #ifdef COMPAT_OLDPCI
4063 
4064 /*
4065  * Locate the parent of a PCI device by scanning the PCI devlist
4066  * and return the entry for the parent.
4067  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4068  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4069  */
4070 pcicfgregs *
4071 pci_devlist_get_parent(pcicfgregs *cfg)
4072 {
4073 	struct devlist *devlist_head;
4074 	struct pci_devinfo *dinfo;
4075 	pcicfgregs *bridge_cfg;
4076 	int i;
4077 
4078 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4079 
4080 	/* If the device is on PCI bus 0, look for the host */
4081 	if (cfg->bus == 0) {
4082 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4083 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4084 			bridge_cfg = &dinfo->cfg;
4085 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4086 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4087 		    		&& bridge_cfg->bus == cfg->bus) {
4088 				return bridge_cfg;
4089 			}
4090 		}
4091 	}
4092 
4093 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4094 	if (cfg->bus > 0) {
4095 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4096 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4097 			bridge_cfg = &dinfo->cfg;
4098 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4099 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4100 				&& bridge_cfg->secondarybus == cfg->bus) {
4101 				return bridge_cfg;
4102 			}
4103 		}
4104 	}
4105 
4106 	return NULL;
4107 }
4108 
4109 #endif	/* COMPAT_OLDPCI */
4110 
4111 int
4112 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4113 {
4114 	int rid, type;
4115 	u_int flags;
4116 
4117 	rid = 0;
4118 	type = PCI_INTR_TYPE_LEGACY;
4119 	flags = RF_SHAREABLE | RF_ACTIVE;
4120 
4121 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4122 	if (msi_enable) {
4123 		int cpu;
4124 
4125 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4126 		if (cpu >= ncpus)
4127 			cpu = ncpus - 1;
4128 
4129 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4130 			flags &= ~RF_SHAREABLE;
4131 			type = PCI_INTR_TYPE_MSI;
4132 		}
4133 	}
4134 
4135 	*rid0 = rid;
4136 	*flags0 = flags;
4137 
4138 	return type;
4139 }
4140