xref: /dragonfly/sys/bus/pci/pci.c (revision 0db87cb7)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include <bus/u4b/controller/xhcireg.h>
63 #include <bus/u4b/controller/ehcireg.h>
64 #include <bus/u4b/controller/ohcireg.h>
65 #include <bus/u4b/controller/uhcireg.h>
66 
67 #include "pcib_if.h"
68 #include "pci_if.h"
69 
70 #ifdef __HAVE_ACPI
71 #include <contrib/dev/acpica/acpi.h>
72 #include "acpi_if.h"
73 #else
74 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
75 #endif
76 
/* Handler signature for per-capability config parsers (see pci_read_caps[]). */
typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);

/* Forward declarations for this file's static helpers. */
static uint32_t		pci_mapbase(unsigned mapreg);
static const char	*pci_maptype(unsigned mapreg);
static int		pci_mapsize(unsigned testval);
static int		pci_maprange(unsigned mapreg);
static void		pci_fixancient(pcicfgregs *cfg);

static int		pci_porten(device_t pcib, int b, int s, int f);
static int		pci_memen(device_t pcib, int b, int s, int f);
static void		pci_assign_interrupt(device_t bus, device_t dev,
			    int force_route);
static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
			    int b, int s, int f, int reg,
			    struct resource_list *rl, int force, int prefetch);
static int		pci_probe(device_t dev);
static int		pci_attach(device_t dev);
static void		pci_child_detached(device_t, device_t);
static void		pci_load_vendor_data(void);
static int		pci_describe_parse_line(char **ptr, int *vendor,
			    int *device, char **desc);
static char		*pci_describe_device(device_t dev);
static int		pci_modevent(module_t mod, int what, void *arg);
static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
			    pcicfgregs *cfg);
static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t *data);
#if 0
static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
			    int reg, uint32_t data);
#endif
static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
static void		pci_disable_msi(device_t dev);
static void		pci_enable_msi(device_t dev, uint64_t address,
			    uint16_t data);
static void		pci_setup_msix_vector(device_t dev, u_int index,
			    uint64_t address, uint32_t data);
static void		pci_mask_msix_vector(device_t dev, u_int index);
static void		pci_unmask_msix_vector(device_t dev, u_int index);
static void		pci_mask_msix_allvectors(device_t dev);
static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
static int		pci_msi_blacklisted(void);
static void		pci_resume_msi(device_t dev);
static void		pci_resume_msix(device_t dev);
static int		pcie_slotimpl(const pcicfgregs *);
static void		pci_print_verbose_expr(const pcicfgregs *);

/* Parsers for individual PCI capabilities, dispatched from pci_read_caps[]. */
static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_subvendor(device_t, int, int,
			    pcicfgregs *);
static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
134 
/*
 * Method dispatch table for the PCI bus driver: device lifecycle, generic
 * bus, and PCI-specific entry points wired into the kobj method system.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
187 
/* Register the "pci" driver class and attach it under pcib. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* In-memory copy of the vendor/device description database, if loaded. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
196 
197 
/*
 * Capability-ID -> parser dispatch table, scanned by
 * pci_read_capabilities() for every entry in a device's capability list.
 */
static const struct pci_read_cap {
	int		cap;		/* PCIY_* capability ID */
	pci_read_cap_t	read_cap;	/* parser invoked for this ID */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
212 
/* Per-device workaround descriptor, matched on vendor/device ID. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};

/* Table of known-broken devices; terminated by a zeroed entry. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
256 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* MSI-X resource ids are 1-based; vector numbers are 0-based. */
#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

/* Global list of all discovered PCI devices, plus bookkeeping counters. */
struct devlist pci_devq;
uint32_t pci_generation;	/* bumped on every device list change */
uint32_t pci_numdevs = 0;
/* Set once any PCIe / PCI-X capability is seen during capability scan. */
static int pcie_chipset, pcix_chipset;

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

#if defined(__x86_64__)
static int pci_usb_takeover = 1;
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
#endif

/* CPU to which the next MSI is routed; NOTE(review): rotation policy lives elsewhere. */
static int pci_msi_cpuid;
321 
322 /* Find a device_t by bus/slot/function in domain 0 */
323 
324 device_t
325 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
326 {
327 
328 	return (pci_find_dbsf(0, bus, slot, func));
329 }
330 
331 /* Find a device_t by domain/bus/slot/function */
332 
333 device_t
334 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
335 {
336 	struct pci_devinfo *dinfo;
337 
338 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
339 		if ((dinfo->cfg.domain == domain) &&
340 		    (dinfo->cfg.bus == bus) &&
341 		    (dinfo->cfg.slot == slot) &&
342 		    (dinfo->cfg.func == func)) {
343 			return (dinfo->cfg.dev);
344 		}
345 	}
346 
347 	return (NULL);
348 }
349 
350 /* Find a device_t by vendor/device ID */
351 
352 device_t
353 pci_find_device(uint16_t vendor, uint16_t device)
354 {
355 	struct pci_devinfo *dinfo;
356 
357 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
358 		if ((dinfo->cfg.vendor == vendor) &&
359 		    (dinfo->cfg.device == device)) {
360 			return (dinfo->cfg.dev);
361 		}
362 	}
363 
364 	return (NULL);
365 }
366 
367 device_t
368 pci_find_class(uint8_t class, uint8_t subclass)
369 {
370 	struct pci_devinfo *dinfo;
371 
372 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
373 		if (dinfo->cfg.baseclass == class &&
374 		    dinfo->cfg.subclass == subclass) {
375 			return (dinfo->cfg.dev);
376 		}
377 	}
378 
379 	return (NULL);
380 }
381 
382 device_t
383 pci_iterate_class(struct pci_devinfo **dinfop, uint8_t class, uint8_t subclass)
384 {
385 	struct pci_devinfo *dinfo;
386 
387 	if (*dinfop)
388 		dinfo = STAILQ_NEXT(*dinfop, pci_links);
389 	else
390 		dinfo = STAILQ_FIRST(&pci_devq);
391 
392 	while (dinfo) {
393 		if (dinfo->cfg.baseclass == class &&
394 		    dinfo->cfg.subclass == subclass) {
395 			*dinfop = dinfo;
396 			return (dinfo->cfg.dev);
397 		}
398 		dinfo = STAILQ_NEXT(dinfo, pci_links);
399 	}
400 	*dinfop = NULL;
401 	return (NULL);
402 }
403 
404 /* return base address of memory or port map */
405 
406 static uint32_t
407 pci_mapbase(uint32_t mapreg)
408 {
409 
410 	if (PCI_BAR_MEM(mapreg))
411 		return (mapreg & PCIM_BAR_MEM_BASE);
412 	else
413 		return (mapreg & PCIM_BAR_IO_BASE);
414 }
415 
416 /* return map type of memory or port map */
417 
418 static const char *
419 pci_maptype(unsigned mapreg)
420 {
421 
422 	if (PCI_BAR_IO(mapreg))
423 		return ("I/O Port");
424 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
425 		return ("Prefetchable Memory");
426 	return ("Memory");
427 }
428 
429 /* return log2 of map size decoded for memory or port map */
430 
431 static int
432 pci_mapsize(uint32_t testval)
433 {
434 	int ln2size;
435 
436 	testval = pci_mapbase(testval);
437 	ln2size = 0;
438 	if (testval != 0) {
439 		while ((testval & 1) == 0)
440 		{
441 			ln2size++;
442 			testval >>= 1;
443 		}
444 	}
445 	return (ln2size);
446 }
447 
448 /* return log2 of address range supported by map register */
449 
450 static int
451 pci_maprange(unsigned mapreg)
452 {
453 	int ln2range = 0;
454 
455 	if (PCI_BAR_IO(mapreg))
456 		ln2range = 32;
457 	else
458 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
459 		case PCIM_BAR_MEM_32:
460 			ln2range = 32;
461 			break;
462 		case PCIM_BAR_MEM_1MB:
463 			ln2range = 20;
464 			break;
465 		case PCIM_BAR_MEM_64:
466 			ln2range = 64;
467 			break;
468 		}
469 	return (ln2range);
470 }
471 
472 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
473 
474 static void
475 pci_fixancient(pcicfgregs *cfg)
476 {
477 	if (cfg->hdrtype != 0)
478 		return;
479 
480 	/* PCI to PCI bridges use header type 1 */
481 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
482 		cfg->hdrtype = 1;
483 }
484 
485 /* extract header type specific config data */
486 
/*
 * Extract fields whose layout depends on the config header type:
 * subvendor/subdevice IDs and the number of BARs.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge: no subvendor registers */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:		/* CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
514 
515 /* read configuration header into pcicfgregs structure */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An all-ones vendor/device word means no device at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		/* 'size' lets callers embed pci_devinfo in a larger struct. */
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type byte. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the config data into the pciio(4) conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
586 
587 static int
588 pci_fixup_nextptr(int *nextptr0)
589 {
590 	int nextptr = *nextptr0;
591 
592 	/* "Next pointer" is only one byte */
593 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
594 
595 	if (nextptr & 0x3) {
596 		/*
597 		 * PCI local bus spec 3.0:
598 		 *
599 		 * "... The bottom two bits of all pointers are reserved
600 		 *  and must be implemented as 00b although software must
601 		 *  mask them to allow for future uses of these bits ..."
602 		 */
603 		if (bootverbose) {
604 			kprintf("Illegal PCI extended capability "
605 				"offset, fixup 0x%02x -> 0x%02x\n",
606 				nextptr, nextptr & ~0x3);
607 		}
608 		nextptr &= ~0x3;
609 	}
610 	*nextptr0 = nextptr;
611 
612 	if (nextptr < 0x40) {
613 		if (nextptr != 0) {
614 			kprintf("Illegal PCI extended capability "
615 				"offset 0x%02x", nextptr);
616 		}
617 		return 0;
618 	}
619 	return 1;
620 }
621 
/*
 * Parse the power management capability (PCIY_PMG): record the PM
 * capability word and the config offsets of the status and PMCSR
 * registers.  Only the first PM capability found is used.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Already seen a PM capability; keep the first one. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
653 
/*
 * Parse a HyperTransport capability (PCIY_HT).  Records the location of
 * a slave interface block, and for MSI-mapping capabilities records the
 * mapping window address used to translate MSI writes.  x86_64 only;
 * a no-op on other architectures.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	/* Top three bits of the command word encode the sub-capability. */
	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_X86_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __x86_64__ */
}
698 
699 static void
700 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
701 {
702 #define REG(n, w)	\
703 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
704 
705 	struct pcicfg_msi *msi = &cfg->msi;
706 
707 	msi->msi_location = ptr;
708 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
709 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
710 
711 #undef REG
712 }
713 
/*
 * Parse the MSI-X capability (PCIY_MSIX): record the table size and the
 * BAR/offset locations of the vector table and pending-bit array, and
 * initialize the per-device vector list.
 */
static void
pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msix *msix = &cfg->msix;
	uint32_t val;

	msix->msix_location = ptr;
	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
	/* Table size field is N-1 encoded. */
	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;

	/* Low bits select the BAR; the rest is the offset within it. */
	val = REG(ptr + PCIR_MSIX_TABLE, 4);
	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;

	val = REG(ptr + PCIR_MSIX_PBA, 4);
	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;

	TAILQ_INIT(&msix->msix_vectors);

#undef REG
}
739 
/* Record the config-space location of the VPD capability for later use. */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
745 
/*
 * Parse the bridge subvendor capability (PCIY_SUBVENDOR), which supplies
 * subvendor/subdevice IDs for type 1 headers (bridges lack the standard
 * subvendor registers of type 0 headers).
 */
static void
pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	/* Should always be true. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
		uint32_t val;

		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
		cfg->subvendor = val & 0xffff;
		cfg->subdevice = val >> 16;
	}

#undef REG
}
763 
/*
 * Parse the PCI-X capability (PCIY_PCIX): record its location and note
 * the presence of a PCI-X chipset for MSI blacklisting decisions.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
779 
780 static int
781 pcie_slotimpl(const pcicfgregs *cfg)
782 {
783 	const struct pcicfg_expr *expr = &cfg->expr;
784 	uint16_t port_type;
785 
786 	/*
787 	 * - Slot implemented bit is meaningful iff current port is
788 	 *   root port or down stream port.
789 	 * - Testing for root port or down stream port is meanningful
790 	 *   iff PCI configure has type 1 header.
791 	 */
792 
793 	if (cfg->hdrtype != 1)
794 		return 0;
795 
796 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
797 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
798 		return 0;
799 
800 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
801 		return 0;
802 
803 	return 1;
804 }
805 
/*
 * Parse the PCI Express capability (PCIY_EXPRESS): record its location,
 * the capability word, and (when a slot is implemented) the slot
 * capabilities.  Also flags the system as PCIe for MSI decisions.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
832 
/*
 * Walk the device's capability list and dispatch each entry to its
 * parser via pci_read_caps[].  Also enables the HT MSI mapping window
 * on HyperTransport slaves (x86_64).
 *
 * NOTE: the REG/WREG macros are deliberately left defined at the end of
 * this function; the VPD helpers below reuse them.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The capability pointer register location depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/* REG and WREG use carry through to next functions */
}
899 
900 /*
901  * PCI Vital Product Data
902  */
903 
904 #define	PCI_VPD_TIMEOUT		1000000
905 
/*
 * Read one 32-bit word of VPD data at offset 'reg' into *data.  Uses
 * the REG/WREG macros still defined from pci_read_capabilities() above.
 * Returns 0 on success or ENXIO if the hardware fails to set the
 * completion flag within PCI_VPD_TIMEOUT polls.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Bit 15 of the address register flips when the read completes. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
924 
#if 0
/*
 * Write one 32-bit word of VPD data at offset 'reg'.  Currently unused;
 * kept for reference.  Returns 0 on success, ENXIO on timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Bit 15 clears when the hardware has consumed the write. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
944 
945 #undef PCI_VPD_TIMEOUT
946 
/* Iterator state for streaming VPD bytes out of 32-bit config reads. */
struct vpd_readstate {
	device_t	pcib;		/* parent bridge for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current 32-bit read buffer */
	int		bytesinval;	/* unconsumed bytes left in 'val' */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running checksum of bytes seen */
};
955 
/*
 * Return the next byte of the VPD stream via *data, refilling the 32-bit
 * buffer from config space every fourth byte.  Each byte is folded into
 * the running checksum.  Returns 0 on success, ENXIO on read failure.
 */
static int
vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
{
	uint32_t reg;
	uint8_t byte;

	if (vrs->bytesinval == 0) {
		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
			return (ENXIO);
		/* VPD data is little-endian regardless of host order. */
		vrs->val = le32toh(reg);
		vrs->off += 4;
		byte = vrs->val & 0xff;
		vrs->bytesinval = 3;
	} else {
		vrs->val = vrs->val >> 8;
		byte = vrs->val & 0xff;
		vrs->bytesinval--;
	}

	vrs->cksum += byte;
	*data = byte;
	return (0);
}
979 
980 int
981 pcie_slot_implemented(device_t dev)
982 {
983 	struct pci_devinfo *dinfo = device_get_ivars(dev);
984 
985 	return pcie_slotimpl(&dinfo->cfg);
986 }
987 
/*
 * Set the PCIe maximum read request size in the device control register.
 * 'rqsize' must be one of the PCIEM_DEVCTL_MAX_READRQ_* encodings;
 * panics on an invalid size or if 'dev' has no PCIe capability.
 */
void
pcie_set_max_readrq(device_t dev, uint16_t rqsize)
{
	uint8_t expr_ptr;
	uint16_t val;

	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
		panic("%s: invalid max read request size 0x%02x",
		      device_get_nameunit(dev), rqsize);
	}

	expr_ptr = pci_get_pciecap_ptr(dev);
	if (!expr_ptr)
		panic("%s: not PCIe device", device_get_nameunit(dev));

	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
		/* No newline here: the kprintf below completes the line. */
		if (bootverbose)
			device_printf(dev, "adjust device control 0x%04x", val);

		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
		val |= rqsize;
		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);

		if (bootverbose)
			kprintf(" -> 0x%04x\n", val);
	}
}
1017 
1018 uint16_t
1019 pcie_get_max_readrq(device_t dev)
1020 {
1021 	uint8_t expr_ptr;
1022 	uint16_t val;
1023 
1024 	expr_ptr = pci_get_pciecap_ptr(dev);
1025 	if (!expr_ptr)
1026 		panic("%s: not PCIe device", device_get_nameunit(dev));
1027 
1028 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1029 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1030 }
1031 
/*
 * Read and parse the device's Vital Product Data (VPD) into cfg->vpd.
 *
 * The VPD image is consumed byte-by-byte through a small state
 * machine:
 *   state 0   - parse a resource item header (small or large form)
 *   state 1   - collect the identifier string
 *   state 2/3 - parse VPD-R (read-only) keyword headers / values
 *   state 4   - skip bytes (currently unreachable from other states)
 *   state 5/6 - parse VPD-W (read/write) keyword headers / values
 *   state -1  - clean termination
 *   state -2  - I/O error while reading from the device
 *
 * On success cfg->vpd holds the identifier string plus the RO and W
 * keyword arrays; on checksum or I/O failure the partially built
 * structures are freed again.  vpd_cached is set in all cases so the
 * (possibly failed) read is never retried.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is capped at 0x7f 32-bit words; reject lengths past it. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the array by doubling when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store just a terminator. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword carries the checksum byte; once it
			 * is folded in, vrs.cksum must be zero for valid VPD.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Section done: shrink the array to its final size. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Skip 'remain' bytes (no current transition enters here). */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Remember where the writable value lives in the VPD image. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1311 
1312 int
1313 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1314 {
1315 	struct pci_devinfo *dinfo = device_get_ivars(child);
1316 	pcicfgregs *cfg = &dinfo->cfg;
1317 
1318 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1319 		pci_read_vpd(device_get_parent(dev), cfg);
1320 
1321 	*identptr = cfg->vpd.vpd_ident;
1322 
1323 	if (*identptr == NULL)
1324 		return (ENXIO);
1325 
1326 	return (0);
1327 }
1328 
1329 int
1330 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1331 	const char **vptr)
1332 {
1333 	struct pci_devinfo *dinfo = device_get_ivars(child);
1334 	pcicfgregs *cfg = &dinfo->cfg;
1335 	int i;
1336 
1337 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1338 		pci_read_vpd(device_get_parent(dev), cfg);
1339 
1340 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1341 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1342 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1343 			*vptr = cfg->vpd.vpd_ros[i].value;
1344 		}
1345 
1346 	if (i != cfg->vpd.vpd_rocnt)
1347 		return (0);
1348 
1349 	*vptr = NULL;
1350 	return (ENXIO);
1351 }
1352 
1353 /*
1354  * Return the offset in configuration space of the requested extended
1355  * capability entry or 0 if the specified capability was not found.
1356  */
1357 int
1358 pci_find_extcap_method(device_t dev, device_t child, int capability,
1359     int *capreg)
1360 {
1361 	struct pci_devinfo *dinfo = device_get_ivars(child);
1362 	pcicfgregs *cfg = &dinfo->cfg;
1363 	u_int32_t status;
1364 	u_int8_t ptr;
1365 
1366 	/*
1367 	 * Check the CAP_LIST bit of the PCI status register first.
1368 	 */
1369 	status = pci_read_config(child, PCIR_STATUS, 2);
1370 	if (!(status & PCIM_STATUS_CAPPRESENT))
1371 		return (ENXIO);
1372 
1373 	/*
1374 	 * Determine the start pointer of the capabilities list.
1375 	 */
1376 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1377 	case 0:
1378 	case 1:
1379 		ptr = PCIR_CAP_PTR;
1380 		break;
1381 	case 2:
1382 		ptr = PCIR_CAP_PTR_2;
1383 		break;
1384 	default:
1385 		/* XXX: panic? */
1386 		return (ENXIO);		/* no extended capabilities support */
1387 	}
1388 	ptr = pci_read_config(child, ptr, 1);
1389 
1390 	/*
1391 	 * Traverse the capabilities list.
1392 	 */
1393 	while (ptr != 0) {
1394 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1395 			if (capreg != NULL)
1396 				*capreg = ptr;
1397 			return (0);
1398 		}
1399 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1400 	}
1401 
1402 	return (ENOENT);
1403 }
1404 
1405 /*
1406  * Support for MSI-X message interrupts.
1407  */
1408 static void
1409 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1410     uint32_t data)
1411 {
1412 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1413 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1414 	uint32_t offset;
1415 
1416 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1417 	offset = msix->msix_table_offset + index * 16;
1418 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1419 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1420 	bus_write_4(msix->msix_table_res, offset + 8, data);
1421 
1422 	/* Enable MSI -> HT mapping. */
1423 	pci_ht_map_msi(dev, address);
1424 }
1425 
1426 static void
1427 pci_mask_msix_vector(device_t dev, u_int index)
1428 {
1429 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431 	uint32_t offset, val;
1432 
1433 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1434 	offset = msix->msix_table_offset + index * 16 + 12;
1435 	val = bus_read_4(msix->msix_table_res, offset);
1436 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1437 		val |= PCIM_MSIX_VCTRL_MASK;
1438 		bus_write_4(msix->msix_table_res, offset, val);
1439 	}
1440 }
1441 
1442 static void
1443 pci_unmask_msix_vector(device_t dev, u_int index)
1444 {
1445 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1446 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1447 	uint32_t offset, val;
1448 
1449 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1450 	offset = msix->msix_table_offset + index * 16 + 12;
1451 	val = bus_read_4(msix->msix_table_res, offset);
1452 	if (val & PCIM_MSIX_VCTRL_MASK) {
1453 		val &= ~PCIM_MSIX_VCTRL_MASK;
1454 		bus_write_4(msix->msix_table_res, offset, val);
1455 	}
1456 }
1457 
1458 int
1459 pci_pending_msix_vector(device_t dev, u_int index)
1460 {
1461 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1462 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1463 	uint32_t offset, bit;
1464 
1465 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1466 	    ("MSI-X is not setup yet"));
1467 
1468 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1469 	offset = msix->msix_pba_offset + (index / 32) * 4;
1470 	bit = 1 << index % 32;
1471 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1472 }
1473 
1474 /*
1475  * Restore MSI-X registers and table during resume.  If MSI-X is
1476  * enabled then walk the virtual table to restore the actual MSI-X
1477  * table.
1478  */
1479 static void
1480 pci_resume_msix(device_t dev)
1481 {
1482 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1483 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1484 
1485 	if (msix->msix_table_res != NULL) {
1486 		const struct msix_vector *mv;
1487 
1488 		pci_mask_msix_allvectors(dev);
1489 
1490 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1491 			u_int vector;
1492 
1493 			if (mv->mv_address == 0)
1494 				continue;
1495 
1496 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1497 			pci_setup_msix_vector(dev, vector,
1498 			    mv->mv_address, mv->mv_data);
1499 			pci_unmask_msix_vector(dev, vector);
1500 		}
1501 	}
1502 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1503 	    msix->msix_ctrl, 2);
1504 }
1505 
1506 /*
1507  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1508  *
1509  * After this function returns, the MSI-X's rid will be saved in rid0.
1510  */
1511 int
1512 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1513     int *rid0, int cpuid)
1514 {
1515 	struct pci_devinfo *dinfo = device_get_ivars(child);
1516 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1517 	struct msix_vector *mv;
1518 	struct resource_list_entry *rle;
1519 	int error, irq, rid;
1520 
1521 	KASSERT(msix->msix_table_res != NULL &&
1522 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1523 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1524 	KASSERT(vector < msix->msix_msgnum,
1525 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1526 
1527 	if (bootverbose) {
1528 		device_printf(child,
1529 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1530 		    vector, msix->msix_msgnum);
1531 	}
1532 
1533 	/* Set rid according to vector number */
1534 	rid = PCI_MSIX_VEC2RID(vector);
1535 
1536 	/* Vector has already been allocated */
1537 	mv = pci_find_msix_vector(child, rid);
1538 	if (mv != NULL)
1539 		return EBUSY;
1540 
1541 	/* Allocate a message. */
1542 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1543 	if (error)
1544 		return error;
1545 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1546 	    irq, irq, 1, cpuid);
1547 
1548 	if (bootverbose) {
1549 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1550 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1551 		    rle->start, cpuid);
1552 	}
1553 
1554 	/* Update counts of alloc'd messages. */
1555 	msix->msix_alloc++;
1556 
1557 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1558 	mv->mv_rid = rid;
1559 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1560 
1561 	*rid0 = rid;
1562 	return 0;
1563 }
1564 
/*
 * Release an MSI-X vector previously allocated with
 * pci_alloc_msix_vector_method().  The vector must already have been
 * torn down (mv_address == 0) and its SYS_RES_IRQ resource must no
 * longer be allocated by the driver.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
	KASSERT(rid > 0, ("invalid rid %d", rid));

	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d", rid));

	/* Capture IRQ/cpu before the entry is deleted. */
	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1604 
1605 /*
1606  * Return the max supported MSI-X messages this device supports.
1607  * Basically, assuming the MD code can alloc messages, this function
1608  * should return the maximum value that pci_alloc_msix() can return.
1609  * Thus, it is subject to the tunables, etc.
1610  */
1611 int
1612 pci_msix_count_method(device_t dev, device_t child)
1613 {
1614 	struct pci_devinfo *dinfo = device_get_ivars(child);
1615 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1616 
1617 	if (pci_do_msix && msix->msix_location != 0)
1618 		return (msix->msix_msgnum);
1619 	return (0);
1620 }
1621 
/*
 * Prepare the device for MSI-X use: verify that nothing conflicting
 * is allocated (legacy rid 0 IRQ, MSI), that the system is not
 * blacklisted, and that the BARs holding the MSI-X table and
 * pending-bit array are mapped and active.  On success the table/PBA
 * resources are recorded and all vectors are masked.
 * Returns 0 on success or an errno.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* Same BAR case: rle still points at the table's entry. */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	/* Start with every vector masked. */
	pci_mask_msix_allvectors(dev);

	return 0;
}
1678 
1679 void
1680 pci_teardown_msix(device_t dev)
1681 {
1682 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1683 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1684 
1685 	KASSERT(msix->msix_table_res != NULL &&
1686 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1687 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1688 	    ("MSI-X vector is still allocated"));
1689 
1690 	pci_mask_msix_allvectors(dev);
1691 
1692 	msix->msix_table_res = NULL;
1693 	msix->msix_pba_res = NULL;
1694 }
1695 
1696 void
1697 pci_enable_msix(device_t dev)
1698 {
1699 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1701 
1702 	KASSERT(msix->msix_table_res != NULL &&
1703 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1704 
1705 	/* Update control register to enable MSI-X. */
1706 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1707 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1708 	    msix->msix_ctrl, 2);
1709 }
1710 
1711 void
1712 pci_disable_msix(device_t dev)
1713 {
1714 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1715 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1716 
1717 	KASSERT(msix->msix_table_res != NULL &&
1718 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1719 
1720 	/* Disable MSI -> HT mapping. */
1721 	pci_ht_map_msi(dev, 0);
1722 
1723 	/* Update control register to disable MSI-X. */
1724 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1725 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1726 	    msix->msix_ctrl, 2);
1727 }
1728 
1729 static void
1730 pci_mask_msix_allvectors(device_t dev)
1731 {
1732 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1733 	u_int i;
1734 
1735 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1736 		pci_mask_msix_vector(dev, i);
1737 }
1738 
1739 static struct msix_vector *
1740 pci_find_msix_vector(device_t dev, int rid)
1741 {
1742 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1743 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1744 	struct msix_vector *mv;
1745 
1746 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1747 		if (mv->mv_rid == rid)
1748 			return mv;
1749 	}
1750 	return NULL;
1751 }
1752 
1753 /*
1754  * HyperTransport MSI mapping control
1755  */
1756 void
1757 pci_ht_map_msi(device_t dev, uint64_t addr)
1758 {
1759 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1760 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1761 
1762 	if (!ht->ht_msimap)
1763 		return;
1764 
1765 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1766 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1767 		/* Enable MSI -> HT mapping. */
1768 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1769 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1770 		    ht->ht_msictrl, 2);
1771 	}
1772 
1773 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1774 		/* Disable MSI -> HT mapping. */
1775 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1776 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1777 		    ht->ht_msictrl, 2);
1778 	}
1779 }
1780 
1781 /*
1782  * Support for MSI message signalled interrupts.
1783  */
1784 static void
1785 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1786 {
1787 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1788 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1789 
1790 	/* Write data and address values. */
1791 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1792 	    address & 0xffffffff, 4);
1793 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1794 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1795 		    address >> 32, 4);
1796 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1797 		    data, 2);
1798 	} else
1799 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1800 		    2);
1801 
1802 	/* Enable MSI in the control register. */
1803 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1804 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1805 	    2);
1806 
1807 	/* Enable MSI -> HT mapping. */
1808 	pci_ht_map_msi(dev, address);
1809 }
1810 
1811 static void
1812 pci_disable_msi(device_t dev)
1813 {
1814 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1815 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1816 
1817 	/* Disable MSI -> HT mapping. */
1818 	pci_ht_map_msi(dev, 0);
1819 
1820 	/* Disable MSI in the control register. */
1821 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1822 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1823 	    2);
1824 }
1825 
1826 /*
1827  * Restore MSI registers during resume.  If MSI is enabled then
1828  * restore the data and address registers in addition to the control
1829  * register.
1830  */
1831 static void
1832 pci_resume_msi(device_t dev)
1833 {
1834 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1835 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1836 	uint64_t address;
1837 	uint16_t data;
1838 
1839 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1840 		address = msi->msi_addr;
1841 		data = msi->msi_data;
1842 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1843 		    address & 0xffffffff, 4);
1844 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1845 			pci_write_config(dev, msi->msi_location +
1846 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1847 			pci_write_config(dev, msi->msi_location +
1848 			    PCIR_MSI_DATA_64BIT, data, 2);
1849 		} else
1850 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1851 			    data, 2);
1852 	}
1853 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1854 	    2);
1855 }
1856 
1857 /*
1858  * Returns true if the specified device is blacklisted because MSI
1859  * doesn't work.
1860  */
1861 int
1862 pci_msi_device_blacklisted(device_t dev)
1863 {
1864 	struct pci_quirk *q;
1865 
1866 	if (!pci_honor_msi_blacklist)
1867 		return (0);
1868 
1869 	for (q = &pci_quirks[0]; q->devid; q++) {
1870 		if (q->devid == pci_get_devid(dev) &&
1871 		    q->type == PCI_QUIRK_DISABLE_MSI)
1872 			return (1);
1873 	}
1874 	return (0);
1875 }
1876 
1877 /*
1878  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1879  * we just check for blacklisted chipsets as represented by the
1880  * host-PCI bridge at device 0:0:0.  In the future, it may become
1881  * necessary to check other system attributes, such as the kenv values
1882  * that give the motherboard manufacturer and model number.
1883  */
1884 static int
1885 pci_msi_blacklisted(void)
1886 {
1887 	device_t dev;
1888 
1889 	if (!pci_honor_msi_blacklist)
1890 		return (0);
1891 
1892 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1893 	if (!(pcie_chipset || pcix_chipset))
1894 		return (1);
1895 
1896 	dev = pci_find_bsf(0, 0, 0);
1897 	if (dev != NULL)
1898 		return (pci_msi_device_blacklisted(dev));
1899 	return (0);
1900 }
1901 
1902 /*
1903  * Attempt to allocate count MSI messages on start_cpuid.
1904  *
1905  * If start_cpuid < 0, then the MSI messages' target CPU will be
1906  * selected automaticly.
1907  *
1908  * If the caller explicitly specified the MSI messages' target CPU,
1909  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1910  * messages on the specified CPU, if the allocation fails due to MD
1911  * does not have enough vectors (EMSGSIZE), then we will try next
1912  * available CPU, until the allocation fails on all CPUs.
1913  *
1914  * EMSGSIZE will be returned, if all available CPUs does not have
1915  * enough vectors for the requested amount of MSI messages.  Caller
1916  * should either reduce the amount of MSI messages to be requested,
1917  * or simply giving up using MSI.
1918  *
1919  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1920  * returned in 'rid' array, if the allocation succeeds.
1921  */
1922 int
1923 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1924     int start_cpuid)
1925 {
1926 	struct pci_devinfo *dinfo = device_get_ivars(child);
1927 	pcicfgregs *cfg = &dinfo->cfg;
1928 	struct resource_list_entry *rle;
1929 	int error, i, irqs[32], cpuid = 0;
1930 	uint16_t ctrl;
1931 
1932 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1933 	    ("invalid MSI count %d", count));
1934 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1935 
1936 	/* If rid 0 is allocated, then fail. */
1937 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1938 	if (rle != NULL && rle->res != NULL)
1939 		return (ENXIO);
1940 
1941 	/* Already have allocated messages? */
1942 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1943 		return (ENXIO);
1944 
1945 	/* If MSI is blacklisted for this system, fail. */
1946 	if (pci_msi_blacklisted())
1947 		return (ENXIO);
1948 
1949 	/* MSI capability present? */
1950 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1951 	    !pci_do_msi)
1952 		return (ENODEV);
1953 
1954 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1955 	    count, cfg->msi.msi_msgnum));
1956 
1957 	if (bootverbose) {
1958 		device_printf(child,
1959 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1960 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1961 	}
1962 
1963 	if (start_cpuid < 0)
1964 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1965 
1966 	error = EINVAL;
1967 	for (i = 0; i < ncpus; ++i) {
1968 		cpuid = (start_cpuid + i) % ncpus;
1969 
1970 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1971 		    cfg->msi.msi_msgnum, irqs, cpuid);
1972 		if (error == 0)
1973 			break;
1974 		else if (error != EMSGSIZE)
1975 			return error;
1976 	}
1977 	if (error)
1978 		return error;
1979 
1980 	/*
1981 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1982 	 * the irqs[] array, so add new resources starting at rid 1.
1983 	 */
1984 	for (i = 0; i < count; i++) {
1985 		rid[i] = i + 1;
1986 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1987 		    irqs[i], irqs[i], 1, cpuid);
1988 	}
1989 
1990 	if (bootverbose) {
1991 		if (count == 1) {
1992 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1993 			    irqs[0], cpuid);
1994 		} else {
1995 			int run;
1996 
1997 			/*
1998 			 * Be fancy and try to print contiguous runs
1999 			 * of IRQ values as ranges.  'run' is true if
2000 			 * we are in a range.
2001 			 */
2002 			device_printf(child, "using IRQs %d", irqs[0]);
2003 			run = 0;
2004 			for (i = 1; i < count; i++) {
2005 
2006 				/* Still in a run? */
2007 				if (irqs[i] == irqs[i - 1] + 1) {
2008 					run = 1;
2009 					continue;
2010 				}
2011 
2012 				/* Finish previous range. */
2013 				if (run) {
2014 					kprintf("-%d", irqs[i - 1]);
2015 					run = 0;
2016 				}
2017 
2018 				/* Start new range. */
2019 				kprintf(",%d", irqs[i]);
2020 			}
2021 
2022 			/* Unfinished range? */
2023 			if (run)
2024 				kprintf("-%d", irqs[count - 1]);
2025 			kprintf(" for MSI on cpu%d\n", cpuid);
2026 		}
2027 	}
2028 
2029 	/* Update control register with count. */
2030 	ctrl = cfg->msi.msi_ctrl;
2031 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2032 	ctrl |= (ffs(count) - 1) << 4;
2033 	cfg->msi.msi_ctrl = ctrl;
2034 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2035 
2036 	/* Update counts of alloc'd messages. */
2037 	cfg->msi.msi_alloc = count;
2038 	cfg->msi.msi_handlers = 0;
2039 	return (0);
2040 }
2041 
/*
 * Release the MSI messages associated with this device.  All of the
 * SYS_RES_IRQ resources must have been released by the driver and no
 * handlers may still be installed.  Returns 0 on success, ENODEV if
 * nothing is allocated, or EBUSY if resources are still in use.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		if (i == 0) {
			/* All messages were allocated to the same CPU. */
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2095 
2096 /*
2097  * Return the max supported MSI messages this device supports.
2098  * Basically, assuming the MD code can alloc messages, this function
2099  * should return the maximum value that pci_alloc_msi() can return.
2100  * Thus, it is subject to the tunables, etc.
2101  */
2102 int
2103 pci_msi_count_method(device_t dev, device_t child)
2104 {
2105 	struct pci_devinfo *dinfo = device_get_ivars(child);
2106 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2107 
2108 	if (pci_do_msi && msi->msi_location != 0)
2109 		return (msi->msi_msgnum);
2110 	return (0);
2111 }
2112 
2113 /* kfree pcicfgregs structure and all depending data structures */
2114 
2115 int
2116 pci_freecfg(struct pci_devinfo *dinfo)
2117 {
2118 	struct devlist *devlist_head;
2119 	int i;
2120 
2121 	devlist_head = &pci_devq;
2122 
2123 	if (dinfo->cfg.vpd.vpd_reg) {
2124 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2125 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2126 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2127 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2128 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2129 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2130 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2131 	}
2132 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2133 	kfree(dinfo, M_DEVBUF);
2134 
2135 	/* increment the generation count */
2136 	pci_generation++;
2137 
2138 	/* we're losing one device */
2139 	pci_numdevs--;
2140 	return (0);
2141 }
2142 
2143 /*
2144  * PCI power manangement
2145  */
2146 int
2147 pci_set_powerstate_method(device_t dev, device_t child, int state)
2148 {
2149 	struct pci_devinfo *dinfo = device_get_ivars(child);
2150 	pcicfgregs *cfg = &dinfo->cfg;
2151 	uint16_t status;
2152 	int oldstate, highest, delay;
2153 
2154 	if (cfg->pp.pp_cap == 0)
2155 		return (EOPNOTSUPP);
2156 
2157 	/*
2158 	 * Optimize a no state change request away.  While it would be OK to
2159 	 * write to the hardware in theory, some devices have shown odd
2160 	 * behavior when going from D3 -> D3.
2161 	 */
2162 	oldstate = pci_get_powerstate(child);
2163 	if (oldstate == state)
2164 		return (0);
2165 
2166 	/*
2167 	 * The PCI power management specification states that after a state
2168 	 * transition between PCI power states, system software must
2169 	 * guarantee a minimal delay before the function accesses the device.
2170 	 * Compute the worst case delay that we need to guarantee before we
2171 	 * access the device.  Many devices will be responsive much more
2172 	 * quickly than this delay, but there are some that don't respond
2173 	 * instantly to state changes.  Transitions to/from D3 state require
2174 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2175 	 * is done below with DELAY rather than a sleeper function because
2176 	 * this function can be called from contexts where we cannot sleep.
2177 	 */
2178 	highest = (oldstate > state) ? oldstate : state;
2179 	if (highest == PCI_POWERSTATE_D3)
2180 	    delay = 10000;
2181 	else if (highest == PCI_POWERSTATE_D2)
2182 	    delay = 200;
2183 	else
2184 	    delay = 0;
2185 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2186 	    & ~PCIM_PSTAT_DMASK;
2187 	switch (state) {
2188 	case PCI_POWERSTATE_D0:
2189 		status |= PCIM_PSTAT_D0;
2190 		break;
2191 	case PCI_POWERSTATE_D1:
2192 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2193 			return (EOPNOTSUPP);
2194 		status |= PCIM_PSTAT_D1;
2195 		break;
2196 	case PCI_POWERSTATE_D2:
2197 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2198 			return (EOPNOTSUPP);
2199 		status |= PCIM_PSTAT_D2;
2200 		break;
2201 	case PCI_POWERSTATE_D3:
2202 		status |= PCIM_PSTAT_D3;
2203 		break;
2204 	default:
2205 		return (EINVAL);
2206 	}
2207 
2208 	if (bootverbose)
2209 		kprintf(
2210 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2211 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2212 		    dinfo->cfg.func, oldstate, state);
2213 
2214 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2215 	if (delay)
2216 		DELAY(delay);
2217 	return (0);
2218 }
2219 
2220 int
2221 pci_get_powerstate_method(device_t dev, device_t child)
2222 {
2223 	struct pci_devinfo *dinfo = device_get_ivars(child);
2224 	pcicfgregs *cfg = &dinfo->cfg;
2225 	uint16_t status;
2226 	int result;
2227 
2228 	if (cfg->pp.pp_cap != 0) {
2229 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2230 		switch (status & PCIM_PSTAT_DMASK) {
2231 		case PCIM_PSTAT_D0:
2232 			result = PCI_POWERSTATE_D0;
2233 			break;
2234 		case PCIM_PSTAT_D1:
2235 			result = PCI_POWERSTATE_D1;
2236 			break;
2237 		case PCIM_PSTAT_D2:
2238 			result = PCI_POWERSTATE_D2;
2239 			break;
2240 		case PCIM_PSTAT_D3:
2241 			result = PCI_POWERSTATE_D3;
2242 			break;
2243 		default:
2244 			result = PCI_POWERSTATE_UNKNOWN;
2245 			break;
2246 		}
2247 	} else {
2248 		/* No support, device is always at D0 */
2249 		result = PCI_POWERSTATE_D0;
2250 	}
2251 	return (result);
2252 }
2253 
2254 /*
2255  * Some convenience functions for PCI device drivers.
2256  */
2257 
2258 static __inline void
2259 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2260 {
2261 	uint16_t	command;
2262 
2263 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2264 	command |= bit;
2265 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2266 }
2267 
2268 static __inline void
2269 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2270 {
2271 	uint16_t	command;
2272 
2273 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2274 	command &= ~bit;
2275 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2276 }
2277 
2278 int
2279 pci_enable_busmaster_method(device_t dev, device_t child)
2280 {
2281 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2282 	return (0);
2283 }
2284 
2285 int
2286 pci_disable_busmaster_method(device_t dev, device_t child)
2287 {
2288 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2289 	return (0);
2290 }
2291 
2292 int
2293 pci_enable_io_method(device_t dev, device_t child, int space)
2294 {
2295 	uint16_t command;
2296 	uint16_t bit;
2297 	char *error;
2298 
2299 	bit = 0;
2300 	error = NULL;
2301 
2302 	switch(space) {
2303 	case SYS_RES_IOPORT:
2304 		bit = PCIM_CMD_PORTEN;
2305 		error = "port";
2306 		break;
2307 	case SYS_RES_MEMORY:
2308 		bit = PCIM_CMD_MEMEN;
2309 		error = "memory";
2310 		break;
2311 	default:
2312 		return (EINVAL);
2313 	}
2314 	pci_set_command_bit(dev, child, bit);
2315 	/* Some devices seem to need a brief stall here, what do to? */
2316 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2317 	if (command & bit)
2318 		return (0);
2319 	device_printf(child, "failed to enable %s mapping!\n", error);
2320 	return (ENXIO);
2321 }
2322 
2323 int
2324 pci_disable_io_method(device_t dev, device_t child, int space)
2325 {
2326 	uint16_t command;
2327 	uint16_t bit;
2328 	char *error;
2329 
2330 	bit = 0;
2331 	error = NULL;
2332 
2333 	switch(space) {
2334 	case SYS_RES_IOPORT:
2335 		bit = PCIM_CMD_PORTEN;
2336 		error = "port";
2337 		break;
2338 	case SYS_RES_MEMORY:
2339 		bit = PCIM_CMD_MEMEN;
2340 		error = "memory";
2341 		break;
2342 	default:
2343 		return (EINVAL);
2344 	}
2345 	pci_clear_command_bit(dev, child, bit);
2346 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2347 	if (command & bit) {
2348 		device_printf(child, "failed to disable %s mapping!\n", error);
2349 		return (ENXIO);
2350 	}
2351 	return (0);
2352 }
2353 
2354 /*
2355  * New style pci driver.  Parent device is either a pci-host-bridge or a
2356  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2357  */
2358 
/*
 * Dump the interesting parts of a device's config space to the
 * console.  Only active when booting verbosely.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		/* lattimer is scaled by 30ns, mingnt/maxlat by 250ns */
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			/* Current D-state from the PM status register. */
			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a BAR or use two BARs. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2416 
2417 static void
2418 pci_print_verbose_expr(const pcicfgregs *cfg)
2419 {
2420 	const struct pcicfg_expr *expr = &cfg->expr;
2421 	const char *port_name;
2422 	uint16_t port_type;
2423 
2424 	if (!bootverbose)
2425 		return;
2426 
2427 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2428 		return;
2429 
2430 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2431 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2432 
2433 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2434 
2435 	switch (port_type) {
2436 	case PCIE_END_POINT:
2437 		port_name = "DEVICE";
2438 		break;
2439 	case PCIE_LEG_END_POINT:
2440 		port_name = "LEGDEV";
2441 		break;
2442 	case PCIE_ROOT_PORT:
2443 		port_name = "ROOT";
2444 		break;
2445 	case PCIE_UP_STREAM_PORT:
2446 		port_name = "UPSTREAM";
2447 		break;
2448 	case PCIE_DOWN_STREAM_PORT:
2449 		port_name = "DOWNSTRM";
2450 		break;
2451 	case PCIE_PCIE2PCI_BRIDGE:
2452 		port_name = "PCIE2PCI";
2453 		break;
2454 	case PCIE_PCI2PCIE_BRIDGE:
2455 		port_name = "PCI2PCIE";
2456 		break;
2457 	case PCIE_ROOT_END_POINT:
2458 		port_name = "ROOTDEV";
2459 		break;
2460 	case PCIE_ROOT_EVT_COLL:
2461 		port_name = "ROOTEVTC";
2462 		break;
2463 	default:
2464 		port_name = NULL;
2465 		break;
2466 	}
2467 	if ((port_type == PCIE_ROOT_PORT ||
2468 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2469 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2470 		port_name = NULL;
2471 	if (port_name != NULL)
2472 		kprintf("[%s]", port_name);
2473 
2474 	if (pcie_slotimpl(cfg)) {
2475 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2476 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2477 			kprintf("[HOTPLUG]");
2478 	}
2479 	kprintf("\n");
2480 }
2481 
2482 static int
2483 pci_porten(device_t pcib, int b, int s, int f)
2484 {
2485 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2486 		& PCIM_CMD_PORTEN) != 0;
2487 }
2488 
2489 static int
2490 pci_memen(device_t pcib, int b, int s, int f)
2491 {
2492 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2493 		& PCIM_CMD_MEMEN) != 0;
2494 }
2495 
/*
 * Add a resource based on a pci map register. Return 1 if the map
 * register is a 32bit map register or 2 if it is a 64bit register.
 *
 * The BAR is sized with the standard all-ones probe, registered in
 * 'rl', and pre-allocated from the parent so pci_alloc_resource()
 * can later hand it out.  'force' also registers BARs the BIOS left
 * at 0 or disabled; 'prefetch' requests RF_PREFETCHABLE.
 */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	uint16_t old_cmd;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);

	/* Disable access to device memory while the BAR is probed */
	old_cmd = 0;
	if (PCI_BAR_MEM(map)) {
		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
		cmd = old_cmd & ~PCIM_CMD_MEMEN;
		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
	}

	/*
	 * Standard BAR sizing probe: write all ones, read back the
	 * value (the writable bits encode the size), then restore the
	 * original contents.
	 */
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	/* Restore memory access mode */
	if (PCI_BAR_MEM(map)) {
		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
	}

	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		/* The BAR itself may mark the range prefetchable. */
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);		/* log2 of the BAR size */
	ln2range = pci_maprange(testval);	/* 32 or 64 bit decoder */
	base = pci_mapbase(map);
	barlen = ln2range == 64 ? 2 : 1;	/* registers consumed */

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			kprintf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			kprintf(", memory disabled\n");
		else
			kprintf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable busses look better when all resources are allocated,
	 * so allow '0' to be overriden.
	 *
	 * Similarly treat maps whose values is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	if ((u_long)base != base) {
		/* Address does not fit the platform's u_long: give up. */
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		/* Decoding stays off: skip disabled BARs entirely. */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count, -1);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0, -1);
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list
		 * entry to force pci_alloc_resource() to allocate
		 * resources from the parent.
		 */
		resource_list_delete(rl, type, reg);
#ifdef PCI_BAR_CLEAR
		/* Clear the BAR */
		start = 0;
#else	/* !PCI_BAR_CLEAR */
		/*
		 * Don't clear BAR here.  Some BIOS lists HPET as a
		 * PCI function, clearing the BAR causes HPET timer
		 * stop ticking.
		 */
		if (bootverbose) {
			kprintf("pci:%d:%d:%d: resource reservation failed "
				"%#jx - %#jx\n", b, s, f,
				(intmax_t)start, (intmax_t)end);
		}
		return (barlen);
#endif	/* PCI_BAR_CLEAR */
	} else {
		start = rman_get_start(res);
	}
	/* Write the (possibly parent-assigned) base back into the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2669 
/*
 * For ATA devices we need to decide early what addressing mode to use.
 * Legacy demands that the primary and secondary ATA ports sits on the
 * same addresses that old ISA hardware did. This dictates that we use
 * those addresses and ignore the BAR's if we cannot set PCI native
 * addressing mode.
 */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		/* Primary channel in native mode: trust BARs 0 and 1. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		/* Compatibility mode: fixed legacy ISA ports 0x1f0/0x3f6. */
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		/* Secondary channel in native mode: trust BARs 2 and 3. */
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		/* Compatibility mode: fixed legacy ISA ports 0x170/0x376. */
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* BARs 4 and 5 are always mapped straight from the device. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2730 
/*
 * Determine the INTx IRQ for a device and register it as the rid 0
 * SYS_RES_IRQ resource.  A user tunable takes precedence; otherwise
 * the intline register or the parent bus's routing is used.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		/* Only IRQs 1..254 are acceptable overrides. */
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			/* The IRQ must exist as a legacy interrupt source. */
			if (machintr_legacy_intr_find(irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
				device_printf(dev,
				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
				    cfg->intpin + 'A' - 1, irq);
				irq = PCI_INVALID_IRQ;
			} else {
				BUS_CONFIG_INTR(bus, dev, irq,
				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
			}
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2794 
/* Perform early OHCI takeover from SMM. */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	/* Map the controller's operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		/* SMM currently owns the controller: request a change. */
		if (bootverbose)
			kprintf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to ~100ms for SMM to release ownership. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			/* SMM did not respond: reset the controller. */
			if (bootverbose)
				kprintf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2831 
/* Perform early UHCI takeover from SMM. */
static void
uhci_early_takeover(device_t self)
{
	struct resource *res;
	int rid;

	/*
	 * Set the PIRQD enable bit and switch off all the others. We don't
	 * want legacy support to interfere with us XXX Does this also mean
	 * that the BIOS won't touch the keyboard anymore if it is connected
	 * to the ports of the root hub?
	 */
	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);

	/* Disable interrupts */
	rid = PCI_UHCI_BASE_REG;
	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
	if (res != NULL) {
		/* Clear the interrupt enable register via I/O space. */
		bus_write_2(res, UHCI_INTR, 0);
		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
	}
}
2855 
/* Perform early EHCI takeover from SMM. */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint32_t eecp;
	uint32_t bios_sem;
	uint32_t offs;
	int rid;
	int i;

	/* Map the controller's capability/operational registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		/* Extended capabilities live in PCI config space. */
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			/* BIOS does not own it; nothing to take over. */
			continue;
		}
		if (bootverbose)
			kprintf("ehci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to ~100ms for the BIOS to release ownership. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				kprintf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2911 
/* Perform early XHCI takeover from SMM. */
static void
xhci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint32_t eecp;
	uint32_t bios_sem;
	uint32_t offs;
	int rid;
	int i;

	/* Map the controller registers (BAR 0). */
	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, XHCI_HCSPARAMS0);

	/* All-ones seed keeps XHCI_XECP_NEXT() non-zero on entry. */
	eec = -1;

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
	    eecp += XHCI_XECP_NEXT(eec) << 2) {
		/* xHCI extended capabilities live in MMIO space. */
		eec = bus_read_4(res, eecp);

		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
			continue;

		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);

		if (bios_sem == 0) {
			if (bootverbose)
				kprintf("xhci early: xhci is not owned by SMM\n");

			continue;
		}

		if (bootverbose)
			kprintf("xhci early: "
			    "SMM active, request owner change\n");

		/* Request ownership by setting the OS semaphore. */
		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);

		/* wait a maximum of 5 seconds */

		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
			DELAY(1000);

			bios_sem = bus_read_1(res, eecp +
			    XHCI_XECP_BIOS_SEM);
		}

		if (bios_sem != 0) {
			if (bootverbose) {
				kprintf("xhci early: "
				    "SMM does not respond\n");
				kprintf("xhci early: "
				    "taking xhci by force\n");
			}
			/* Forcibly clear the BIOS ownership semaphore. */
			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
		} else {
			if (bootverbose)
				kprintf("xhci early: "
				    "handover successful\n");
		}

		/* Disable interrupts */
		offs = bus_read_1(res, XHCI_CAPLENGTH);
		bus_write_4(res, offs + XHCI_USBCMD, 0);
		bus_read_4(res, offs + XHCI_USBSTS);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2987 
/*
 * Register a device's BARs and interrupt as resources: walk the map
 * registers (with special handling for ATA devices), add quirked
 * maps, re-route the INTx interrupt, and perform early takeover of
 * USB controllers still owned by the BIOS/SMM.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 registers consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}

	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
}
3045 
/*
 * Enumerate every slot/function on PCI bus 'busno' in 'domain' and add a
 * child device for each function that answers config space reads.
 * dinfo_size allows subclassed busses to allocate a larger per-device
 * ivar structure than the base struct pci_devinfo.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Config-read shorthand; expands using the s/f locals of the loops below. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;	/* needed: REG() below reads function 0's header */
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function device: scan every function number. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3078 
/*
 * Create a device_t for the scanned function described by dinfo, attach
 * the ivars, initialize its resource list and add its BAR resources.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	/*
	 * NOTE(review): config space is saved and then immediately
	 * restored; this appears intended to normalize the device's
	 * power/config state before resource probing -- confirm against
	 * pci_cfg_save()/pci_cfg_restore().
	 */
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
3093 
3094 static int
3095 pci_probe(device_t dev)
3096 {
3097 	device_set_desc(dev, "PCI bus");
3098 
3099 	/* Allow other subclasses to override this driver. */
3100 	return (-1000);
3101 }
3102 
3103 static int
3104 pci_attach(device_t dev)
3105 {
3106 	int busno, domain;
3107 
3108 	/*
3109 	 * Since there can be multiple independantly numbered PCI
3110 	 * busses on systems with multiple PCI domains, we can't use
3111 	 * the unit number to decide which bus we are probing. We ask
3112 	 * the parent pcib what our domain and bus numbers are.
3113 	 */
3114 	domain = pcib_get_domain(dev);
3115 	busno = pcib_get_bus(dev);
3116 	if (bootverbose)
3117 		device_printf(dev, "domain=%d, physical bus=%d\n",
3118 		    domain, busno);
3119 
3120 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3121 
3122 	return (bus_generic_attach(dev));
3123 }
3124 
3125 int
3126 pci_suspend(device_t dev)
3127 {
3128 	int dstate, error, i, numdevs;
3129 	device_t acpi_dev, child, *devlist;
3130 	struct pci_devinfo *dinfo;
3131 
3132 	/*
3133 	 * Save the PCI configuration space for each child and set the
3134 	 * device in the appropriate power state for this sleep state.
3135 	 */
3136 	acpi_dev = NULL;
3137 	if (pci_do_power_resume)
3138 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3139 	device_get_children(dev, &devlist, &numdevs);
3140 	for (i = 0; i < numdevs; i++) {
3141 		child = devlist[i];
3142 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3143 		pci_cfg_save(child, dinfo, 0);
3144 	}
3145 
3146 	/* Suspend devices before potentially powering them down. */
3147 	error = bus_generic_suspend(dev);
3148 	if (error) {
3149 		kfree(devlist, M_TEMP);
3150 		return (error);
3151 	}
3152 
3153 	/*
3154 	 * Always set the device to D3.  If ACPI suggests a different
3155 	 * power state, use it instead.  If ACPI is not present, the
3156 	 * firmware is responsible for managing device power.  Skip
3157 	 * children who aren't attached since they are powered down
3158 	 * separately.  Only manage type 0 devices for now.
3159 	 */
3160 	for (i = 0; acpi_dev && i < numdevs; i++) {
3161 		child = devlist[i];
3162 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3163 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3164 			dstate = PCI_POWERSTATE_D3;
3165 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3166 			pci_set_powerstate(child, dstate);
3167 		}
3168 	}
3169 	kfree(devlist, M_TEMP);
3170 	return (0);
3171 }
3172 
3173 int
3174 pci_resume(device_t dev)
3175 {
3176 	int i, numdevs;
3177 	device_t acpi_dev, child, *devlist;
3178 	struct pci_devinfo *dinfo;
3179 
3180 	/*
3181 	 * Set each child to D0 and restore its PCI configuration space.
3182 	 */
3183 	acpi_dev = NULL;
3184 	if (pci_do_power_resume)
3185 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3186 	device_get_children(dev, &devlist, &numdevs);
3187 	for (i = 0; i < numdevs; i++) {
3188 		/*
3189 		 * Notify ACPI we're going to D0 but ignore the result.  If
3190 		 * ACPI is not present, the firmware is responsible for
3191 		 * managing device power.  Only manage type 0 devices for now.
3192 		 */
3193 		child = devlist[i];
3194 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3195 		if (acpi_dev && device_is_attached(child) &&
3196 		    dinfo->cfg.hdrtype == 0) {
3197 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3198 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3199 		}
3200 
3201 		/* Now the device is powered up, restore its config space. */
3202 		pci_cfg_restore(child, dinfo);
3203 	}
3204 	kfree(devlist, M_TEMP);
3205 	return (bus_generic_resume(dev));
3206 }
3207 
3208 static void
3209 pci_load_vendor_data(void)
3210 {
3211 	caddr_t vendordata, info;
3212 
3213 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3214 		info = preload_search_info(vendordata, MODINFO_ADDR);
3215 		pci_vendordata = *(char **)info;
3216 		info = preload_search_info(vendordata, MODINFO_SIZE);
3217 		pci_vendordata_size = *(size_t *)info;
3218 		/* terminate the database */
3219 		pci_vendordata[pci_vendordata_size] = '\n';
3220 	}
3221 }
3222 
3223 void
3224 pci_driver_added(device_t dev, driver_t *driver)
3225 {
3226 	int numdevs;
3227 	device_t *devlist;
3228 	device_t child;
3229 	struct pci_devinfo *dinfo;
3230 	int i;
3231 
3232 	if (bootverbose)
3233 		device_printf(dev, "driver added\n");
3234 	DEVICE_IDENTIFY(driver, dev);
3235 	device_get_children(dev, &devlist, &numdevs);
3236 	for (i = 0; i < numdevs; i++) {
3237 		child = devlist[i];
3238 		if (device_get_state(child) != DS_NOTPRESENT)
3239 			continue;
3240 		dinfo = device_get_ivars(child);
3241 		pci_print_verbose(dinfo);
3242 		if (bootverbose)
3243 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3244 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3245 			    dinfo->cfg.func);
3246 		pci_cfg_restore(child, dinfo);
3247 		if (device_probe_and_attach(child) != 0)
3248 			pci_cfg_save(child, dinfo, 1);
3249 	}
3250 	kfree(devlist, M_TEMP);
3251 }
3252 
3253 static void
3254 pci_child_detached(device_t parent __unused, device_t child)
3255 {
3256 	/* Turn child's power off */
3257 	pci_cfg_save(child, device_get_ivars(child), 1);
3258 }
3259 
/*
 * Bus method: hook up an interrupt handler for 'child'.
 *
 * For direct children this also programs the hardware side: rid 0 is
 * the legacy INTx line (unmask INTx in the command register); any other
 * rid is an MSI or MSI-X message that must be mapped through the parent
 * bridge and written into the device, after which INTx is masked off.
 * On any failure past the generic setup, the handler is torn down again
 * and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* Map the MSI vectors only on first use. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		/* Failure after generic setup: undo it before returning. */
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3350 
/*
 * Bus method: detach an interrupt handler.
 *
 * For direct children, undo the hardware programming done in
 * pci_setup_intr(): mask INTx for rid 0, or release the MSI/MSI-X
 * message (disabling MSI entirely when the last handler goes away).
 * Returns EINVAL for a NULL or inactive irq resource, otherwise the
 * result of the generic teardown.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	int rid, error;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);

		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			KASSERT(rid <= msi->msi_alloc,
			    ("MSI-X index too high"));
			KASSERT(msi->msi_handlers > 0,
			    ("MSI rid %d is not setup", rid));

			msi->msi_handlers--;
			if (msi->msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			struct msix_vector *mv;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address != 0,
			    ("MSI-X rid %d has not been setup", rid));

			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
			mv->mv_address = 0;
			mv->mv_data = 0;
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	/*
	 * The MSI/MSI-X bookkeeping above has already been undone; a
	 * generic teardown failure here would leave it inconsistent,
	 * hence the assertion.
	 */
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3411 
3412 int
3413 pci_print_child(device_t dev, device_t child)
3414 {
3415 	struct pci_devinfo *dinfo;
3416 	struct resource_list *rl;
3417 	int retval = 0;
3418 
3419 	dinfo = device_get_ivars(child);
3420 	rl = &dinfo->resources;
3421 
3422 	retval += bus_print_child_header(dev, child);
3423 
3424 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3425 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3426 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3427 	if (device_get_flags(dev))
3428 		retval += kprintf(" flags %#x", device_get_flags(dev));
3429 
3430 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3431 	    pci_get_function(child));
3432 
3433 	retval += bus_print_child_footer(dev, child);
3434 
3435 	return (retval);
3436 }
3437 
/*
 * Class/subclass description table used by pci_probe_nomatch() to build
 * a generic name for devices without a matching driver.  An entry with
 * subclass -1 names the class as a whole; the list is terminated by a
 * NULL desc.
 */
static struct
{
	int	class;		/* PCIC_* class code */
	int	subclass;	/* PCIS_* subclass code, or -1 for any */
	char	*desc;		/* human-readable description */
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}
};
3529 
3530 void
3531 pci_probe_nomatch(device_t dev, device_t child)
3532 {
3533 	int	i;
3534 	char	*cp, *scp, *device;
3535 
3536 	/*
3537 	 * Look for a listing for this device in a loaded device database.
3538 	 */
3539 	if ((device = pci_describe_device(child)) != NULL) {
3540 		device_printf(dev, "<%s>", device);
3541 		kfree(device, M_DEVBUF);
3542 	} else {
3543 		/*
3544 		 * Scan the class/subclass descriptions for a general
3545 		 * description.
3546 		 */
3547 		cp = "unknown";
3548 		scp = NULL;
3549 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3550 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3551 				if (pci_nomatch_tab[i].subclass == -1) {
3552 					cp = pci_nomatch_tab[i].desc;
3553 				} else if (pci_nomatch_tab[i].subclass ==
3554 				    pci_get_subclass(child)) {
3555 					scp = pci_nomatch_tab[i].desc;
3556 				}
3557 			}
3558 		}
3559 		device_printf(dev, "<%s%s%s>",
3560 		    cp ? cp : "",
3561 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3562 		    scp ? scp : "");
3563 	}
3564 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3565 		pci_get_vendor(child), pci_get_device(child),
3566 		pci_get_slot(child), pci_get_function(child));
3567 	if (pci_get_intpin(child) > 0) {
3568 		int irq;
3569 
3570 		irq = pci_get_irq(child);
3571 		if (PCI_INTERRUPT_VALID(irq))
3572 			kprintf(" irq %d", irq);
3573 	}
3574 	kprintf("\n");
3575 
3576 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3577 }
3578 
3579 /*
3580  * Parse the PCI device database, if loaded, and return a pointer to a
3581  * description of the device.
3582  *
3583  * The database is flat text formatted as follows:
3584  *
3585  * Any line not in a valid format is ignored.
3586  * Lines are terminated with newline '\n' characters.
3587  *
3588  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3589  * the vendor name.
3590  *
3591  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3592  * - devices cannot be listed without a corresponding VENDOR line.
3593  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3594  * another TAB, then the device name.
3595  */
3596 
3597 /*
3598  * Assuming (ptr) points to the beginning of a line in the database,
3599  * return the vendor or device and description of the next entry.
3600  * The value of (vendor) or (device) inappropriate for the entry type
3601  * is set to -1.  Returns nonzero at the end of the database.
3602  *
3603  * Note that this is slightly unrobust in the face of corrupt data;
3604  * we attempt to safeguard against this by spamming the end of the
3605  * database with a newline when we initialise.
3606  */
/*
 * Parse the next entry in the preloaded vendor database.  On return,
 * whichever of *vendor or *device does not apply to the entry type is
 * left at -1 and *desc receives the description text (up to 80 chars).
 * Returns nonzero at the end of the database.
 *
 * NOTE(review): the line-skip logic below can read *cp once after
 * 'left' reaches 0; this relies on the sentinel '\n' that
 * pci_load_vendor_data() writes one byte past the database -- confirm
 * that sentinel is always present before reusing this parser.
 */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;	/* bytes remaining from cp to end of the database */

	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? */
		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? */
		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	*ptr = cp;
	return(0);
}
3652 
3653 static char *
3654 pci_describe_device(device_t dev)
3655 {
3656 	int	vendor, device;
3657 	char	*desc, *vp, *dp, *line;
3658 
3659 	desc = vp = dp = NULL;
3660 
3661 	/*
3662 	 * If we have no vendor data, we can't do anything.
3663 	 */
3664 	if (pci_vendordata == NULL)
3665 		goto out;
3666 
3667 	/*
3668 	 * Scan the vendor data looking for this device
3669 	 */
3670 	line = pci_vendordata;
3671 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3672 		goto out;
3673 	for (;;) {
3674 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3675 			goto out;
3676 		if (vendor == pci_get_vendor(dev))
3677 			break;
3678 	}
3679 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3680 		goto out;
3681 	for (;;) {
3682 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3683 			*dp = 0;
3684 			break;
3685 		}
3686 		if (vendor != -1) {
3687 			*dp = 0;
3688 			break;
3689 		}
3690 		if (device == pci_get_device(dev))
3691 			break;
3692 	}
3693 	if (dp[0] == '\0')
3694 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3695 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3696 	    NULL)
3697 		ksprintf(desc, "%s, %s", vp, dp);
3698  out:
3699 	if (vp != NULL)
3700 		kfree(vp, M_DEVBUF);
3701 	if (dp != NULL)
3702 		kfree(dp, M_DEVBUF);
3703 	return(desc);
3704 }
3705 
/*
 * Bus method: read a PCI instance variable for 'child' into *result.
 * Most values come straight from the cached config registers in the
 * child's pci_devinfo.  Returns 0 on success, ENOENT for an unknown
 * ivar, and EINVAL for ETHADDR (the generic accessor cannot signal
 * failure, so *result is also set to NULL in that case).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor id, device in the high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3797 
/*
 * Bus method: write a PCI instance variable for 'child'.  Only INTPIN
 * may currently be modified; the identity-related ivars are read-only
 * and yield EINVAL, anything unknown yields ENOENT.
 */
int
pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
{
	struct pci_devinfo *dinfo;

	dinfo = device_get_ivars(child);

	switch (which) {
	case PCI_IVAR_INTPIN:
		dinfo->cfg.intpin = value;
		return (0);
	case PCI_IVAR_ETHADDR:
	case PCI_IVAR_SUBVENDOR:
	case PCI_IVAR_SUBDEVICE:
	case PCI_IVAR_VENDOR:
	case PCI_IVAR_DEVICE:
	case PCI_IVAR_DEVID:
	case PCI_IVAR_CLASS:
	case PCI_IVAR_SUBCLASS:
	case PCI_IVAR_PROGIF:
	case PCI_IVAR_REVID:
	case PCI_IVAR_IRQ:
	case PCI_IVAR_DOMAIN:
	case PCI_IVAR_BUS:
	case PCI_IVAR_SLOT:
	case PCI_IVAR_FUNCTION:
		return (EINVAL);	/* disallow for now */

	default:
		return (ENOENT);
	}
}
#ifdef notyet
/*
 * This whole DDB command is currently compiled out ("notyet"); it dumps
 * the global PCI device queue from within the kernel debugger.
 */
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
#include <sys/cons.h>

/*
 * List resources based on pci map registers, used for within ddb
 */

DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Devices without a driver print as "none<N>". */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
#endif /* DDB */
#endif
3881 
/*
 * Lazily allocate the resource backing a BAR that has no resource list
 * entry yet.
 *
 * Probes the BAR by writing all-ones and reading back the size mask
 * (restoring the original value immediately afterwards so a low-level
 * console device stays addressable), validates that the requested type
 * matches what the BAR decodes, then allocates a properly sized and
 * aligned resource and records it in the device's resource list.  The
 * BAR is finally reprogrammed with the allocated start address.
 * Returns the resource, or NULL on any failure.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/*
	 * Program the BAR: either the freshly allocated address, or (on
	 * the failure paths) the original value read at entry.
	 */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3980 
3981 
/*
 * Bus method: allocate a resource for 'child'.
 *
 * For direct children this performs lazy allocation: an IRQ may first
 * be routed if the device deserves one but none is configured, a BAR
 * with no resource list entry yet is sized and allocated via
 * pci_alloc_map(), and an already reserved entry is activated and
 * returned directly.  Everything else falls through to the generic
 * resource list allocator.
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
4058 
4059 void
4060 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4061 {
4062 	struct pci_devinfo *dinfo;
4063 	struct resource_list *rl;
4064 	struct resource_list_entry *rle;
4065 
4066 	if (device_get_parent(child) != dev)
4067 		return;
4068 
4069 	dinfo = device_get_ivars(child);
4070 	rl = &dinfo->resources;
4071 	rle = resource_list_find(rl, type, rid);
4072 	if (rle) {
4073 		if (rle->res) {
4074 			if (rman_get_device(rle->res) != dev ||
4075 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4076 				device_printf(dev, "delete_resource: "
4077 				    "Resource still owned by child, oops. "
4078 				    "(type=%d, rid=%d, addr=%lx)\n",
4079 				    rle->type, rle->rid,
4080 				    rman_get_start(rle->res));
4081 				return;
4082 			}
4083 			bus_release_resource(dev, type, rid, rle->res);
4084 		}
4085 		resource_list_delete(rl, type, rid);
4086 	}
4087 	/*
4088 	 * Why do we turn off the PCI configuration BAR when we delete a
4089 	 * resource? -- imp
4090 	 */
4091 	pci_write_config(child, rid, 0, 4);
4092 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4093 }
4094 
4095 struct resource_list *
4096 pci_get_resource_list (device_t dev, device_t child)
4097 {
4098 	struct pci_devinfo *dinfo = device_get_ivars(child);
4099 
4100 	if (dinfo == NULL)
4101 		return (NULL);
4102 
4103 	return (&dinfo->resources);
4104 }
4105 
4106 uint32_t
4107 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4108 {
4109 	struct pci_devinfo *dinfo = device_get_ivars(child);
4110 	pcicfgregs *cfg = &dinfo->cfg;
4111 
4112 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4113 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4114 }
4115 
4116 void
4117 pci_write_config_method(device_t dev, device_t child, int reg,
4118     uint32_t val, int width)
4119 {
4120 	struct pci_devinfo *dinfo = device_get_ivars(child);
4121 	pcicfgregs *cfg = &dinfo->cfg;
4122 
4123 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4124 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4125 }
4126 
4127 int
4128 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4129     size_t buflen)
4130 {
4131 
4132 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4133 	    pci_get_function(child));
4134 	return (0);
4135 }
4136 
4137 int
4138 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4139     size_t buflen)
4140 {
4141 	struct pci_devinfo *dinfo;
4142 	pcicfgregs *cfg;
4143 
4144 	dinfo = device_get_ivars(child);
4145 	cfg = &dinfo->cfg;
4146 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4147 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4148 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4149 	    cfg->progif);
4150 	return (0);
4151 }
4152 
4153 int
4154 pci_assign_interrupt_method(device_t dev, device_t child)
4155 {
4156 	struct pci_devinfo *dinfo = device_get_ivars(child);
4157 	pcicfgregs *cfg = &dinfo->cfg;
4158 
4159 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4160 	    cfg->intpin));
4161 }
4162 
4163 static int
4164 pci_modevent(module_t mod, int what, void *arg)
4165 {
4166 	static struct cdev *pci_cdev;
4167 
4168 	switch (what) {
4169 	case MOD_LOAD:
4170 		STAILQ_INIT(&pci_devq);
4171 		pci_generation = 0;
4172 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4173 				    "pci");
4174 		pci_load_vendor_data();
4175 		break;
4176 
4177 	case MOD_UNLOAD:
4178 		destroy_dev(pci_cdev);
4179 		break;
4180 	}
4181 
4182 	return (0);
4183 }
4184 
/*
 * Restore the config-space registers cached by pci_cfg_save() after a
 * suspend/resume or power-state change.  Power is raised to D0 first,
 * then BARs and the writable type 0 header registers are re-written,
 * and finally any MSI/MSI-X configuration is re-programmed.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Re-program the BARs from the values cached in pci_cfg_save(). */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the remaining writable type 0 header registers. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4230 
/*
 * Snapshot the writable config-space registers of a type 0 device into
 * dinfo->cfg so pci_cfg_restore() can put them back later.  When
 * 'setstate' is non-zero the device may additionally be powered down
 * to D3, subject to the pci_do_power_nodriver policy.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	/* Cache all BARs and the expansion ROM address. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* Apply the powerdown policy; cases fall through to wider sets. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4314 
4315 #ifdef COMPAT_OLDPCI
4316 
4317 /*
4318  * Locate the parent of a PCI device by scanning the PCI devlist
4319  * and return the entry for the parent.
4320  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4321  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4322  */
4323 pcicfgregs *
4324 pci_devlist_get_parent(pcicfgregs *cfg)
4325 {
4326 	struct devlist *devlist_head;
4327 	struct pci_devinfo *dinfo;
4328 	pcicfgregs *bridge_cfg;
4329 	int i;
4330 
4331 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4332 
4333 	/* If the device is on PCI bus 0, look for the host */
4334 	if (cfg->bus == 0) {
4335 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4336 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4337 			bridge_cfg = &dinfo->cfg;
4338 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4339 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4340 		    		&& bridge_cfg->bus == cfg->bus) {
4341 				return bridge_cfg;
4342 			}
4343 		}
4344 	}
4345 
4346 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4347 	if (cfg->bus > 0) {
4348 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4349 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4350 			bridge_cfg = &dinfo->cfg;
4351 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4352 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4353 				&& bridge_cfg->secondarybus == cfg->bus) {
4354 				return bridge_cfg;
4355 			}
4356 		}
4357 	}
4358 
4359 	return NULL;
4360 }
4361 
4362 #endif	/* COMPAT_OLDPCI */
4363 
4364 int
4365 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4366 {
4367 	int rid, type;
4368 	u_int flags;
4369 
4370 	rid = 0;
4371 	type = PCI_INTR_TYPE_LEGACY;
4372 	flags = RF_SHAREABLE | RF_ACTIVE;
4373 
4374 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4375 	if (msi_enable) {
4376 		int cpu;
4377 
4378 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4379 		if (cpu >= ncpus)
4380 			cpu = ncpus - 1;
4381 
4382 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4383 			flags &= ~RF_SHAREABLE;
4384 			type = PCI_INTR_TYPE_MSI;
4385 		}
4386 	}
4387 
4388 	*rid0 = rid;
4389 	*flags0 = flags;
4390 
4391 	return type;
4392 }
4393 
4394 /* Wrapper APIs suitable for device driver use. */
4395 void
4396 pci_save_state(device_t dev)
4397 {
4398 	struct pci_devinfo *dinfo;
4399 
4400 	dinfo = device_get_ivars(dev);
4401 	pci_cfg_save(dev, dinfo, 0);
4402 }
4403 
4404 void
4405 pci_restore_state(device_t dev)
4406 {
4407 	struct pci_devinfo *dinfo;
4408 
4409 	dinfo = device_get_ivars(dev);
4410 	pci_cfg_restore(dev, dinfo);
4411 }
4412