xref: /dragonfly/sys/bus/pci/pci.c (revision 38b930d0)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 #include "use_usb4bsd.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 #include <sys/machintr.h>
47 
48 #include <machine/msi_machdep.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 
54 #include <sys/bus.h>
55 #include <sys/rman.h>
56 #include <sys/device.h>
57 
58 #include <sys/pciio.h>
59 #include <bus/pci/pcireg.h>
60 #include <bus/pci/pcivar.h>
61 #include <bus/pci/pci_private.h>
62 
63 #if NUSB4BSD > 0
64 #include <bus/u4b/controller/xhcireg.h>
65 #include <bus/u4b/controller/ehcireg.h>
66 #include <bus/u4b/controller/ohcireg.h>
67 #include <bus/u4b/controller/uhcireg.h>
68 #endif
69 
70 #include "pcib_if.h"
71 #include "pci_if.h"
72 
73 #ifdef __HAVE_ACPI
74 #include <contrib/dev/acpica/acpi.h>
75 #include "acpi_if.h"
76 #else
77 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
78 #endif
79 
80 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
81 
82 static uint32_t		pci_mapbase(unsigned mapreg);
83 static const char	*pci_maptype(unsigned mapreg);
84 static int		pci_mapsize(unsigned testval);
85 static int		pci_maprange(unsigned mapreg);
86 static void		pci_fixancient(pcicfgregs *cfg);
87 
88 static int		pci_porten(device_t pcib, int b, int s, int f);
89 static int		pci_memen(device_t pcib, int b, int s, int f);
90 static void		pci_assign_interrupt(device_t bus, device_t dev,
91 			    int force_route);
92 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
93 			    int b, int s, int f, int reg,
94 			    struct resource_list *rl, int force, int prefetch);
95 static int		pci_probe(device_t dev);
96 static int		pci_attach(device_t dev);
97 static void		pci_child_detached(device_t, device_t);
98 static void		pci_load_vendor_data(void);
99 static int		pci_describe_parse_line(char **ptr, int *vendor,
100 			    int *device, char **desc);
101 static char		*pci_describe_device(device_t dev);
102 static int		pci_modevent(module_t mod, int what, void *arg);
103 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
104 			    pcicfgregs *cfg);
105 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
106 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
107 			    int reg, uint32_t *data);
108 #if 0
109 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
110 			    int reg, uint32_t data);
111 #endif
112 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
113 static void		pci_disable_msi(device_t dev);
114 static void		pci_enable_msi(device_t dev, uint64_t address,
115 			    uint16_t data);
116 static void		pci_setup_msix_vector(device_t dev, u_int index,
117 			    uint64_t address, uint32_t data);
118 static void		pci_mask_msix_vector(device_t dev, u_int index);
119 static void		pci_unmask_msix_vector(device_t dev, u_int index);
120 static void		pci_mask_msix_allvectors(device_t dev);
121 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
122 static int		pci_msi_blacklisted(void);
123 static void		pci_resume_msi(device_t dev);
124 static void		pci_resume_msix(device_t dev);
125 static int		pcie_slotimpl(const pcicfgregs *);
126 static void		pci_print_verbose_expr(const pcicfgregs *);
127 
128 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
131 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
132 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
133 static void		pci_read_cap_subvendor(device_t, int, int,
134 			    pcicfgregs *);
135 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
136 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
137 
/*
 * kobj method dispatch table for the generic PCI bus driver:
 * device, bus and PCI-specific interfaces (see pci_if.m / pcib_if.m).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
190 
/* Register the "pci" driver class; it attaches under pcib parents. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Vendor description database; presumably filled by pci_load_vendor_data()
 * (body not in this chunk) — verify before relying on contents. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
199 
200 
/*
 * Dispatch table mapping capability IDs (PCIY_*) to their parsers;
 * walked by pci_read_capabilities() for each capability found.
 */
static const struct pci_read_cap {
	int		cap;		/* capability ID (PCIY_*) */
	pci_read_cap_t	read_cap;	/* parser for this capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
215 
/* One entry in the per-device quirk table below. */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;	/* type-specific argument (e.g. map register offset) */
	int	arg2;	/* currently unused by the table below */
};
224 
/* Known-broken devices and chipsets; terminated by a zero devid. */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }
};
259 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/* MSI-X resource IDs are 1-based, vector numbers are 0-based. */
#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

struct devlist pci_devq;		/* all enumerated PCI functions */
uint32_t pci_generation;		/* bumped whenever pci_devq grows */
uint32_t pci_numdevs = 0;		/* number of entries on pci_devq */
static int pcie_chipset, pcix_chipset;	/* set once a PCIe/PCI-X cap is seen */
272 
/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

#if NUSB4BSD > 0
/* USB early takeover defaults on only where BIOS USB legacy mode exists. */
#if defined(__i386__) || defined(__x86_64__)
static int pci_usb_takeover = 1;
#else
static int pci_usb_takeover = 0;
#endif
TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
    &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
Disable this if you depend on BIOS emulation of USB devices, that is\n\
you use USB devices (like keyboard or mouse) but do not load USB drivers");
#endif

/* NOTE(review): MSI target cpuid; users of this variable are not in this
 * chunk — confirm semantics against the MSI allocation code. */
static int pci_msi_cpuid;
328 
329 /* Find a device_t by bus/slot/function in domain 0 */
330 
331 device_t
332 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
333 {
334 
335 	return (pci_find_dbsf(0, bus, slot, func));
336 }
337 
338 /* Find a device_t by domain/bus/slot/function */
339 
340 device_t
341 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.domain == domain) &&
347 		    (dinfo->cfg.bus == bus) &&
348 		    (dinfo->cfg.slot == slot) &&
349 		    (dinfo->cfg.func == func)) {
350 			return (dinfo->cfg.dev);
351 		}
352 	}
353 
354 	return (NULL);
355 }
356 
357 /* Find a device_t by vendor/device ID */
358 
359 device_t
360 pci_find_device(uint16_t vendor, uint16_t device)
361 {
362 	struct pci_devinfo *dinfo;
363 
364 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
365 		if ((dinfo->cfg.vendor == vendor) &&
366 		    (dinfo->cfg.device == device)) {
367 			return (dinfo->cfg.dev);
368 		}
369 	}
370 
371 	return (NULL);
372 }
373 
374 device_t
375 pci_find_class(uint8_t class, uint8_t subclass)
376 {
377 	struct pci_devinfo *dinfo;
378 
379 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
380 		if (dinfo->cfg.baseclass == class &&
381 		    dinfo->cfg.subclass == subclass) {
382 			return (dinfo->cfg.dev);
383 		}
384 	}
385 
386 	return (NULL);
387 }
388 
389 /* return base address of memory or port map */
390 
391 static uint32_t
392 pci_mapbase(uint32_t mapreg)
393 {
394 
395 	if (PCI_BAR_MEM(mapreg))
396 		return (mapreg & PCIM_BAR_MEM_BASE);
397 	else
398 		return (mapreg & PCIM_BAR_IO_BASE);
399 }
400 
401 /* return map type of memory or port map */
402 
403 static const char *
404 pci_maptype(unsigned mapreg)
405 {
406 
407 	if (PCI_BAR_IO(mapreg))
408 		return ("I/O Port");
409 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
410 		return ("Prefetchable Memory");
411 	return ("Memory");
412 }
413 
414 /* return log2 of map size decoded for memory or port map */
415 
416 static int
417 pci_mapsize(uint32_t testval)
418 {
419 	int ln2size;
420 
421 	testval = pci_mapbase(testval);
422 	ln2size = 0;
423 	if (testval != 0) {
424 		while ((testval & 1) == 0)
425 		{
426 			ln2size++;
427 			testval >>= 1;
428 		}
429 	}
430 	return (ln2size);
431 }
432 
433 /* return log2 of address range supported by map register */
434 
435 static int
436 pci_maprange(unsigned mapreg)
437 {
438 	int ln2range = 0;
439 
440 	if (PCI_BAR_IO(mapreg))
441 		ln2range = 32;
442 	else
443 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
444 		case PCIM_BAR_MEM_32:
445 			ln2range = 32;
446 			break;
447 		case PCIM_BAR_MEM_1MB:
448 			ln2range = 20;
449 			break;
450 		case PCIM_BAR_MEM_64:
451 			ln2range = 64;
452 			break;
453 		}
454 	return (ln2range);
455 }
456 
457 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
458 
459 static void
460 pci_fixancient(pcicfgregs *cfg)
461 {
462 	if (cfg->hdrtype != 0)
463 		return;
464 
465 	/* PCI to PCI bridges use header type 1 */
466 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
467 		cfg->hdrtype = 1;
468 }
469 
/*
 * Extract header-type specific config data: subsystem IDs, the number
 * of BARs for this header layout and (with COMPAT_OLDPCI) the
 * secondary bus number of bridges.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* normal device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge; no fixed subvendor registers */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:		/* cardbus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
499 
/*
 * Read the configuration header of pci<d>:<b>:<s>:<f> into a freshly
 * allocated pci_devinfo of `size' bytes (callers may embed it in a
 * larger structure), parse its capabilities, and link it onto the
 * global device list.  Returns the new entry, or NULL if no device
 * responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* An empty slot reads back all-ones on the vendor/device word. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the pciio conf record. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
571 
572 static int
573 pci_fixup_nextptr(int *nextptr0)
574 {
575 	int nextptr = *nextptr0;
576 
577 	/* "Next pointer" is only one byte */
578 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
579 
580 	if (nextptr & 0x3) {
581 		/*
582 		 * PCI local bus spec 3.0:
583 		 *
584 		 * "... The bottom two bits of all pointers are reserved
585 		 *  and must be implemented as 00b although software must
586 		 *  mask them to allow for future uses of these bits ..."
587 		 */
588 		if (bootverbose) {
589 			kprintf("Illegal PCI extended capability "
590 				"offset, fixup 0x%02x -> 0x%02x\n",
591 				nextptr, nextptr & ~0x3);
592 		}
593 		nextptr &= ~0x3;
594 	}
595 	*nextptr0 = nextptr;
596 
597 	if (nextptr < 0x40) {
598 		if (nextptr != 0) {
599 			kprintf("Illegal PCI extended capability "
600 				"offset 0x%02x", nextptr);
601 		}
602 		return 0;
603 	}
604 	return 1;
605 }
606 
/*
 * Parse a power-management (PCIY_PMG) capability: record the PM
 * capability word and the config offsets of the status and PMCSR
 * registers in cfg->pp.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Only the first power-management capability found is used. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
638 
/*
 * Parse a HyperTransport (PCIY_HT) capability: record the slave
 * capability offset and, for MSI mapping capabilities, the MSI
 * mapping window address.  Compiled only on x86, where the default
 * MSI window (MSI_X86_ADDR_BASE) is meaningful.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	/* Everything below applies only to the MSI mapping capability. */
	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_X86_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
}
683 
/*
 * Parse an MSI (PCIY_MSI) capability: record its config offset, the
 * control word, and the number of messages the device requests
 * (encoded as a power of two in the MMC field of the control word).
 */
static void
pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msi *msi = &cfg->msi;

	msi->msi_location = ptr;
	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);

#undef REG
}
698 
/*
 * Parse an MSI-X (PCIY_MSIX) capability: record the control word,
 * the message count, and which BAR/offset pairs hold the vector
 * table and the pending-bit array.
 */
static void
pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_msix *msix = &cfg->msix;
	uint32_t val;

	msix->msix_location = ptr;
	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
	/* The table-size field is encoded as N-1. */
	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;

	/* Low bits select the BAR, the rest give the offset within it. */
	val = REG(ptr + PCIR_MSIX_TABLE, 4);
	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;

	val = REG(ptr + PCIR_MSIX_PBA, 4);
	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;

	TAILQ_INIT(&msix->msix_vectors);

#undef REG
}
724 
/*
 * Parse a VPD (PCIY_VPD) capability: just remember its config offset
 * for the VPD access helpers below.
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
730 
/*
 * Parse a bridge subvendor (PCIY_SUBVENDOR) capability.  Type-1
 * headers have no fixed subvendor/subdevice registers (see
 * pci_hdrtypedata()), so bridges report them via this capability.
 */
static void
pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	/* Should always be true. */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
		uint32_t val;

		/* Low half is the subvendor, high half the subdevice. */
		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
		cfg->subvendor = val & 0xffff;
		cfg->subdevice = val >> 16;
	}

#undef REG
}
748 
/*
 * Parse a PCI-X (PCIY_PCIX) capability: remember its offset and note
 * the apparent presence of a PCI-X chipset.
 */
static void
pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	/*
	 * Assume we have a PCI-X chipset if we have
	 * at least one PCI-PCI bridge with a PCI-X
	 * capability.  Note that some systems with
	 * PCI-express or HT chipsets might match on
	 * this check as well.
	 */
	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
		pcix_chipset = 1;

	cfg->pcix.pcix_ptr = ptr;
}
764 
765 static int
766 pcie_slotimpl(const pcicfgregs *cfg)
767 {
768 	const struct pcicfg_expr *expr = &cfg->expr;
769 	uint16_t port_type;
770 
771 	/*
772 	 * - Slot implemented bit is meaningful iff current port is
773 	 *   root port or down stream port.
774 	 * - Testing for root port or down stream port is meanningful
775 	 *   iff PCI configure has type 1 header.
776 	 */
777 
778 	if (cfg->hdrtype != 1)
779 		return 0;
780 
781 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
782 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
783 		return 0;
784 
785 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
786 		return 0;
787 
788 	return 1;
789 }
790 
/*
 * Parse a PCI Express (PCIY_EXPRESS) capability: record its offset
 * and capability word, plus the slot capabilities when the port
 * actually implements a slot.
 */
static void
pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_expr *expr = &cfg->expr;

	/*
	 * Assume we have a PCI-express chipset if we have
	 * at least one PCI-express device.
	 */
	pcie_chipset = 1;

	expr->expr_ptr = ptr;
	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);

	/*
	 * Read slot capabilities.  Slot capabilities exists iff
	 * current port's slot is implemented
	 */
	if (pcie_slotimpl(cfg))
		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);

#undef REG
}
817 
/*
 * Walk the config-space capability list of `cfg' and dispatch each
 * entry to its parser via the pci_read_caps[] table.  On x86 this
 * also enables the MSI mapping window on HyperTransport slaves.
 *
 * NOTE: REG/WREG are deliberately left defined at the end; the VPD
 * helpers further down reuse them.
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* The location of the capability pointer depends on header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/* REG and WREG use carry through to next functions */
}
884 
885 /*
886  * PCI Vital Product Data
887  */
888 
889 #define	PCI_VPD_TIMEOUT		1000000
890 
/*
 * Read one aligned 32-bit word from a device's VPD through the VPD
 * address/data register pair.  Spins (bounded by PCI_VPD_TIMEOUT
 * iterations) until the hardware signals completion; returns ENXIO
 * on timeout, 0 on success.  Uses the REG/WREG macros still in
 * effect from pci_read_capabilities() above.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Bit 15 of the address register is set when the read completes. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
909 
#if 0
/*
 * Write one aligned 32-bit word to a device's VPD: the mirror image
 * of pci_read_vpd_reg() (bit 15 is written set and polls clear on
 * completion).  Currently compiled out — no callers.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
929 
930 #undef PCI_VPD_TIMEOUT
931 
932 struct vpd_readstate {
933 	device_t	pcib;
934 	pcicfgregs	*cfg;
935 	uint32_t	val;
936 	int		bytesinval;
937 	int		off;
938 	uint8_t		cksum;
939 };
940 
941 static int
942 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
943 {
944 	uint32_t reg;
945 	uint8_t byte;
946 
947 	if (vrs->bytesinval == 0) {
948 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
949 			return (ENXIO);
950 		vrs->val = le32toh(reg);
951 		vrs->off += 4;
952 		byte = vrs->val & 0xff;
953 		vrs->bytesinval = 3;
954 	} else {
955 		vrs->val = vrs->val >> 8;
956 		byte = vrs->val & 0xff;
957 		vrs->bytesinval--;
958 	}
959 
960 	vrs->cksum += byte;
961 	*data = byte;
962 	return (0);
963 }
964 
/*
 * Public wrapper: report whether the PCIe port backing `dev'
 * implements a slot (see pcie_slotimpl()).
 */
int
pcie_slot_implemented(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);

	return pcie_slotimpl(&dinfo->cfg);
}
972 
/*
 * Program the Max Read Request Size field of the PCIe device control
 * register.  `rqsize' must be one of the PCIEM_DEVCTL_MAX_READRQ_*
 * encodings; panics on an out-of-range size or a non-PCIe device.
 * The register is only written if the field actually changes.
 */
void
pcie_set_max_readrq(device_t dev, uint16_t rqsize)
{
	uint8_t expr_ptr;
	uint16_t val;

	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
		panic("%s: invalid max read request size 0x%02x",
		      device_get_nameunit(dev), rqsize);
	}

	expr_ptr = pci_get_pciecap_ptr(dev);
	if (!expr_ptr)
		panic("%s: not PCIe device", device_get_nameunit(dev));

	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
		/* The two bootverbose prints form a single console line. */
		if (bootverbose)
			device_printf(dev, "adjust device control 0x%04x", val);

		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
		val |= rqsize;
		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);

		if (bootverbose)
			kprintf(" -> 0x%04x\n", val);
	}
}
1002 
1003 uint16_t
1004 pcie_get_max_readrq(device_t dev)
1005 {
1006 	uint8_t expr_ptr;
1007 	uint16_t val;
1008 
1009 	expr_ptr = pci_get_pciecap_ptr(dev);
1010 	if (!expr_ptr)
1011 		panic("%s: not PCIe device", device_get_nameunit(dev));
1012 
1013 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1014 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1015 }
1016 
/*
 * Parse the device's Vital Product Data (VPD) into cfg->vpd.
 *
 * Bytes are pulled through vpd_nextbyte() and decoded by a state machine:
 *   state 0  - expect a resource item header (small or large format)
 *   state 1  - consume the Identifier String into vpd_ident
 *   state 2  - expect a VPD-R keyword header (2-byte keyword + 1-byte len)
 *   state 3  - consume a VPD-R keyword value; the "RV" keyword carries the
 *              checksum byte that must make the running cksum wrap to zero
 *   state 4  - byte-skipping state (no transition into it here; appears
 *              unused — TODO confirm)
 *   state 5  - expect a VPD-W keyword header
 *   state 6  - consume a VPD-W keyword value
 *   state -1 - orderly stop (End item, or invalid/untrusted data)
 *   state -2 - I/O error while reading the VPD registers
 *
 * On checksum failure only the read-only data is discarded; on I/O error
 * everything parsed so far is freed.  vpd_cached is set in all cases so
 * the (possibly slow or failing) read is attempted only once.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;
	int name;
	int remain;
	int i;
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 unknown, 0 bad, 1 good */
	int dflen;
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit little-endian length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is capped at 0x7f dwords; reject lengths past it. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length, 4-bit name. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the RO-keyword array by doubling when full. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Zero-length value: store an empty string. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			remain -= 3;	/* header consumed 3 bytes of the item */
			i = 0;
			/* keep in sync w/ state 3's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * "RV" carries the checksum byte; after it is folded
			 * into vrs.cksum the running sum must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* End of VPD-R: shrink array to actual count. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:
			/* Generic skip state; nothing transitions here today. */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record config-space start so callers can write back. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transistions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transistions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1296 
1297 int
1298 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1299 {
1300 	struct pci_devinfo *dinfo = device_get_ivars(child);
1301 	pcicfgregs *cfg = &dinfo->cfg;
1302 
1303 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1304 		pci_read_vpd(device_get_parent(dev), cfg);
1305 
1306 	*identptr = cfg->vpd.vpd_ident;
1307 
1308 	if (*identptr == NULL)
1309 		return (ENXIO);
1310 
1311 	return (0);
1312 }
1313 
1314 int
1315 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1316 	const char **vptr)
1317 {
1318 	struct pci_devinfo *dinfo = device_get_ivars(child);
1319 	pcicfgregs *cfg = &dinfo->cfg;
1320 	int i;
1321 
1322 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1323 		pci_read_vpd(device_get_parent(dev), cfg);
1324 
1325 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1326 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1327 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1328 			*vptr = cfg->vpd.vpd_ros[i].value;
1329 		}
1330 
1331 	if (i != cfg->vpd.vpd_rocnt)
1332 		return (0);
1333 
1334 	*vptr = NULL;
1335 	return (ENXIO);
1336 }
1337 
1338 /*
1339  * Return the offset in configuration space of the requested extended
1340  * capability entry or 0 if the specified capability was not found.
1341  */
1342 int
1343 pci_find_extcap_method(device_t dev, device_t child, int capability,
1344     int *capreg)
1345 {
1346 	struct pci_devinfo *dinfo = device_get_ivars(child);
1347 	pcicfgregs *cfg = &dinfo->cfg;
1348 	u_int32_t status;
1349 	u_int8_t ptr;
1350 
1351 	/*
1352 	 * Check the CAP_LIST bit of the PCI status register first.
1353 	 */
1354 	status = pci_read_config(child, PCIR_STATUS, 2);
1355 	if (!(status & PCIM_STATUS_CAPPRESENT))
1356 		return (ENXIO);
1357 
1358 	/*
1359 	 * Determine the start pointer of the capabilities list.
1360 	 */
1361 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1362 	case 0:
1363 	case 1:
1364 		ptr = PCIR_CAP_PTR;
1365 		break;
1366 	case 2:
1367 		ptr = PCIR_CAP_PTR_2;
1368 		break;
1369 	default:
1370 		/* XXX: panic? */
1371 		return (ENXIO);		/* no extended capabilities support */
1372 	}
1373 	ptr = pci_read_config(child, ptr, 1);
1374 
1375 	/*
1376 	 * Traverse the capabilities list.
1377 	 */
1378 	while (ptr != 0) {
1379 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1380 			if (capreg != NULL)
1381 				*capreg = ptr;
1382 			return (0);
1383 		}
1384 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1385 	}
1386 
1387 	return (ENOENT);
1388 }
1389 
1390 /*
1391  * Support for MSI-X message interrupts.
1392  */
1393 static void
1394 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1395     uint32_t data)
1396 {
1397 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1398 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1399 	uint32_t offset;
1400 
1401 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1402 	offset = msix->msix_table_offset + index * 16;
1403 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1404 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1405 	bus_write_4(msix->msix_table_res, offset + 8, data);
1406 
1407 	/* Enable MSI -> HT mapping. */
1408 	pci_ht_map_msi(dev, address);
1409 }
1410 
1411 static void
1412 pci_mask_msix_vector(device_t dev, u_int index)
1413 {
1414 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1415 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1416 	uint32_t offset, val;
1417 
1418 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1419 	offset = msix->msix_table_offset + index * 16 + 12;
1420 	val = bus_read_4(msix->msix_table_res, offset);
1421 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1422 		val |= PCIM_MSIX_VCTRL_MASK;
1423 		bus_write_4(msix->msix_table_res, offset, val);
1424 	}
1425 }
1426 
1427 static void
1428 pci_unmask_msix_vector(device_t dev, u_int index)
1429 {
1430 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1431 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1432 	uint32_t offset, val;
1433 
1434 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1435 	offset = msix->msix_table_offset + index * 16 + 12;
1436 	val = bus_read_4(msix->msix_table_res, offset);
1437 	if (val & PCIM_MSIX_VCTRL_MASK) {
1438 		val &= ~PCIM_MSIX_VCTRL_MASK;
1439 		bus_write_4(msix->msix_table_res, offset, val);
1440 	}
1441 }
1442 
1443 int
1444 pci_pending_msix_vector(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, bit;
1449 
1450 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1451 	    ("MSI-X is not setup yet"));
1452 
1453 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1454 	offset = msix->msix_pba_offset + (index / 32) * 4;
1455 	bit = 1 << index % 32;
1456 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1457 }
1458 
1459 /*
1460  * Restore MSI-X registers and table during resume.  If MSI-X is
1461  * enabled then walk the virtual table to restore the actual MSI-X
1462  * table.
1463  */
1464 static void
1465 pci_resume_msix(device_t dev)
1466 {
1467 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1468 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1469 
1470 	if (msix->msix_table_res != NULL) {
1471 		const struct msix_vector *mv;
1472 
1473 		pci_mask_msix_allvectors(dev);
1474 
1475 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1476 			u_int vector;
1477 
1478 			if (mv->mv_address == 0)
1479 				continue;
1480 
1481 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1482 			pci_setup_msix_vector(dev, vector,
1483 			    mv->mv_address, mv->mv_data);
1484 			pci_unmask_msix_vector(dev, vector);
1485 		}
1486 	}
1487 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1488 	    msix->msix_ctrl, 2);
1489 }
1490 
1491 /*
1492  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1493  *
1494  * After this function returns, the MSI-X's rid will be saved in rid0.
1495  */
int
pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
    int *rid0, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct msix_vector *mv;
	struct resource_list_entry *rle;
	int error, irq, rid;

	/* pci_setup_msix() must have succeeded before vectors are handed out. */
	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
	KASSERT(vector < msix->msix_msgnum,
	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
		    vector, msix->msix_msgnum);
	}

	/* Set rid according to vector number */
	rid = PCI_MSIX_VEC2RID(vector);

	/* Vector has already been allocated */
	mv = pci_find_msix_vector(child, rid);
	if (mv != NULL)
		return EBUSY;

	/* Allocate a message. */
	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
	if (error)
		return error;
	/* Expose the IRQ as a SYS_RES_IRQ resource under this rid. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
	    irq, irq, 1, cpuid);

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
		    rle->start, cpuid);
	}

	/* Update counts of alloc'd messages. */
	msix->msix_alloc++;

	/*
	 * Track the vector; M_ZERO leaves mv_address == 0, marking it as
	 * not yet programmed into the MSI-X table.
	 */
	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
	mv->mv_rid = rid;
	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);

	/* Hand the resource id back to the caller. */
	*rid0 = rid;
	return 0;
}
1549 
/*
 * Release an MSI-X vector obtained via pci_alloc_msix_vector_method().
 * The vector must already be torn down (mv_address == 0) and its IRQ
 * resource must no longer be allocated by the caller.
 */
int
pci_release_msix_vector_method(device_t dev, device_t child, int rid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msix *msix = &dinfo->cfg.msix;
	struct resource_list_entry *rle;
	struct msix_vector *mv;
	int irq, cpuid;

	KASSERT(msix->msix_table_res != NULL &&
	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
	KASSERT(rid > 0, ("invalid rid %d", rid));

	mv = pci_find_msix_vector(child, rid);
	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));

	/* Make sure resource is no longer allocated. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
	KASSERT(rle->res == NULL,
	    ("MSI-X resource is still allocated, rid %d", rid));

	/* Remember IRQ/cpu before the entry is deleted. */
	irq = rle->start;
	cpuid = rle->cpuid;

	/* Free the resource list entries. */
	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);

	/* Release the IRQ. */
	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);

	/* Drop our bookkeeping for this vector. */
	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
	kfree(mv, M_DEVBUF);

	msix->msix_alloc--;
	return (0);
}
1589 
1590 /*
1591  * Return the max supported MSI-X messages this device supports.
1592  * Basically, assuming the MD code can alloc messages, this function
1593  * should return the maximum value that pci_alloc_msix() can return.
1594  * Thus, it is subject to the tunables, etc.
1595  */
1596 int
1597 pci_msix_count_method(device_t dev, device_t child)
1598 {
1599 	struct pci_devinfo *dinfo = device_get_ivars(child);
1600 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1601 
1602 	if (pci_do_msix && msix->msix_location != 0)
1603 		return (msix->msix_msgnum);
1604 	return (0);
1605 }
1606 
/*
 * Prepare a device for MSI-X use: locate the BAR-mapped vector table and
 * pending-bit array (PBA), record them in the config, and mask every
 * vector.  Returns 0 on success; ENXIO if rid 0 (INTx) is in use, plain
 * MSI is already allocated, MSI is blacklisted, or the required BAR(s)
 * are not mapped/active; ENODEV if there is no usable MSI-X capability.
 */
int
pci_setup_msix(device_t dev)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	struct resource *table_res, *pba_res;

	KASSERT(cfg->msix.msix_table_res == NULL &&
	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated MSIs? */
	if (cfg->msi.msi_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
	    !pci_do_msix)
		return (ENODEV);

	KASSERT(cfg->msix.msix_alloc == 0 &&
	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
	    ("MSI-X vector has been allocated"));

	/* Make sure the appropriate BARs are mapped. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If table and PBA share a BAR, rle still refers to the table BAR. */
	pba_res = rle->res;

	cfg->msix.msix_table_res = table_res;
	cfg->msix.msix_pba_res = pba_res;

	/* Start with every vector masked. */
	pci_mask_msix_allvectors(dev);

	return 0;
}
1663 
1664 void
1665 pci_teardown_msix(device_t dev)
1666 {
1667 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1668 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1669 
1670 	KASSERT(msix->msix_table_res != NULL &&
1671 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1672 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1673 	    ("MSI-X vector is still allocated"));
1674 
1675 	pci_mask_msix_allvectors(dev);
1676 
1677 	msix->msix_table_res = NULL;
1678 	msix->msix_pba_res = NULL;
1679 }
1680 
1681 void
1682 pci_enable_msix(device_t dev)
1683 {
1684 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1685 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1686 
1687 	KASSERT(msix->msix_table_res != NULL &&
1688 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1689 
1690 	/* Update control register to enable MSI-X. */
1691 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1692 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1693 	    msix->msix_ctrl, 2);
1694 }
1695 
1696 void
1697 pci_disable_msix(device_t dev)
1698 {
1699 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1701 
1702 	KASSERT(msix->msix_table_res != NULL &&
1703 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1704 
1705 	/* Disable MSI -> HT mapping. */
1706 	pci_ht_map_msi(dev, 0);
1707 
1708 	/* Update control register to disable MSI-X. */
1709 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1710 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1711 	    msix->msix_ctrl, 2);
1712 }
1713 
1714 static void
1715 pci_mask_msix_allvectors(device_t dev)
1716 {
1717 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1718 	u_int i;
1719 
1720 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1721 		pci_mask_msix_vector(dev, i);
1722 }
1723 
1724 static struct msix_vector *
1725 pci_find_msix_vector(device_t dev, int rid)
1726 {
1727 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1728 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1729 	struct msix_vector *mv;
1730 
1731 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1732 		if (mv->mv_rid == rid)
1733 			return mv;
1734 	}
1735 	return NULL;
1736 }
1737 
1738 /*
1739  * HyperTransport MSI mapping control
1740  */
1741 void
1742 pci_ht_map_msi(device_t dev, uint64_t addr)
1743 {
1744 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1745 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1746 
1747 	if (!ht->ht_msimap)
1748 		return;
1749 
1750 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1751 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1752 		/* Enable MSI -> HT mapping. */
1753 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1754 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1755 		    ht->ht_msictrl, 2);
1756 	}
1757 
1758 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1759 		/* Disable MSI -> HT mapping. */
1760 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1761 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1762 		    ht->ht_msictrl, 2);
1763 	}
1764 }
1765 
1766 /*
1767  * Support for MSI message signalled interrupts.
1768  */
1769 void
1770 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1771 {
1772 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1773 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1774 
1775 	/* Write data and address values. */
1776 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1777 	    address & 0xffffffff, 4);
1778 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1779 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1780 		    address >> 32, 4);
1781 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1782 		    data, 2);
1783 	} else
1784 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1785 		    2);
1786 
1787 	/* Enable MSI in the control register. */
1788 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1789 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1790 	    2);
1791 
1792 	/* Enable MSI -> HT mapping. */
1793 	pci_ht_map_msi(dev, address);
1794 }
1795 
1796 void
1797 pci_disable_msi(device_t dev)
1798 {
1799 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1800 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1801 
1802 	/* Disable MSI -> HT mapping. */
1803 	pci_ht_map_msi(dev, 0);
1804 
1805 	/* Disable MSI in the control register. */
1806 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1807 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1808 	    2);
1809 }
1810 
1811 /*
1812  * Restore MSI registers during resume.  If MSI is enabled then
1813  * restore the data and address registers in addition to the control
1814  * register.
1815  */
1816 static void
1817 pci_resume_msi(device_t dev)
1818 {
1819 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1820 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1821 	uint64_t address;
1822 	uint16_t data;
1823 
1824 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1825 		address = msi->msi_addr;
1826 		data = msi->msi_data;
1827 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1828 		    address & 0xffffffff, 4);
1829 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1830 			pci_write_config(dev, msi->msi_location +
1831 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1832 			pci_write_config(dev, msi->msi_location +
1833 			    PCIR_MSI_DATA_64BIT, data, 2);
1834 		} else
1835 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1836 			    data, 2);
1837 	}
1838 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1839 	    2);
1840 }
1841 
1842 /*
1843  * Returns true if the specified device is blacklisted because MSI
1844  * doesn't work.
1845  */
1846 int
1847 pci_msi_device_blacklisted(device_t dev)
1848 {
1849 	struct pci_quirk *q;
1850 
1851 	if (!pci_honor_msi_blacklist)
1852 		return (0);
1853 
1854 	for (q = &pci_quirks[0]; q->devid; q++) {
1855 		if (q->devid == pci_get_devid(dev) &&
1856 		    q->type == PCI_QUIRK_DISABLE_MSI)
1857 			return (1);
1858 	}
1859 	return (0);
1860 }
1861 
1862 /*
1863  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1864  * we just check for blacklisted chipsets as represented by the
1865  * host-PCI bridge at device 0:0:0.  In the future, it may become
1866  * necessary to check other system attributes, such as the kenv values
1867  * that give the motherboard manufacturer and model number.
1868  */
1869 static int
1870 pci_msi_blacklisted(void)
1871 {
1872 	device_t dev;
1873 
1874 	if (!pci_honor_msi_blacklist)
1875 		return (0);
1876 
1877 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1878 	if (!(pcie_chipset || pcix_chipset))
1879 		return (1);
1880 
1881 	dev = pci_find_bsf(0, 0, 0);
1882 	if (dev != NULL)
1883 		return (pci_msi_device_blacklisted(dev));
1884 	return (0);
1885 }
1886 
1887 /*
1888  * Attempt to allocate count MSI messages on start_cpuid.
1889  *
1890  * If start_cpuid < 0, then the MSI messages' target CPU will be
1891  * selected automaticly.
1892  *
1893  * If the caller explicitly specified the MSI messages' target CPU,
1894  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1895  * messages on the specified CPU, if the allocation fails due to MD
1896  * does not have enough vectors (EMSGSIZE), then we will try next
1897  * available CPU, until the allocation fails on all CPUs.
1898  *
1899  * EMSGSIZE will be returned, if all available CPUs does not have
1900  * enough vectors for the requested amount of MSI messages.  Caller
1901  * should either reduce the amount of MSI messages to be requested,
1902  * or simply giving up using MSI.
1903  *
1904  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1905  * returned in 'rid' array, if the allocation succeeds.
1906  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
    int start_cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int error, i, irqs[32], cpuid = 0;
	uint16_t ctrl;

	KASSERT(count != 0 && count <= 32 && powerof2(count),
	    ("invalid MSI count %d", count));
	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
	    !pci_do_msi)
		return (ENODEV);

	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
	    count, cfg->msi.msi_msgnum));

	if (bootverbose) {
		device_printf(child,
		    "attempting to allocate %d MSI vector%s (%d supported)\n",
		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
	}

	/* No explicit target CPU: round-robin over all CPUs. */
	if (start_cpuid < 0)
		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;

	/*
	 * Try start_cpuid first; on EMSGSIZE (MD out of vectors) fall
	 * through to the next CPU, any other error is fatal.
	 */
	error = EINVAL;
	for (i = 0; i < ncpus; ++i) {
		cpuid = (start_cpuid + i) % ncpus;

		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
		    cfg->msi.msi_msgnum, irqs, cpuid);
		if (error == 0)
			break;
		else if (error != EMSGSIZE)
			return error;
	}
	if (error)
		return error;

	/*
	 * We now have N messages mapped onto SYS_RES_IRQ resources in
	 * the irqs[] array, so add new resources starting at rid 1.
	 */
	for (i = 0; i < count; i++) {
		rid[i] = i + 1;
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, cpuid);
	}

	if (bootverbose) {
		if (count == 1) {
			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
			    irqs[0], cpuid);
		} else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < count; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[count - 1]);
			kprintf(" for MSI on cpu%d\n", cpuid);
		}
	}

	/* Update control register with count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	/* MME field encodes log2(count); count is a power of two. */
	ctrl |= (ffs(count) - 1) << 4;
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = count;
	cfg->msi.msi_handlers = 0;
	return (0);
}
2026 
2027 /* Release the MSI messages associated with this device. */
/*
 * Release every MSI message allocated to 'child' by pci_alloc_msi_method().
 * Returns ENODEV when none are allocated, EBUSY when any handler is still
 * established or any IRQ resource is still allocated.
 */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int i, irqs[32], cpuid = -1;

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/* Make sure none of the resources are allocated. */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		/* All messages must target the same CPU; collect the IRQs. */
		if (i == 0) {
			cpuid = rle->cpuid;
			KASSERT(cpuid >= 0 && cpuid < ncpus,
			    ("invalid MSI target cpuid %d", cpuid));
		} else {
			KASSERT(rle->cpuid == cpuid,
			    ("MSI targets different cpus, "
			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
		}
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
	    cpuid);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2080 
2081 /*
2082  * Return the max supported MSI messages this device supports.
2083  * Basically, assuming the MD code can alloc messages, this function
2084  * should return the maximum value that pci_alloc_msi() can return.
2085  * Thus, it is subject to the tunables, etc.
2086  */
2087 int
2088 pci_msi_count_method(device_t dev, device_t child)
2089 {
2090 	struct pci_devinfo *dinfo = device_get_ivars(child);
2091 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2092 
2093 	if (pci_do_msi && msi->msi_location != 0)
2094 		return (msi->msi_msgnum);
2095 	return (0);
2096 }
2097 
2098 /* kfree pcicfgregs structure and all depending data structures */
2099 
2100 int
2101 pci_freecfg(struct pci_devinfo *dinfo)
2102 {
2103 	struct devlist *devlist_head;
2104 	int i;
2105 
2106 	devlist_head = &pci_devq;
2107 
2108 	if (dinfo->cfg.vpd.vpd_reg) {
2109 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2110 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2111 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2112 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2113 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2114 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2115 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2116 	}
2117 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2118 	kfree(dinfo, M_DEVBUF);
2119 
2120 	/* increment the generation count */
2121 	pci_generation++;
2122 
2123 	/* we're losing one device */
2124 	pci_numdevs--;
2125 	return (0);
2126 }
2127 
2128 /*
2129  * PCI power manangement
2130  */
2131 int
2132 pci_set_powerstate_method(device_t dev, device_t child, int state)
2133 {
2134 	struct pci_devinfo *dinfo = device_get_ivars(child);
2135 	pcicfgregs *cfg = &dinfo->cfg;
2136 	uint16_t status;
2137 	int oldstate, highest, delay;
2138 
2139 	if (cfg->pp.pp_cap == 0)
2140 		return (EOPNOTSUPP);
2141 
2142 	/*
2143 	 * Optimize a no state change request away.  While it would be OK to
2144 	 * write to the hardware in theory, some devices have shown odd
2145 	 * behavior when going from D3 -> D3.
2146 	 */
2147 	oldstate = pci_get_powerstate(child);
2148 	if (oldstate == state)
2149 		return (0);
2150 
2151 	/*
2152 	 * The PCI power management specification states that after a state
2153 	 * transition between PCI power states, system software must
2154 	 * guarantee a minimal delay before the function accesses the device.
2155 	 * Compute the worst case delay that we need to guarantee before we
2156 	 * access the device.  Many devices will be responsive much more
2157 	 * quickly than this delay, but there are some that don't respond
2158 	 * instantly to state changes.  Transitions to/from D3 state require
2159 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2160 	 * is done below with DELAY rather than a sleeper function because
2161 	 * this function can be called from contexts where we cannot sleep.
2162 	 */
2163 	highest = (oldstate > state) ? oldstate : state;
2164 	if (highest == PCI_POWERSTATE_D3)
2165 	    delay = 10000;
2166 	else if (highest == PCI_POWERSTATE_D2)
2167 	    delay = 200;
2168 	else
2169 	    delay = 0;
2170 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2171 	    & ~PCIM_PSTAT_DMASK;
2172 	switch (state) {
2173 	case PCI_POWERSTATE_D0:
2174 		status |= PCIM_PSTAT_D0;
2175 		break;
2176 	case PCI_POWERSTATE_D1:
2177 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2178 			return (EOPNOTSUPP);
2179 		status |= PCIM_PSTAT_D1;
2180 		break;
2181 	case PCI_POWERSTATE_D2:
2182 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2183 			return (EOPNOTSUPP);
2184 		status |= PCIM_PSTAT_D2;
2185 		break;
2186 	case PCI_POWERSTATE_D3:
2187 		status |= PCIM_PSTAT_D3;
2188 		break;
2189 	default:
2190 		return (EINVAL);
2191 	}
2192 
2193 	if (bootverbose)
2194 		kprintf(
2195 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2196 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2197 		    dinfo->cfg.func, oldstate, state);
2198 
2199 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2200 	if (delay)
2201 		DELAY(delay);
2202 	return (0);
2203 }
2204 
2205 int
2206 pci_get_powerstate_method(device_t dev, device_t child)
2207 {
2208 	struct pci_devinfo *dinfo = device_get_ivars(child);
2209 	pcicfgregs *cfg = &dinfo->cfg;
2210 	uint16_t status;
2211 	int result;
2212 
2213 	if (cfg->pp.pp_cap != 0) {
2214 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2215 		switch (status & PCIM_PSTAT_DMASK) {
2216 		case PCIM_PSTAT_D0:
2217 			result = PCI_POWERSTATE_D0;
2218 			break;
2219 		case PCIM_PSTAT_D1:
2220 			result = PCI_POWERSTATE_D1;
2221 			break;
2222 		case PCIM_PSTAT_D2:
2223 			result = PCI_POWERSTATE_D2;
2224 			break;
2225 		case PCIM_PSTAT_D3:
2226 			result = PCI_POWERSTATE_D3;
2227 			break;
2228 		default:
2229 			result = PCI_POWERSTATE_UNKNOWN;
2230 			break;
2231 		}
2232 	} else {
2233 		/* No support, device is always at D0 */
2234 		result = PCI_POWERSTATE_D0;
2235 	}
2236 	return (result);
2237 }
2238 
2239 /*
2240  * Some convenience functions for PCI device drivers.
2241  */
2242 
2243 static __inline void
2244 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2245 {
2246 	uint16_t	command;
2247 
2248 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2249 	command |= bit;
2250 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2251 }
2252 
2253 static __inline void
2254 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2255 {
2256 	uint16_t	command;
2257 
2258 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2259 	command &= ~bit;
2260 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2261 }
2262 
/* Enable bus-mastering (DMA) for the child device.  Always succeeds. */
int
pci_enable_busmaster_method(device_t dev, device_t child)
{
	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2269 
/* Disable bus-mastering (DMA) for the child device.  Always succeeds. */
int
pci_disable_busmaster_method(device_t dev, device_t child)
{
	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
	return (0);
}
2276 
2277 int
2278 pci_enable_io_method(device_t dev, device_t child, int space)
2279 {
2280 	uint16_t command;
2281 	uint16_t bit;
2282 	char *error;
2283 
2284 	bit = 0;
2285 	error = NULL;
2286 
2287 	switch(space) {
2288 	case SYS_RES_IOPORT:
2289 		bit = PCIM_CMD_PORTEN;
2290 		error = "port";
2291 		break;
2292 	case SYS_RES_MEMORY:
2293 		bit = PCIM_CMD_MEMEN;
2294 		error = "memory";
2295 		break;
2296 	default:
2297 		return (EINVAL);
2298 	}
2299 	pci_set_command_bit(dev, child, bit);
2300 	/* Some devices seem to need a brief stall here, what do to? */
2301 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2302 	if (command & bit)
2303 		return (0);
2304 	device_printf(child, "failed to enable %s mapping!\n", error);
2305 	return (ENXIO);
2306 }
2307 
2308 int
2309 pci_disable_io_method(device_t dev, device_t child, int space)
2310 {
2311 	uint16_t command;
2312 	uint16_t bit;
2313 	char *error;
2314 
2315 	bit = 0;
2316 	error = NULL;
2317 
2318 	switch(space) {
2319 	case SYS_RES_IOPORT:
2320 		bit = PCIM_CMD_PORTEN;
2321 		error = "port";
2322 		break;
2323 	case SYS_RES_MEMORY:
2324 		bit = PCIM_CMD_MEMEN;
2325 		error = "memory";
2326 		break;
2327 	default:
2328 		return (EINVAL);
2329 	}
2330 	pci_clear_command_bit(dev, child, bit);
2331 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2332 	if (command & bit) {
2333 		device_printf(child, "failed to disable %s mapping!\n", error);
2334 		return (ENXIO);
2335 	}
2336 	return (0);
2337 }
2338 
2339 /*
2340  * New style pci driver.  Parent device is either a pci-host-bridge or a
2341  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2342  */
2343 
/*
 * Print a verbose description of a discovered PCI device: location,
 * class, command/status registers, timing parameters, interrupt
 * routing, and any power-management, MSI, MSI-X, or PCI Express
 * capabilities.  Only emits output when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		/* intpin is 1-based ('a' == INTA); 0 means no interrupt pin. */
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			/* Table and PBA may share a single BAR or use two. */
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2401 
/*
 * Verbose-boot helper: describe the PCI Express capability of a device,
 * including the port type and (when implemented) the slot capabilities.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	/* Map the port type field to a human-readable tag. */
	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	case PCIE_ROOT_END_POINT:
		port_name = "ROOTDEV";
		break;
	case PCIE_ROOT_EVT_COLL:
		port_name = "ROOTEVTC";
		break;
	default:
		port_name = NULL;
		break;
	}
	/*
	 * Root/downstream ports without an implemented slot are not
	 * worth tagging.
	 */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
	kprintf("\n");
}
2466 
2467 static int
2468 pci_porten(device_t pcib, int b, int s, int f)
2469 {
2470 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2471 		& PCIM_CMD_PORTEN) != 0;
2472 }
2473 
2474 static int
2475 pci_memen(device_t pcib, int b, int s, int f)
2476 {
2477 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2478 		& PCIM_CMD_MEMEN) != 0;
2479 }
2480 
2481 /*
2482  * Add a resource based on a pci map register. Return 1 if the map
2483  * register is a 32bit map register or 2 if it is a 64bit register.
2484  */
2485 static int
2486 pci_add_map(device_t pcib, device_t bus, device_t dev,
2487     int b, int s, int f, int reg, struct resource_list *rl, int force,
2488     int prefetch)
2489 {
2490 	uint32_t map;
2491 	uint16_t old_cmd;
2492 	pci_addr_t base;
2493 	pci_addr_t start, end, count;
2494 	uint8_t ln2size;
2495 	uint8_t ln2range;
2496 	uint32_t testval;
2497 	uint16_t cmd;
2498 	int type;
2499 	int barlen;
2500 	struct resource *res;
2501 
2502 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2503 
2504         /* Disable access to device memory */
2505 	old_cmd = 0;
2506 	if (PCI_BAR_MEM(map)) {
2507 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2508 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2509 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2510 	}
2511 
2512 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2513 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2514 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2515 
2516         /* Restore memory access mode */
2517 	if (PCI_BAR_MEM(map)) {
2518 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2519 	}
2520 
2521 	if (PCI_BAR_MEM(map)) {
2522 		type = SYS_RES_MEMORY;
2523 		if (map & PCIM_BAR_MEM_PREFETCH)
2524 			prefetch = 1;
2525 	} else
2526 		type = SYS_RES_IOPORT;
2527 	ln2size = pci_mapsize(testval);
2528 	ln2range = pci_maprange(testval);
2529 	base = pci_mapbase(map);
2530 	barlen = ln2range == 64 ? 2 : 1;
2531 
2532 	/*
2533 	 * For I/O registers, if bottom bit is set, and the next bit up
2534 	 * isn't clear, we know we have a BAR that doesn't conform to the
2535 	 * spec, so ignore it.  Also, sanity check the size of the data
2536 	 * areas to the type of memory involved.  Memory must be at least
2537 	 * 16 bytes in size, while I/O ranges must be at least 4.
2538 	 */
2539 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2540 		return (barlen);
2541 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2542 	    (type == SYS_RES_IOPORT && ln2size < 2))
2543 		return (barlen);
2544 
2545 	if (ln2range == 64)
2546 		/* Read the other half of a 64bit map register */
2547 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2548 	if (bootverbose) {
2549 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2550 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2551 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2552 			kprintf(", port disabled\n");
2553 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2554 			kprintf(", memory disabled\n");
2555 		else
2556 			kprintf(", enabled\n");
2557 	}
2558 
2559 	/*
2560 	 * If base is 0, then we have problems.  It is best to ignore
2561 	 * such entries for the moment.  These will be allocated later if
2562 	 * the driver specifically requests them.  However, some
2563 	 * removable busses look better when all resources are allocated,
2564 	 * so allow '0' to be overriden.
2565 	 *
2566 	 * Similarly treat maps whose values is the same as the test value
2567 	 * read back.  These maps have had all f's written to them by the
2568 	 * BIOS in an attempt to disable the resources.
2569 	 */
2570 	if (!force && (base == 0 || map == testval))
2571 		return (barlen);
2572 	if ((u_long)base != base) {
2573 		device_printf(bus,
2574 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2575 		    pci_get_domain(dev), b, s, f, reg);
2576 		return (barlen);
2577 	}
2578 
2579 	/*
2580 	 * This code theoretically does the right thing, but has
2581 	 * undesirable side effects in some cases where peripherals
2582 	 * respond oddly to having these bits enabled.  Let the user
2583 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2584 	 * default).
2585 	 */
2586 	if (pci_enable_io_modes) {
2587 		/* Turn on resources that have been left off by a lazy BIOS */
2588 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2589 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2590 			cmd |= PCIM_CMD_PORTEN;
2591 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2592 		}
2593 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2594 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2595 			cmd |= PCIM_CMD_MEMEN;
2596 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2597 		}
2598 	} else {
2599 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2600 			return (barlen);
2601 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2602 			return (barlen);
2603 	}
2604 
2605 	count = 1 << ln2size;
2606 	if (base == 0 || base == pci_mapbase(testval)) {
2607 		start = 0;	/* Let the parent decide. */
2608 		end = ~0ULL;
2609 	} else {
2610 		start = base;
2611 		end = base + (1 << ln2size) - 1;
2612 	}
2613 	resource_list_add(rl, type, reg, start, end, count, -1);
2614 
2615 	/*
2616 	 * Try to allocate the resource for this BAR from our parent
2617 	 * so that this resource range is already reserved.  The
2618 	 * driver for this device will later inherit this resource in
2619 	 * pci_alloc_resource().
2620 	 */
2621 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2622 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2623 	if (res == NULL) {
2624 		/*
2625 		 * If the allocation fails, delete the resource list
2626 		 * entry to force pci_alloc_resource() to allocate
2627 		 * resources from the parent.
2628 		 */
2629 		resource_list_delete(rl, type, reg);
2630 #ifdef PCI_BAR_CLEAR
2631 		/* Clear the BAR */
2632 		start = 0;
2633 #else	/* !PCI_BAR_CLEAR */
2634 		/*
2635 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2636 		 * PCI function, clearing the BAR causes HPET timer
2637 		 * stop ticking.
2638 		 */
2639 		if (bootverbose) {
2640 			kprintf("pci:%d:%d:%d: resource reservation failed "
2641 				"%#jx - %#jx\n", b, s, f,
2642 				(intmax_t)start, (intmax_t)end);
2643 		}
2644 		return (barlen);
2645 #endif	/* PCI_BAR_CLEAR */
2646 	} else {
2647 		start = rman_get_start(res);
2648 	}
2649 	pci_write_config(dev, reg, start, 4);
2650 	if (ln2range == 64)
2651 		pci_write_config(dev, reg + 4, start >> 32, 4);
2652 	return (barlen);
2653 }
2654 
2655 /*
2656  * For ATA devices we need to decide early what addressing mode to use.
2657  * Legacy demands that the primary and secondary ATA ports sits on the
2658  * same addresses that old ISA hardware did. This dictates that we use
2659  * those addresses and ignore the BAR's if we cannot set PCI native
2660  * addressing mode.
2661  */
2662 static void
2663 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2664     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2665 {
2666 	int rid, type, progif;
2667 #if 0
2668 	/* if this device supports PCI native addressing use it */
2669 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2670 	if ((progif & 0x8a) == 0x8a) {
2671 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2672 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2673 			kprintf("Trying ATA native PCI addressing mode\n");
2674 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2675 		}
2676 	}
2677 #endif
2678 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2679 	type = SYS_RES_IOPORT;
2680 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2681 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2682 		    prefetchmask & (1 << 0));
2683 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2684 		    prefetchmask & (1 << 1));
2685 	} else {
2686 		rid = PCIR_BAR(0);
2687 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2688 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2689 		    0, -1);
2690 		rid = PCIR_BAR(1);
2691 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2692 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2693 		    0, -1);
2694 	}
2695 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2696 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2697 		    prefetchmask & (1 << 2));
2698 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2699 		    prefetchmask & (1 << 3));
2700 	} else {
2701 		rid = PCIR_BAR(2);
2702 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2703 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2704 		    0, -1);
2705 		rid = PCIR_BAR(3);
2706 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2707 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2708 		    0, -1);
2709 	}
2710 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2711 	    prefetchmask & (1 << 4));
2712 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2713 	    prefetchmask & (1 << 5));
2714 }
2715 
/*
 * Work out which legacy (INTx) IRQ a device should use and record it as
 * the rid 0 SYS_RES_IRQ resource.  Priority order: a user tunable
 * (hw.pci<dom>.<bus>.<slot>.<func>.INT<pin>.irq), then either the
 * intline register or a bus-routed interrupt depending on force_route.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
		/* Only IRQs 1-254 are plausible legacy interrupt numbers. */
		if (irq >= 255 || irq <= 0) {
			irq = PCI_INVALID_IRQ;
		} else {
			/* The override must map to a real interrupt source. */
			if (machintr_legacy_intr_find(irq,
			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
				device_printf(dev,
				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
				    cfg->intpin + 'A' - 1, irq);
				irq = PCI_INVALID_IRQ;
			} else {
				BUS_CONFIG_INTR(bus, dev, irq,
				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
			}
		}
	}

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_legacy_intr_cpuid(irq));
}
2779 
2780 #if NUSB4BSD > 0
/*
 * Perform early OHCI takeover from SMM.
 *
 * If the BIOS/SMM still owns the controller (OHCI_IR set), request an
 * ownership change and wait up to ~100ms; if SMM does not respond,
 * reset the controller.  Interrupts are masked afterwards so the SMM
 * handler cannot fire once we own the hardware.
 */
static void
ohci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t ctl;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	ctl = bus_read_4(res, OHCI_CONTROL);
	if (ctl & OHCI_IR) {
		if (bootverbose)
			kprintf("ohci early: "
			    "SMM active, request owner change\n");
		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
		/* Poll up to 100 times, 1ms apart, for SMM to let go. */
		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
			DELAY(1000);
			ctl = bus_read_4(res, OHCI_CONTROL);
		}
		if (ctl & OHCI_IR) {
			if (bootverbose)
				kprintf("ohci early: "
				    "SMM does not respond, resetting\n");
			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
		}
		/* Disable interrupts */
		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
	}

	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2817 
2818 /* Perform early UHCI takeover from SMM. */
2819 static void
2820 uhci_early_takeover(device_t self)
2821 {
2822 	struct resource *res;
2823 	int rid;
2824 
2825 	/*
2826 	 * Set the PIRQD enable bit and switch off all the others. We don't
2827 	 * want legacy support to interfere with us XXX Does this also mean
2828 	 * that the BIOS won't touch the keyboard anymore if it is connected
2829 	 * to the ports of the root hub?
2830 	 */
2831 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2832 
2833 	/* Disable interrupts */
2834 	rid = PCI_UHCI_BASE_REG;
2835 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2836 	if (res != NULL) {
2837 		bus_write_2(res, UHCI_INTR, 0);
2838 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2839 	}
2840 }
2841 
/*
 * Perform early EHCI takeover from SMM.
 *
 * Walk the EHCI extended capability list in config space looking for
 * the Legacy Support capability.  If the BIOS semaphore is set, set
 * the OS semaphore and wait up to ~100ms for the BIOS to release the
 * controller, then mask its interrupts.
 */
static void
ehci_early_takeover(device_t self)
{
	struct resource *res;
	uint32_t cparams;
	uint32_t eec;
	uint32_t eecp;
	uint32_t bios_sem;
	uint32_t offs;
	int rid;
	int i;

	rid = PCIR_BAR(0);
	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (res == NULL)
		return;

	cparams = bus_read_4(res, EHCI_HCCPARAMS);

	/* Synchronise with the BIOS if it owns the controller. */
	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
	    eecp = EHCI_EECP_NEXT(eec)) {
		eec = pci_read_config(self, eecp, 4);
		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
			continue;
		}
		bios_sem = pci_read_config(self, eecp +
		    EHCI_LEGSUP_BIOS_SEM, 1);
		if (bios_sem == 0) {
			continue;
		}
		if (bootverbose)
			kprintf("ehci early: "
			    "SMM active, request owner change\n");

		/* Claim ownership by setting the OS semaphore. */
		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);

		/* Poll up to 100 times, 1ms apart, for the BIOS to let go. */
		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
			DELAY(1000);
			bios_sem = pci_read_config(self, eecp +
			    EHCI_LEGSUP_BIOS_SEM, 1);
		}

		if (bios_sem != 0) {
			if (bootverbose)
				kprintf("ehci early: "
				    "SMM does not respond\n");
		}
		/* Disable interrupts */
		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
		bus_write_4(res, offs + EHCI_USBINTR, 0);
	}
	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
}
2897 
2898 /* Perform early XHCI takeover from SMM. */
2899 static void
2900 xhci_early_takeover(device_t self)
2901 {
2902 	struct resource *res;
2903 	uint32_t cparams;
2904 	uint32_t eec;
2905 	uint32_t eecp;
2906 	uint32_t bios_sem;
2907 	uint32_t offs;
2908 	int rid;
2909 	int i;
2910 
2911 	rid = PCIR_BAR(0);
2912 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2913 	if (res == NULL)
2914 		return;
2915 
2916 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2917 
2918 	eec = -1;
2919 
2920 	/* Synchronise with the BIOS if it owns the controller. */
2921 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2922 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2923 		eec = bus_read_4(res, eecp);
2924 
2925 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2926 			continue;
2927 
2928 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2929 
2930 		if (bios_sem == 0) {
2931 			if (bootverbose)
2932 				kprintf("xhci early: xhci is not owned by SMM\n");
2933 
2934 			continue;
2935 		}
2936 
2937 		if (bootverbose)
2938 			kprintf("xhci early: "
2939 			    "SMM active, request owner change\n");
2940 
2941 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
2942 
2943 		/* wait a maximum of 5 seconds */
2944 
2945 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
2946 			DELAY(1000);
2947 
2948 			bios_sem = bus_read_1(res, eecp +
2949 			    XHCI_XECP_BIOS_SEM);
2950 		}
2951 
2952 		if (bios_sem != 0) {
2953 			if (bootverbose) {
2954 				kprintf("xhci early: "
2955 				    "SMM does not respond\n");
2956 				kprintf("xhci early: "
2957 				    "taking xhci by force\n");
2958 			}
2959 			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
2960 		} else {
2961 			if (bootverbose)
2962 				kprintf("xhci early:"
2963 				    "handover successful\n");
2964 		}
2965 
2966 		/* Disable interrupts */
2967 		offs = bus_read_1(res, XHCI_CAPLENGTH);
2968 		bus_write_4(res, offs + XHCI_USBCMD, 0);
2969 		bus_read_4(res, offs + XHCI_USBSTS);
2970 	}
2971 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2972 }
2973 #endif /* NUSB4BSD > 0 */
2974 
/*
 * Probe all BARs of a newly discovered device and populate its resource
 * list: normal BAR probing (or legacy ATA handling), quirk-listed extra
 * map registers, interrupt (re-)routing, and early USB controller
 * takeover from SMM where configured.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 BAR slots consumed. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}

#if NUSB4BSD > 0
	/* Take USB host controllers away from the BIOS/SMM early. */
	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
			xhci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
			ehci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
			ohci_early_takeover(dev);
		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
			uhci_early_takeover(dev);
	}
#endif
}
3034 
/*
 * Enumerate configuration space on PCI bus 'busno' in 'domain' and add
 * a child device for every function that responds.  dinfo_size lets
 * subclassed busses allocate a larger per-device info structure.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
/* Shorthand for a config-space read of the slot/function being scanned. */
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		/* Assume single-function until the header type says otherwise. */
		pcifunchigh = 0;
		f = 0;
		DELAY(1);
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Unknown header types (including 0xff = empty slot) are skipped. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function bit set: probe all 8 functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
3067 
/*
 * Create a newbus child on 'bus' for the device described by 'dinfo',
 * wire up its ivars and resource list, and add its BAR resources.
 * The cfg save/restore pair snapshots config space and then reapplies
 * it (which also powers the device to D0 via pci_cfg_restore).
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	/* ivars must be set before anything queries the child. */
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	/* force=0, prefetchmask=0: only probe BARs the device implements. */
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
3082 
3083 static int
3084 pci_probe(device_t dev)
3085 {
3086 	device_set_desc(dev, "PCI bus");
3087 
3088 	/* Allow other subclasses to override this driver. */
3089 	return (-1000);
3090 }
3091 
/*
 * Attach the PCI bus: discover our domain/bus numbers from the parent
 * bridge, enumerate children, and let newbus attach them.
 */
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
3113 
/*
 * Bus suspend method: snapshot each child's config space, suspend the
 * children, then (when power management is enabled and ACPI is
 * present) drop attached type-0 devices into a low-power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		/* setstate=0: save registers but do not change power state yet. */
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			/* ACPI may override the default D3 target. */
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
3161 
/*
 * Bus resume method: power each child back to D0 (via ACPI when
 * enabled/present), restore its saved config space, then resume the
 * children through newbus.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3196 
/*
 * Locate the preloaded PCI vendor/device description database (if the
 * loader supplied one) and record its address and size for
 * pci_describe_device().
 */
static void
pci_load_vendor_data(void)
{
	caddr_t vendordata, info;

	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
		info = preload_search_info(vendordata, MODINFO_ADDR);
		pci_vendordata = *(char **)info;
		info = preload_search_info(vendordata, MODINFO_SIZE);
		pci_vendordata_size = *(size_t *)info;
		/*
		 * terminate the database
		 *
		 * NOTE(review): this writes one byte past the reported
		 * size; presumably the loader leaves slack after the
		 * module — confirm before relying on it.  The parser
		 * depends on this trailing newline as a safeguard.
		 */
		pci_vendordata[pci_vendordata_size] = '\n';
	}
}
3211 
/*
 * Bus callback invoked when a new driver is registered: re-run
 * identify, then reprobe every child that is still unclaimed.  Config
 * space is restored before the probe (the device may have been powered
 * down) and saved/powered down again if no driver attaches.
 */
void
pci_driver_added(device_t dev, driver_t *driver)
{
	int numdevs;
	device_t *devlist;
	device_t child;
	struct pci_devinfo *dinfo;
	int i;

	if (bootverbose)
		device_printf(dev, "driver added\n");
	DEVICE_IDENTIFY(driver, dev);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		/* Only devices without an attached driver are candidates. */
		if (device_get_state(child) != DS_NOTPRESENT)
			continue;
		dinfo = device_get_ivars(child);
		pci_print_verbose(dinfo);
		if (bootverbose)
			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
			    dinfo->cfg.func);
		pci_cfg_restore(child, dinfo);
		/* Probe failed: save state and power the device back down. */
		if (device_probe_and_attach(child) != 0)
			pci_cfg_save(child, dinfo, 1);
	}
	kfree(devlist, M_TEMP);
}
3241 
3242 static void
3243 pci_child_detached(device_t parent __unused, device_t child)
3244 {
3245 	/* Turn child's power off */
3246 	pci_cfg_save(child, device_get_ivars(child), 1);
3247 }
3248 
/*
 * Bus setup_intr method.  After the generic setup succeeds, direct
 * children get extra care: legacy INTx is unmasked for rid 0, while
 * MSI/MSI-X rids are mapped through the parent bridge, programmed into
 * the device, and INTx is masked.  On any MSI mapping failure the
 * handler is torn down again and the error returned.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep,
    lwkt_serialize_t serializer, const char *desc)
{
	int rid, error;
	void *cookie;

	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer, desc);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/* Non-zero rid means an MSI or MSI-X message. */
		struct pci_devinfo *dinfo = device_get_ivars(child);
		uint64_t addr;
		uint32_t data;

		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			/* First handler for this MSI: map and program it. */
			if (msi->msi_addr == 0) {
				KASSERT(msi->msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data,
				    rman_get_cpuid(irq));
				if (error)
					goto bad;
				msi->msi_addr = addr;
				msi->msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			msi->msi_handlers++;
		} else {
			/* Must be MSI-X: each rid maps to its own vector. */
			struct msix_vector *mv;
			u_int vector;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI-X or MSI rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address == 0,
			    ("MSI-X rid %d has been setup", rid));

			error = PCIB_MAP_MSI(device_get_parent(dev),
			    child, rman_get_start(irq), &addr, &data,
			    rman_get_cpuid(irq));
			if (error)
				goto bad;
			mv->mv_address = addr;
			mv->mv_data = data;

			vector = PCI_MSIX_RID2VEC(rid);
			pci_setup_msix_vector(child, vector,
			    mv->mv_address, mv->mv_data);
			pci_unmask_msix_vector(child, vector);
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
		/* Fallthrough into 'bad' with error == 0 is the success path. */
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
	*cookiep = cookie;
	return (0);
}
3339 
/*
 * Bus teardown_intr method: undo pci_setup_intr().  Legacy INTx (rid
 * 0) is masked; for MSI the handler count is decremented and MSI
 * disabled on the last one; for MSI-X the vector is masked and its
 * mapping cleared.  The generic teardown runs last.
 */
int
pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
    void *cookie)
{
	int rid, error;

	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
		return (EINVAL);

	/* If this isn't a direct child, just bail out */
	if (device_get_parent(child) != dev)
		return(bus_generic_teardown_intr(dev, child, irq, cookie));

	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Mask INTx */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		struct pci_devinfo *dinfo = device_get_ivars(child);

		/*
		 * Check to see if the interrupt is MSI or MSI-X.  If so,
		 * decrement the appropriate handlers count and mask the
		 * MSI-X message, or disable MSI messages if the count
		 * drops to 0.
		 */
		if (dinfo->cfg.msi.msi_alloc > 0) {
			struct pcicfg_msi *msi = &dinfo->cfg.msi;

			KASSERT(rid <= msi->msi_alloc,
			    ("MSI-X index too high"));
			KASSERT(msi->msi_handlers > 0,
			    ("MSI rid %d is not setup", rid));

			msi->msi_handlers--;
			if (msi->msi_handlers == 0)
				pci_disable_msi(child);
		} else {
			struct msix_vector *mv;

			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X rid %d allocated", rid));

			mv = pci_find_msix_vector(child, rid);
			KASSERT(mv != NULL,
			    ("MSI-X rid %d is not allocated", rid));
			KASSERT(mv->mv_address != 0,
			    ("MSI-X rid %d has not been setup", rid));

			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
			/* Mark the vector unmapped for the next setup_intr. */
			mv->mv_address = 0;
			mv->mv_data = 0;
		}
	}
	error = bus_generic_teardown_intr(dev, child, irq, cookie);
	/* Generic teardown is not expected to fail for MSI/MSI-X rids. */
	if (rid > 0)
		KASSERT(error == 0,
		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
	return (error);
}
3400 
3401 int
3402 pci_print_child(device_t dev, device_t child)
3403 {
3404 	struct pci_devinfo *dinfo;
3405 	struct resource_list *rl;
3406 	int retval = 0;
3407 
3408 	dinfo = device_get_ivars(child);
3409 	rl = &dinfo->resources;
3410 
3411 	retval += bus_print_child_header(dev, child);
3412 
3413 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3414 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3415 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3416 	if (device_get_flags(dev))
3417 		retval += kprintf(" flags %#x", device_get_flags(dev));
3418 
3419 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3420 	    pci_get_function(child));
3421 
3422 	retval += bus_print_child_footer(dev, child);
3423 
3424 	return (retval);
3425 }
3426 
/*
 * Class/subclass -> human-readable description table, consulted by
 * pci_probe_nomatch() when no driver claims a device and the vendor
 * database has no entry.  An entry with subclass == -1 names the whole
 * class; a matching subclass entry refines it.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}	/* sentinel: NULL desc terminates scans */
};
3518 
/*
 * Bus probe_nomatch method: describe a device that no driver claimed,
 * using the loaded vendor database when available and the
 * class/subclass table otherwise, then save its state and power it
 * down.
 */
void
pci_probe_nomatch(device_t dev, device_t child)
{
	int	i;
	char	*cp, *scp, *device;

	/*
	 * Look for a listing for this device in a loaded device database.
	 */
	if ((device = pci_describe_device(child)) != NULL) {
		device_printf(dev, "<%s>", device);
		/* pci_describe_device() transfers ownership of the string. */
		kfree(device, M_DEVBUF);
	} else {
		/*
		 * Scan the class/subclass descriptions for a general
		 * description.
		 */
		cp = "unknown";
		scp = NULL;
		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
				/* subclass == -1 is the class-wide entry. */
				if (pci_nomatch_tab[i].subclass == -1) {
					cp = pci_nomatch_tab[i].desc;
				} else if (pci_nomatch_tab[i].subclass ==
				    pci_get_subclass(child)) {
					scp = pci_nomatch_tab[i].desc;
				}
			}
		}
		device_printf(dev, "<%s%s%s>",
		    cp ? cp : "",
		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
		    scp ? scp : "");
	}
	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
		pci_get_vendor(child), pci_get_device(child),
		pci_get_slot(child), pci_get_function(child));
	if (pci_get_intpin(child) > 0) {
		int irq;

		irq = pci_get_irq(child);
		if (PCI_INTERRUPT_VALID(irq))
			kprintf(" irq %d", irq);
	}
	kprintf("\n");

	/* Unclaimed device: save its registers and power it down. */
	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
}
3567 
3568 /*
3569  * Parse the PCI device database, if loaded, and return a pointer to a
3570  * description of the device.
3571  *
3572  * The database is flat text formatted as follows:
3573  *
3574  * Any line not in a valid format is ignored.
3575  * Lines are terminated with newline '\n' characters.
3576  *
3577  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3578  * the vendor name.
3579  *
3580  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3581  * - devices cannot be listed without a corresponding VENDOR line.
3582  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3583  * another TAB, then the device name.
3584  */
3585 
3586 /*
3587  * Assuming (ptr) points to the beginning of a line in the database,
3588  * return the vendor or device and description of the next entry.
3589  * The value of (vendor) or (device) inappropriate for the entry type
3590  * is set to -1.  Returns nonzero at the end of the database.
3591  *
3592  * Note that this is slightly unrobust in the face of corrupt data;
3593  * we attempt to safeguard against this by spamming the end of the
3594  * database with a newline when we initialise.
3595  */
3596 static int
3597 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3598 {
3599 	char	*cp = *ptr;
3600 	int	left;
3601 
3602 	*device = -1;
3603 	*vendor = -1;
3604 	**desc = '\0';
3605 	for (;;) {
3606 		left = pci_vendordata_size - (cp - pci_vendordata);
3607 		if (left <= 0) {
3608 			*ptr = cp;
3609 			return(1);
3610 		}
3611 
3612 		/* vendor entry? */
3613 		if (*cp != '\t' &&
3614 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3615 			break;
3616 		/* device entry? */
3617 		if (*cp == '\t' &&
3618 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3619 			break;
3620 
3621 		/* skip to next line */
3622 		while (*cp != '\n' && left > 0) {
3623 			cp++;
3624 			left--;
3625 		}
3626 		if (*cp == '\n') {
3627 			cp++;
3628 			left--;
3629 		}
3630 	}
3631 	/* skip to next line */
3632 	while (*cp != '\n' && left > 0) {
3633 		cp++;
3634 		left--;
3635 	}
3636 	if (*cp == '\n' && left > 0)
3637 		cp++;
3638 	*ptr = cp;
3639 	return(0);
3640 }
3641 
/*
 * Look up 'dev' in the loaded vendor database and return a kmalloc'd
 * "vendor, device" description string (caller frees with M_DEVBUF),
 * or NULL if no database is loaded, allocation fails, or the vendor
 * is not listed.  An unlisted device under a known vendor is rendered
 * as its hex id.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80-byte scratch buffers match the %80[^\n] parse width. */
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device entry found. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor section: stop searching. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No description found: fall back to the numeric device id. */
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3694 
/*
 * Bus read_ivar method: expose the cached config-space fields of a
 * child to drivers via the PCI_IVAR_* accessors.  Returns ENOENT for
 * unknown ivars; PCI_IVAR_ETHADDR always fails with EINVAL here (no
 * generic way to supply a MAC address).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined 32-bit id: device in the high half, vendor low. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3786 
3787 int
3788 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3789 {
3790 	struct pci_devinfo *dinfo;
3791 
3792 	dinfo = device_get_ivars(child);
3793 
3794 	switch (which) {
3795 	case PCI_IVAR_INTPIN:
3796 		dinfo->cfg.intpin = value;
3797 		return (0);
3798 	case PCI_IVAR_ETHADDR:
3799 	case PCI_IVAR_SUBVENDOR:
3800 	case PCI_IVAR_SUBDEVICE:
3801 	case PCI_IVAR_VENDOR:
3802 	case PCI_IVAR_DEVICE:
3803 	case PCI_IVAR_DEVID:
3804 	case PCI_IVAR_CLASS:
3805 	case PCI_IVAR_SUBCLASS:
3806 	case PCI_IVAR_PROGIF:
3807 	case PCI_IVAR_REVID:
3808 	case PCI_IVAR_IRQ:
3809 	case PCI_IVAR_DOMAIN:
3810 	case PCI_IVAR_BUS:
3811 	case PCI_IVAR_SLOT:
3812 	case PCI_IVAR_FUNCTION:
3813 		return (EINVAL);	/* disallow for now */
3814 
3815 	default:
3816 		return (ENOENT);
3817 	}
3818 }
3819 #ifdef notyet
3820 #include "opt_ddb.h"
3821 #ifdef DDB
3822 #include <ddb/ddb.h>
3823 #include <sys/cons.h>
3824 
3825 /*
3826  * List resources based on pci map registers, used for within ddb
3827  */
3828 
/* DDB "show pciregs" command: walk the global device queue and print
 * one summary line per PCI device (name, selector, class, ids). */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices print as "none<N>" with a running count. */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3868 #endif /* DDB */
3869 #endif
3870 
/*
 * Lazily size and allocate the resource behind a BAR that has no entry
 * in the child's resource list yet.  Performs the standard BAR sizing
 * probe (write all-ones, read back), allocates a suitably sized and
 * aligned resource from the parent, records it in the resource list,
 * and programs the allocated address back into the BAR.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	/* Save the current BAR value, then probe its size with all-ones. */
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* 64-bit BAR: fold in the upper dword of the saved address. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	/* The requested resource type must match what the BAR decodes. */
	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	/* BARs must be naturally aligned to their size. */
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the final address (allocated or original) into the BAR. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3969 
3970 
/*
 * Bus alloc_resource method with lazy PCI resource allocation.  For
 * direct children: routes a legacy interrupt on demand, refuses INTx
 * once MSI/MSI-X is allocated, sizes/allocates unprobed BARs via
 * pci_alloc_map(), and hands back (activating if requested) resources
 * already held in the child's resource list.  Everything else falls
 * through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			/* rids inside the BAR range are real BARs. */
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			/* BAR never sized: probe and allocate it now. */
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
4047 
/*
 * Bus method: delete a resource entry for a direct child.
 *
 * Releases the underlying resource (if any and if it is safely
 * releasable), removes the resource-list entry, clears the matching
 * config register, and finally propagates the deletion to our parent.
 * Silently does nothing for devices that are not direct children.
 */
void
pci_delete_resource(device_t dev, device_t child, int type, int rid)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	if (device_get_parent(child) != dev)
		return;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;
	rle = resource_list_find(rl, type, rid);
	if (rle) {
		if (rle->res) {
			/*
			 * Refuse to delete a resource the child still
			 * owns or that is still active; warn instead.
			 */
			if (rman_get_device(rle->res) != dev ||
			    rman_get_flags(rle->res) & RF_ACTIVE) {
				device_printf(dev, "delete_resource: "
				    "Resource still owned by child, oops. "
				    "(type=%d, rid=%d, addr=%lx)\n",
				    rle->type, rle->rid,
				    rman_get_start(rle->res));
				return;
			}
			bus_release_resource(dev, type, rid, rle->res);
		}
		resource_list_delete(rl, type, rid);
	}
	/*
	 * Why do we turn off the PCI configuration BAR when we delete a
	 * resource? -- imp
	 */
	pci_write_config(child, rid, 0, 4);
	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
}
4083 
4084 struct resource_list *
4085 pci_get_resource_list (device_t dev, device_t child)
4086 {
4087 	struct pci_devinfo *dinfo = device_get_ivars(child);
4088 
4089 	if (dinfo == NULL)
4090 		return (NULL);
4091 
4092 	return (&dinfo->resources);
4093 }
4094 
4095 uint32_t
4096 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4097 {
4098 	struct pci_devinfo *dinfo = device_get_ivars(child);
4099 	pcicfgregs *cfg = &dinfo->cfg;
4100 
4101 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4102 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4103 }
4104 
4105 void
4106 pci_write_config_method(device_t dev, device_t child, int reg,
4107     uint32_t val, int width)
4108 {
4109 	struct pci_devinfo *dinfo = device_get_ivars(child);
4110 	pcicfgregs *cfg = &dinfo->cfg;
4111 
4112 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4113 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4114 }
4115 
4116 int
4117 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4118     size_t buflen)
4119 {
4120 
4121 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4122 	    pci_get_function(child));
4123 	return (0);
4124 }
4125 
4126 int
4127 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4128     size_t buflen)
4129 {
4130 	struct pci_devinfo *dinfo;
4131 	pcicfgregs *cfg;
4132 
4133 	dinfo = device_get_ivars(child);
4134 	cfg = &dinfo->cfg;
4135 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4136 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4137 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4138 	    cfg->progif);
4139 	return (0);
4140 }
4141 
4142 int
4143 pci_assign_interrupt_method(device_t dev, device_t child)
4144 {
4145 	struct pci_devinfo *dinfo = device_get_ivars(child);
4146 	pcicfgregs *cfg = &dinfo->cfg;
4147 
4148 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4149 	    cfg->intpin));
4150 }
4151 
/*
 * Module event handler: set up the global PCI device list and the
 * /dev/pci control device on load, tear the device down on unload.
 * Always returns 0 (no event is treated as a failure here).
 */
static int
pci_modevent(module_t mod, int what, void *arg)
{
	/* Persists across events so MOD_UNLOAD can destroy it. */
	static struct cdev *pci_cdev;

	switch (what) {
	case MOD_LOAD:
		STAILQ_INIT(&pci_devq);
		pci_generation = 0;
		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
				    "pci");
		pci_load_vendor_data();
		break;

	case MOD_UNLOAD:
		destroy_dev(pci_cdev);
		break;
	}

	return (0);
}
4173 
/*
 * Restore the config-space registers previously saved in dinfo by
 * pci_cfg_save(), typically on resume.  The device is first forced
 * back to D0, then the BARs and the writable type-0 header registers
 * are rewritten, and finally any MSI/MSI-X state is re-established.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4219 
/*
 * Save the device's writable type-0 config registers into dinfo so
 * they can be replayed by pci_cfg_restore().  If setstate is non-zero
 * the device may additionally be powered down to D3, subject to the
 * pci_do_power_nodriver policy and the device's class.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Agressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4303 
4304 #ifdef COMPAT_OLDPCI
4305 
4306 /*
4307  * Locate the parent of a PCI device by scanning the PCI devlist
4308  * and return the entry for the parent.
4309  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4310  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4311  */
4312 pcicfgregs *
4313 pci_devlist_get_parent(pcicfgregs *cfg)
4314 {
4315 	struct devlist *devlist_head;
4316 	struct pci_devinfo *dinfo;
4317 	pcicfgregs *bridge_cfg;
4318 	int i;
4319 
4320 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4321 
4322 	/* If the device is on PCI bus 0, look for the host */
4323 	if (cfg->bus == 0) {
4324 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4325 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4326 			bridge_cfg = &dinfo->cfg;
4327 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4328 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4329 		    		&& bridge_cfg->bus == cfg->bus) {
4330 				return bridge_cfg;
4331 			}
4332 		}
4333 	}
4334 
4335 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4336 	if (cfg->bus > 0) {
4337 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4338 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4339 			bridge_cfg = &dinfo->cfg;
4340 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4341 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4342 				&& bridge_cfg->secondarybus == cfg->bus) {
4343 				return bridge_cfg;
4344 			}
4345 		}
4346 	}
4347 
4348 	return NULL;
4349 }
4350 
4351 #endif	/* COMPAT_OLDPCI */
4352 
4353 int
4354 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4355 {
4356 	int rid, type;
4357 	u_int flags;
4358 
4359 	rid = 0;
4360 	type = PCI_INTR_TYPE_LEGACY;
4361 	flags = RF_SHAREABLE | RF_ACTIVE;
4362 
4363 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4364 	if (msi_enable) {
4365 		int cpu;
4366 
4367 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4368 		if (cpu >= ncpus)
4369 			cpu = ncpus - 1;
4370 
4371 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4372 			flags &= ~RF_SHAREABLE;
4373 			type = PCI_INTR_TYPE_MSI;
4374 		}
4375 	}
4376 
4377 	*rid0 = rid;
4378 	*flags0 = flags;
4379 
4380 	return type;
4381 }
4382 
4383 /* Wrapper APIs suitable for device driver use. */
4384 void
4385 pci_save_state(device_t dev)
4386 {
4387 	struct pci_devinfo *dinfo;
4388 
4389 	dinfo = device_get_ivars(dev);
4390 	pci_cfg_save(dev, dinfo, 0);
4391 }
4392 
4393 void
4394 pci_restore_state(device_t dev)
4395 {
4396 	struct pci_devinfo *dinfo;
4397 
4398 	dinfo = device_get_ivars(dev);
4399 	pci_cfg_restore(dev, dinfo);
4400 }
4401