xref: /dragonfly/sys/bus/pci/pci.c (revision caa7a3ee)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/linker.h>
38 #include <sys/fcntl.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/endian.h>
44 #include <sys/machintr.h>
45 
46 #include <machine/msi_machdep.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 #include <sys/device.h>
55 
56 #include <sys/pciio.h>
57 #include <bus/pci/pcireg.h>
58 #include <bus/pci/pcivar.h>
59 #include <bus/pci/pci_private.h>
60 
61 #include <bus/u4b/controller/xhcireg.h>
62 #include <bus/u4b/controller/ehcireg.h>
63 #include <bus/u4b/controller/ohcireg.h>
64 #include <bus/u4b/controller/uhcireg.h>
65 
66 #include "pcib_if.h"
67 #include "pci_if.h"
68 
69 #ifdef __HAVE_ACPI
70 #include <contrib/dev/acpica/acpi.h>
71 #include "acpi_if.h"
72 #else
73 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
74 #endif
75 
76 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
77 
78 static uint32_t		pci_mapbase(unsigned mapreg);
79 static const char	*pci_maptype(unsigned mapreg);
80 static int		pci_mapsize(unsigned testval);
81 static int		pci_maprange(unsigned mapreg);
82 static void		pci_fixancient(pcicfgregs *cfg);
83 
84 static int		pci_porten(device_t pcib, int b, int s, int f);
85 static int		pci_memen(device_t pcib, int b, int s, int f);
86 static void		pci_assign_interrupt(device_t bus, device_t dev,
87 			    int force_route);
88 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
89 			    int b, int s, int f, int reg,
90 			    struct resource_list *rl, int force, int prefetch);
91 static int		pci_probe(device_t dev);
92 static int		pci_attach(device_t dev);
93 static void		pci_child_detached(device_t, device_t);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_setup_msix_vector(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix_vector(device_t dev, u_int index);
115 static void		pci_unmask_msix_vector(device_t dev, u_int index);
116 static void		pci_mask_msix_allvectors(device_t dev);
117 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pcie_slotimpl(const pcicfgregs *);
122 static void		pci_print_verbose_expr(const pcicfgregs *);
123 
124 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_subvendor(device_t, int, int,
130 			    pcicfgregs *);
131 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
132 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
133 
134 static device_method_t pci_methods[] = {
135 	/* Device interface */
136 	DEVMETHOD(device_probe,		pci_probe),
137 	DEVMETHOD(device_attach,	pci_attach),
138 	DEVMETHOD(device_detach,	bus_generic_detach),
139 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
140 	DEVMETHOD(device_suspend,	pci_suspend),
141 	DEVMETHOD(device_resume,	pci_resume),
142 
143 	/* Bus interface */
144 	DEVMETHOD(bus_print_child,	pci_print_child),
145 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
146 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
147 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
148 	DEVMETHOD(bus_driver_added,	pci_driver_added),
149 	DEVMETHOD(bus_child_detached,	pci_child_detached),
150 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
151 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
152 
153 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
154 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
155 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
156 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
157 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
158 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
159 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
160 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
161 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
162 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
163 
164 	/* PCI interface */
165 	DEVMETHOD(pci_read_config,	pci_read_config_method),
166 	DEVMETHOD(pci_write_config,	pci_write_config_method),
167 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
168 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
169 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
170 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
171 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
172 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
173 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
174 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
175 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
176 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
177 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
179 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
180 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
181 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
182 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
183 
184 	DEVMETHOD_END
185 };
186 
187 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
188 
189 static devclass_t pci_devclass;
190 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
191 MODULE_VERSION(pci, 1);
192 
193 static char	*pci_vendordata;
194 static size_t	pci_vendordata_size;
195 
196 
197 static const struct pci_read_cap {
198 	int		cap;
199 	pci_read_cap_t	read_cap;
200 } pci_read_caps[] = {
201 	{ PCIY_PMG,		pci_read_cap_pmgt },
202 	{ PCIY_HT,		pci_read_cap_ht },
203 	{ PCIY_MSI,		pci_read_cap_msi },
204 	{ PCIY_MSIX,		pci_read_cap_msix },
205 	{ PCIY_VPD,		pci_read_cap_vpd },
206 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
207 	{ PCIY_PCIX,		pci_read_cap_pcix },
208 	{ PCIY_EXPRESS,		pci_read_cap_express },
209 	{ 0, NULL } /* required last entry */
210 };
211 
212 struct pci_quirk {
213 	uint32_t devid;	/* Vendor/device of the card */
214 	int	type;
215 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
216 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
217 	int	arg1;
218 	int	arg2;
219 };
220 
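/*
 * Each devid below packs the 16-bit device ID in the high word and the
 * 16-bit vendor ID in the low word, matching the PCIR_DEVVENDOR layout;
 * e.g. 0x71138086 is device 0x7113 from vendor 0x8086 (Intel).
 */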
221 struct pci_quirk pci_quirks[] = {
222 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
223 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
224 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
225 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
226 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
227 
228 	/*
229 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
230 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
231 	 */
232 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
233 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 
235 	/*
236 	 * MSI doesn't work on earlier Intel chipsets including
237 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
238 	 */
239 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
240 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 
247 	/*
248 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
249 	 * bridge.
250 	 */
251 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
252 
253 	{ 0 }
254 };
255 
256 /* map register information */
257 #define	PCI_MAPMEM	0x01	/* memory map */
258 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
259 #define	PCI_MAPPORT	0x04	/* port map */
260 
261 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
262 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
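/*
 * Vector numbers and resource ids are offset by one because SYS_RES_IRQ
 * rid 0 is reserved for the device's legacy INTx interrupt; MSI-X
 * vectors therefore use rids starting from 1.
 */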
263 
264 struct devlist pci_devq;
265 uint32_t pci_generation;
266 uint32_t pci_numdevs = 0;
267 static int pcie_chipset, pcix_chipset;
268 
269 /* sysctl vars */
270 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
271 
272 static int pci_enable_io_modes = 1;
273 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
274 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
275     &pci_enable_io_modes, 1,
276     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
277 enable these bits correctly.  We'd like to do this all the time, but there\n\
278 are some peripherals that this causes problems with.");
279 
280 static int pci_do_power_nodriver = 0;
281 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
282 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
283     &pci_do_power_nodriver, 0,
284   "Place a function into D3 state when no driver attaches to it.  0 means\n\
285 disable.  1 means conservatively place devices into D3 state.  2 means\n\
286 aggressively place devices into D3 state.  3 means put absolutely everything\n\
287 in D3 state.");
288 
289 static int pci_do_power_resume = 1;
290 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
291 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
292     &pci_do_power_resume, 1,
293   "Transition from D3 -> D0 on resume.");
294 
295 static int pci_do_msi = 1;
296 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
297 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
298     "Enable support for MSI interrupts");
299 
300 static int pci_do_msix = 1;
301 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
302 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
303     "Enable support for MSI-X interrupts");
304 
305 static int pci_honor_msi_blacklist = 1;
306 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
307 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
308     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
309 
310 #if defined(__x86_64__)
311 static int pci_usb_takeover = 1;
312 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
313 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
314     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
315 Disable this if you depend on BIOS emulation of USB devices, that is,\n\
316 you use USB devices (like a keyboard or mouse) but do not load USB drivers");
317 #endif
318 
319 static int pci_msi_cpuid;
320 
321 /* Find a device_t by bus/slot/function in domain 0 */
322 
323 device_t
324 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
325 {
326 
327 	return (pci_find_dbsf(0, bus, slot, func));
328 }
329 
330 /* Find a device_t by domain/bus/slot/function */
331 
332 device_t
333 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
334 {
335 	struct pci_devinfo *dinfo;
336 
337 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
338 		if ((dinfo->cfg.domain == domain) &&
339 		    (dinfo->cfg.bus == bus) &&
340 		    (dinfo->cfg.slot == slot) &&
341 		    (dinfo->cfg.func == func)) {
342 			return (dinfo->cfg.dev);
343 		}
344 	}
345 
346 	return (NULL);
347 }
348 
349 /* Find a device_t by vendor/device ID */
350 
351 device_t
352 pci_find_device(uint16_t vendor, uint16_t device)
353 {
354 	struct pci_devinfo *dinfo;
355 
356 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
357 		if ((dinfo->cfg.vendor == vendor) &&
358 		    (dinfo->cfg.device == device)) {
359 			return (dinfo->cfg.dev);
360 		}
361 	}
362 
363 	return (NULL);
364 }
365 
366 device_t
367 pci_find_class(uint8_t class, uint8_t subclass)
368 {
369 	struct pci_devinfo *dinfo;
370 
371 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
372 		if (dinfo->cfg.baseclass == class &&
373 		    dinfo->cfg.subclass == subclass) {
374 			return (dinfo->cfg.dev);
375 		}
376 	}
377 
378 	return (NULL);
379 }
380 
381 device_t
382 pci_iterate_class(struct pci_devinfo **dinfop, uint8_t class, uint8_t subclass)
383 {
384 	struct pci_devinfo *dinfo;
385 
386 	if (*dinfop)
387 		dinfo = STAILQ_NEXT(*dinfop, pci_links);
388 	else
389 		dinfo = STAILQ_FIRST(&pci_devq);
390 
391 	while (dinfo) {
392 		if (dinfo->cfg.baseclass == class &&
393 		    dinfo->cfg.subclass == subclass) {
394 			*dinfop = dinfo;
395 			return (dinfo->cfg.dev);
396 		}
397 		dinfo = STAILQ_NEXT(dinfo, pci_links);
398 	}
399 	*dinfop = NULL;
400 	return (NULL);
401 }
402 
403 /* return base address of memory or port map */
404 
405 static uint32_t
406 pci_mapbase(uint32_t mapreg)
407 {
408 
409 	if (PCI_BAR_MEM(mapreg))
410 		return (mapreg & PCIM_BAR_MEM_BASE);
411 	else
412 		return (mapreg & PCIM_BAR_IO_BASE);
413 }
414 
415 /* return map type of memory or port map */
416 
417 static const char *
418 pci_maptype(unsigned mapreg)
419 {
420 
421 	if (PCI_BAR_IO(mapreg))
422 		return ("I/O Port");
423 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
424 		return ("Prefetchable Memory");
425 	return ("Memory");
426 }
427 
428 /* return log2 of map size decoded for memory or port map */
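/*
 * For example, probing a 4KB memory BAR with all-ones reads back
 * 0xfffff000 once pci_mapbase() strips the low flag bits; the lowest
 * set bit is bit 12, so ln2size is 12 (1 << 12 == 4096 bytes).
 */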
429 
430 static int
431 pci_mapsize(uint32_t testval)
432 {
433 	int ln2size;
434 
435 	testval = pci_mapbase(testval);
436 	ln2size = 0;
437 	if (testval != 0) {
438 		while ((testval & 1) == 0)
439 		{
440 			ln2size++;
441 			testval >>= 1;
442 		}
443 	}
444 	return (ln2size);
445 }
446 
447 /* return log2 of address range supported by map register */
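/*
 * For example, a BAR whose type field is PCIM_BAR_MEM_64 decodes a full
 * 64-bit address (and consumes the following BAR register as the upper
 * 32 bits), while PCIM_BAR_MEM_1MB is the legacy "below 1MB" memory type.
 */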
448 
449 static int
450 pci_maprange(unsigned mapreg)
451 {
452 	int ln2range = 0;
453 
454 	if (PCI_BAR_IO(mapreg))
455 		ln2range = 32;
456 	else
457 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
458 		case PCIM_BAR_MEM_32:
459 			ln2range = 32;
460 			break;
461 		case PCIM_BAR_MEM_1MB:
462 			ln2range = 20;
463 			break;
464 		case PCIM_BAR_MEM_64:
465 			ln2range = 64;
466 			break;
467 		}
468 	return (ln2range);
469 }
470 
471 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
472 
473 static void
474 pci_fixancient(pcicfgregs *cfg)
475 {
476 	if (cfg->hdrtype != 0)
477 		return;
478 
479 	/* PCI to PCI bridges use header type 1 */
480 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
481 		cfg->hdrtype = 1;
482 }
483 
484 /* extract header type specific config data */
485 
486 static void
487 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
488 {
489 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
490 	switch (cfg->hdrtype) {
491 	case 0:
492 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
493 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
494 		cfg->nummaps	    = PCI_MAXMAPS_0;
495 		break;
496 	case 1:
497 		cfg->nummaps	    = PCI_MAXMAPS_1;
498 		break;
499 	case 2:
500 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
501 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
502 		cfg->nummaps	    = PCI_MAXMAPS_2;
503 		break;
504 	}
505 #undef REG
506 }
507 
508 /* read configuration header into pcicfgregs structure */
509 struct pci_devinfo *
510 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
511 {
512 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
513 	pcicfgregs *cfg = NULL;
514 	struct pci_devinfo *devlist_entry;
515 	struct devlist *devlist_head;
516 
517 	devlist_head = &pci_devq;
518 
519 	devlist_entry = NULL;
520 
521 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
522 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
523 
524 		cfg = &devlist_entry->cfg;
525 
526 		cfg->domain		= d;
527 		cfg->bus		= b;
528 		cfg->slot		= s;
529 		cfg->func		= f;
530 		cfg->vendor		= REG(PCIR_VENDOR, 2);
531 		cfg->device		= REG(PCIR_DEVICE, 2);
532 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
533 		cfg->statreg		= REG(PCIR_STATUS, 2);
534 		cfg->baseclass		= REG(PCIR_CLASS, 1);
535 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
536 		cfg->progif		= REG(PCIR_PROGIF, 1);
537 		cfg->revid		= REG(PCIR_REVID, 1);
538 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
539 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
540 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
541 		cfg->intpin		= REG(PCIR_INTPIN, 1);
542 		cfg->intline		= REG(PCIR_INTLINE, 1);
543 
544 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
545 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
546 
547 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
548 		cfg->hdrtype		&= ~PCIM_MFDEV;
549 
550 		pci_fixancient(cfg);
551 		pci_hdrtypedata(pcib, b, s, f, cfg);
552 
553 		pci_read_capabilities(pcib, cfg);
554 
555 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
556 
557 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
558 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
559 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
560 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
561 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
562 
563 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
564 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
565 		devlist_entry->conf.pc_vendor = cfg->vendor;
566 		devlist_entry->conf.pc_device = cfg->device;
567 
568 		devlist_entry->conf.pc_class = cfg->baseclass;
569 		devlist_entry->conf.pc_subclass = cfg->subclass;
570 		devlist_entry->conf.pc_progif = cfg->progif;
571 		devlist_entry->conf.pc_revid = cfg->revid;
572 
573 		pci_numdevs++;
574 		pci_generation++;
575 	}
576 	return (devlist_entry);
577 #undef REG
578 }
579 
580 static int
581 pci_fixup_nextptr(int *nextptr0)
582 {
583 	int nextptr = *nextptr0;
584 
585 	/* "Next pointer" is only one byte */
586 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
587 
588 	if (nextptr & 0x3) {
589 		/*
590 		 * PCI local bus spec 3.0:
591 		 *
592 		 * "... The bottom two bits of all pointers are reserved
593 		 *  and must be implemented as 00b although software must
594 		 *  mask them to allow for future uses of these bits ..."
595 		 */
596 		if (bootverbose) {
597 			kprintf("Illegal PCI extended capability "
598 				"offset, fixup 0x%02x -> 0x%02x\n",
599 				nextptr, nextptr & ~0x3);
600 		}
601 		nextptr &= ~0x3;
602 	}
603 	*nextptr0 = nextptr;
604 
605 	if (nextptr < 0x40) {
606 		if (nextptr != 0) {
607 			kprintf("Illegal PCI extended capability "
608 				"offset 0x%02x\n", nextptr);
609 		}
610 		return 0;
611 	}
612 	return 1;
613 }
614 
615 static void
616 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
617 {
618 #define REG(n, w)	\
619 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
620 
621 	struct pcicfg_pp *pp = &cfg->pp;
622 
623 	if (pp->pp_cap)
624 		return;
625 
626 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
627 	pp->pp_status = ptr + PCIR_POWER_STATUS;
628 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
629 
630 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
631 		/*
632 		 * XXX
633 		 * We should write to data_select and read back from
634 		 * data_scale to determine whether data register is
635 		 * implemented.
636 		 */
637 #ifdef foo
638 		pp->pp_data = ptr + PCIR_POWER_DATA;
639 #else
640 		pp->pp_data = 0;
641 #endif
642 	}
643 
644 #undef REG
645 }
646 
647 static void
648 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
649 {
650 #if defined(__x86_64__)
651 
652 #define REG(n, w)	\
653 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
654 
655 	struct pcicfg_ht *ht = &cfg->ht;
656 	uint64_t addr;
657 	uint32_t val;
658 
659 	/* Determine HT-specific capability type. */
660 	val = REG(ptr + PCIR_HT_COMMAND, 2);
661 
662 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
663 		cfg->ht.ht_slave = ptr;
664 
665 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
666 		return;
667 
668 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
669 		/* Sanity check the mapping window. */
670 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
671 		addr <<= 32;
672 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
673 		if (addr != MSI_X86_ADDR_BASE) {
674 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
675 				"has non-default MSI window 0x%llx\n",
676 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
677 				(long long)addr);
678 		}
679 	} else {
680 		addr = MSI_X86_ADDR_BASE;
681 	}
682 
683 	ht->ht_msimap = ptr;
684 	ht->ht_msictrl = val;
685 	ht->ht_msiaddr = addr;
686 
687 #undef REG
688 
689 #endif	/* __x86_64__ */
690 }
691 
692 static void
693 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
694 {
695 #define REG(n, w)	\
696 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
697 
698 	struct pcicfg_msi *msi = &cfg->msi;
699 
700 	msi->msi_location = ptr;
701 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
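	/*
	 * The MMC field encodes the number of supported messages as a
	 * power of two: 0 -> 1 message, 1 -> 2 messages, ... 5 -> 32.
	 */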
702 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
703 
704 #undef REG
705 }
706 
707 static void
708 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
709 {
710 #define REG(n, w)	\
711 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
712 
713 	struct pcicfg_msix *msix = &cfg->msix;
714 	uint32_t val;
715 
716 	msix->msix_location = ptr;
717 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
718 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
719 
720 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
721 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
722 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
723 
724 	val = REG(ptr + PCIR_MSIX_PBA, 4);
725 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
726 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
727 
728 	TAILQ_INIT(&msix->msix_vectors);
729 
730 #undef REG
731 }
732 
733 static void
734 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
735 {
736 	cfg->vpd.vpd_reg = ptr;
737 }
738 
739 static void
740 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
741 {
742 #define REG(n, w)	\
743 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
744 
745 	/* Should always be true. */
746 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
747 		uint32_t val;
748 
749 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
750 		cfg->subvendor = val & 0xffff;
751 		cfg->subdevice = val >> 16;
752 	}
753 
754 #undef REG
755 }
756 
757 static void
758 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
759 {
760 	/*
761 	 * Assume we have a PCI-X chipset if we have
762 	 * at least one PCI-PCI bridge with a PCI-X
763 	 * capability.  Note that some systems with
764 	 * PCI-express or HT chipsets might match on
765 	 * this check as well.
766 	 */
767 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
768 		pcix_chipset = 1;
769 
770 	cfg->pcix.pcix_ptr = ptr;
771 }
772 
773 static int
774 pcie_slotimpl(const pcicfgregs *cfg)
775 {
776 	const struct pcicfg_expr *expr = &cfg->expr;
777 	uint16_t port_type;
778 
779 	/*
780 	 * - The slot implemented bit is meaningful iff the current port
781 	 *   is a root port or a downstream port.
782 	 * - Testing for root port or downstream port is meaningful iff
783 	 *   the PCI configuration has a type 1 header.
784 	 */
785 
786 	if (cfg->hdrtype != 1)
787 		return 0;
788 
789 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
790 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
791 		return 0;
792 
793 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
794 		return 0;
795 
796 	return 1;
797 }
798 
799 static void
800 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
801 {
802 #define REG(n, w)	\
803 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
804 
805 	struct pcicfg_expr *expr = &cfg->expr;
806 
807 	/*
808 	 * Assume we have a PCI-express chipset if we have
809 	 * at least one PCI-express device.
810 	 */
811 	pcie_chipset = 1;
812 
813 	expr->expr_ptr = ptr;
814 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
815 
816 	/*
817 	 * Read slot capabilities.  Slot capabilities exist iff the
818 	 * current port's slot is implemented.
819 	 */
820 	if (pcie_slotimpl(cfg))
821 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
822 
823 #undef REG
824 }
825 
826 static void
827 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
828 {
829 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
830 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
831 
832 	uint32_t val;
833 	int nextptr, ptrptr;
834 
835 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
836 		/* No capabilities */
837 		return;
838 	}
839 
840 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
841 	case 0:
842 	case 1:
843 		ptrptr = PCIR_CAP_PTR;
844 		break;
845 	case 2:
846 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
847 		break;
848 	default:
849 		return;				/* no capabilities support */
850 	}
851 	nextptr = REG(ptrptr, 1);	/* sanity check? */
852 
853 	/*
854 	 * Read capability entries.
855 	 */
856 	while (pci_fixup_nextptr(&nextptr)) {
857 		const struct pci_read_cap *rc;
858 		int ptr = nextptr;
859 
860 		/* Find the next entry */
861 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
862 
863 		/* Process this entry */
864 		val = REG(ptr + PCICAP_ID, 1);
865 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
866 			if (rc->cap == val) {
867 				rc->read_cap(pcib, ptr, nextptr, cfg);
868 				break;
869 			}
870 		}
871 	}
872 
873 #if defined(__x86_64__)
874 	/*
875 	 * Enable the MSI mapping window for all HyperTransport
876 	 * slaves.  PCI-PCI bridges have their windows enabled via
877 	 * PCIB_MAP_MSI().
878 	 */
879 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
880 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
881 		device_printf(pcib,
882 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
883 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
884 		cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
885 		WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
886 		     2);
887 	}
888 #endif
889 
890 /* The REG and WREG macro definitions carry through to the functions below */
891 }
892 
893 /*
894  * PCI Vital Product Data
895  */
896 
897 #define	PCI_VPD_TIMEOUT		1000000
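/* The polling loops below DELAY(1) per iteration, so this is roughly 1s. */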
898 
899 static int
900 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
901 {
902 	int count = PCI_VPD_TIMEOUT;
903 
904 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
905 
906 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
907 
908 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
909 		if (--count < 0)
910 			return (ENXIO);
911 		DELAY(1);	/* limit looping */
912 	}
913 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
914 
915 	return (0);
916 }
917 
918 #if 0
919 static int
920 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
921 {
922 	int count = PCI_VPD_TIMEOUT;
923 
924 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
925 
926 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
927 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
928 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
929 		if (--count < 0)
930 			return (ENXIO);
931 		DELAY(1);	/* limit looping */
932 	}
933 
934 	return (0);
935 }
936 #endif
937 
938 #undef PCI_VPD_TIMEOUT
939 
940 struct vpd_readstate {
941 	device_t	pcib;
942 	pcicfgregs	*cfg;
943 	uint32_t	val;		/* last 32-bit word read from VPD */
944 	int		bytesinval;	/* unconsumed bytes remaining in val */
945 	int		off;		/* offset of next 32-bit word to read */
946 	uint8_t		cksum;		/* running checksum; must end up 0 */
947 };
948 
949 static int
950 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
951 {
952 	uint32_t reg;
953 	uint8_t byte;
954 
955 	if (vrs->bytesinval == 0) {
956 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
957 			return (ENXIO);
958 		vrs->val = le32toh(reg);
959 		vrs->off += 4;
960 		byte = vrs->val & 0xff;
961 		vrs->bytesinval = 3;
962 	} else {
963 		vrs->val = vrs->val >> 8;
964 		byte = vrs->val & 0xff;
965 		vrs->bytesinval--;
966 	}
967 
968 	vrs->cksum += byte;
969 	*data = byte;
970 	return (0);
971 }
972 
973 int
974 pcie_slot_implemented(device_t dev)
975 {
976 	struct pci_devinfo *dinfo = device_get_ivars(dev);
977 
978 	return pcie_slotimpl(&dinfo->cfg);
979 }
980 
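/*
 * Program the PCIe max read request size in the device control register.
 * For example, a driver wanting 4KB read requests would call
 * pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096).
 */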
981 void
982 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
983 {
984 	uint8_t expr_ptr;
985 	uint16_t val;
986 
987 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
988 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
989 		panic("%s: invalid max read request size 0x%02x",
990 		      device_get_nameunit(dev), rqsize);
991 	}
992 
993 	expr_ptr = pci_get_pciecap_ptr(dev);
994 	if (!expr_ptr)
995 		panic("%s: not PCIe device", device_get_nameunit(dev));
996 
997 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
998 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
999 		if (bootverbose)
1000 			device_printf(dev, "adjust device control 0x%04x", val);
1001 
1002 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
1003 		val |= rqsize;
1004 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
1005 
1006 		if (bootverbose)
1007 			kprintf(" -> 0x%04x\n", val);
1008 	}
1009 }
1010 
1011 uint16_t
1012 pcie_get_max_readrq(device_t dev)
1013 {
1014 	uint8_t expr_ptr;
1015 	uint16_t val;
1016 
1017 	expr_ptr = pci_get_pciecap_ptr(dev);
1018 	if (!expr_ptr)
1019 		panic("%s: not PCIe device", device_get_nameunit(dev));
1020 
1021 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1022 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1023 }
1024 
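/*
 * The parser below is a small state machine over the VPD byte stream:
 * state 0 reads an item header, 1 the identifier string, 2/3 a VPD-R
 * keyword header/value, 5/6 a VPD-W keyword header/value, and 4 skips
 * bytes; state -1 terminates normally while -2 flags a read error.
 */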
1025 static void
1026 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1027 {
1028 	struct vpd_readstate vrs;
1029 	int state;
1030 	int name;
1031 	int remain;
1032 	int i;
1033 	int alloc, off;		/* alloc/off for RO/W arrays */
1034 	int cksumvalid;
1035 	int dflen;
1036 	uint8_t byte;
1037 	uint8_t byte2;
1038 
1039 	/* init vpd reader */
1040 	vrs.bytesinval = 0;
1041 	vrs.off = 0;
1042 	vrs.pcib = pcib;
1043 	vrs.cfg = cfg;
1044 	vrs.cksum = 0;
1045 
1046 	state = 0;
1047 	name = remain = i = 0;	/* shut up stupid gcc */
1048 	alloc = off = 0;	/* shut up stupid gcc */
1049 	dflen = 0;		/* shut up stupid gcc */
1050 	cksumvalid = -1;
1051 	while (state >= 0) {
1052 		if (vpd_nextbyte(&vrs, &byte)) {
1053 			state = -2;
1054 			break;
1055 		}
1056 #if 0
1057 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1058 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1059 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1060 #endif
1061 		switch (state) {
1062 		case 0:		/* item name */
1063 			if (byte & 0x80) {
1064 				if (vpd_nextbyte(&vrs, &byte2)) {
1065 					state = -2;
1066 					break;
1067 				}
1068 				remain = byte2;
1069 				if (vpd_nextbyte(&vrs, &byte2)) {
1070 					state = -2;
1071 					break;
1072 				}
1073 				remain |= byte2 << 8;
1074 				if (remain > (0x7f*4 - vrs.off)) {
1075 					state = -1;
1076 					kprintf(
1077 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1078 					    cfg->domain, cfg->bus, cfg->slot,
1079 					    cfg->func, remain);
1080 				}
1081 				name = byte & 0x7f;
1082 			} else {
1083 				remain = byte & 0x7;
1084 				name = (byte >> 3) & 0xf;
1085 			}
1086 			switch (name) {
1087 			case 0x2:	/* String */
1088 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1089 				    M_DEVBUF, M_WAITOK);
1090 				i = 0;
1091 				state = 1;
1092 				break;
1093 			case 0xf:	/* End */
1094 				state = -1;
1095 				break;
1096 			case 0x10:	/* VPD-R */
1097 				alloc = 8;
1098 				off = 0;
1099 				cfg->vpd.vpd_ros = kmalloc(alloc *
1100 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1101 				    M_WAITOK | M_ZERO);
1102 				state = 2;
1103 				break;
1104 			case 0x11:	/* VPD-W */
1105 				alloc = 8;
1106 				off = 0;
1107 				cfg->vpd.vpd_w = kmalloc(alloc *
1108 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1109 				    M_WAITOK | M_ZERO);
1110 				state = 5;
1111 				break;
1112 			default:	/* Invalid data, abort */
1113 				state = -1;
1114 				break;
1115 			}
1116 			break;
1117 
1118 		case 1:	/* Identifier String */
1119 			cfg->vpd.vpd_ident[i++] = byte;
1120 			remain--;
1121 			if (remain == 0)  {
1122 				cfg->vpd.vpd_ident[i] = '\0';
1123 				state = 0;
1124 			}
1125 			break;
1126 
1127 		case 2:	/* VPD-R Keyword Header */
1128 			if (off == alloc) {
1129 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1130 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1131 				    M_DEVBUF, M_WAITOK | M_ZERO);
1132 			}
1133 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1134 			if (vpd_nextbyte(&vrs, &byte2)) {
1135 				state = -2;
1136 				break;
1137 			}
1138 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1139 			if (vpd_nextbyte(&vrs, &byte2)) {
1140 				state = -2;
1141 				break;
1142 			}
1143 			dflen = byte2;
1144 			if (dflen == 0 &&
1145 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1146 			    2) == 0) {
1147 				/*
1148 				 * if this happens, we can't trust the rest
1149 				 * of the VPD.
1150 				 */
1151 				kprintf(
1152 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1153 				    cfg->domain, cfg->bus, cfg->slot,
1154 				    cfg->func, dflen);
1155 				cksumvalid = 0;
1156 				state = -1;
1157 				break;
1158 			} else if (dflen == 0) {
1159 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1160 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1161 				    M_DEVBUF, M_WAITOK);
1162 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1163 			} else
1164 				cfg->vpd.vpd_ros[off].value = kmalloc(
1165 				    (dflen + 1) *
1166 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1167 				    M_DEVBUF, M_WAITOK);
1168 			remain -= 3;
1169 			i = 0;
1170 			/* keep in sync w/ state 3's transitions */
1171 			if (dflen == 0 && remain == 0)
1172 				state = 0;
1173 			else if (dflen == 0)
1174 				state = 2;
1175 			else
1176 				state = 3;
1177 			break;
1178 
1179 		case 3:	/* VPD-R Keyword Value */
1180 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1181 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1182 			    "RV", 2) == 0 && cksumvalid == -1) {
1183 				if (vrs.cksum == 0)
1184 					cksumvalid = 1;
1185 				else {
1186 					if (bootverbose)
1187 						kprintf(
1188 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1189 						    cfg->domain, cfg->bus,
1190 						    cfg->slot, cfg->func,
1191 						    vrs.cksum);
1192 					cksumvalid = 0;
1193 					state = -1;
1194 					break;
1195 				}
1196 			}
1197 			dflen--;
1198 			remain--;
1199 			/* keep in sync w/ state 2's transitions */
1200 			if (dflen == 0)
1201 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1202 			if (dflen == 0 && remain == 0) {
1203 				cfg->vpd.vpd_rocnt = off;
1204 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1205 				    off * sizeof(*cfg->vpd.vpd_ros),
1206 				    M_DEVBUF, M_WAITOK | M_ZERO);
1207 				state = 0;
1208 			} else if (dflen == 0)
1209 				state = 2;
1210 			break;
1211 
1212 		case 4:
1213 			remain--;
1214 			if (remain == 0)
1215 				state = 0;
1216 			break;
1217 
1218 		case 5:	/* VPD-W Keyword Header */
1219 			if (off == alloc) {
1220 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1221 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1222 				    M_DEVBUF, M_WAITOK | M_ZERO);
1223 			}
1224 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1225 			if (vpd_nextbyte(&vrs, &byte2)) {
1226 				state = -2;
1227 				break;
1228 			}
1229 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1230 			if (vpd_nextbyte(&vrs, &byte2)) {
1231 				state = -2;
1232 				break;
1233 			}
1234 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1235 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1236 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1237 			    sizeof(*cfg->vpd.vpd_w[off].value),
1238 			    M_DEVBUF, M_WAITOK);
1239 			remain -= 3;
1240 			i = 0;
1241 			/* keep in sync w/ state 6's transitions */
1242 			if (dflen == 0 && remain == 0)
1243 				state = 0;
1244 			else if (dflen == 0)
1245 				state = 5;
1246 			else
1247 				state = 6;
1248 			break;
1249 
1250 		case 6:	/* VPD-W Keyword Value */
1251 			cfg->vpd.vpd_w[off].value[i++] = byte;
1252 			dflen--;
1253 			remain--;
1254 			/* keep in sync w/ state 5's transitions */
1255 			if (dflen == 0)
1256 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1257 			if (dflen == 0 && remain == 0) {
1258 				cfg->vpd.vpd_wcnt = off;
1259 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1260 				    off * sizeof(*cfg->vpd.vpd_w),
1261 				    M_DEVBUF, M_WAITOK | M_ZERO);
1262 				state = 0;
1263 			} else if (dflen == 0)
1264 				state = 5;
1265 			break;
1266 
1267 		default:
1268 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1269 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1270 			    state);
1271 			state = -1;
1272 			break;
1273 		}
1274 	}
1275 
1276 	if (cksumvalid == 0 || state < -1) {
1277 		/* read-only data bad, clean up */
1278 		if (cfg->vpd.vpd_ros != NULL) {
1279 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1280 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1281 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1282 			cfg->vpd.vpd_ros = NULL;
1283 		}
1284 	}
1285 	if (state < -1) {
1286 		/* I/O error, clean up */
1287 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1288 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1289 		if (cfg->vpd.vpd_ident != NULL) {
1290 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1291 			cfg->vpd.vpd_ident = NULL;
1292 		}
1293 		if (cfg->vpd.vpd_w != NULL) {
1294 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1295 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1296 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1297 			cfg->vpd.vpd_w = NULL;
1298 		}
1299 	}
1300 	cfg->vpd.vpd_cached = 1;
1301 #undef REG
1302 #undef WREG
1303 }
1304 
1305 int
1306 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1307 {
1308 	struct pci_devinfo *dinfo = device_get_ivars(child);
1309 	pcicfgregs *cfg = &dinfo->cfg;
1310 
1311 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1312 		pci_read_vpd(device_get_parent(dev), cfg);
1313 
1314 	*identptr = cfg->vpd.vpd_ident;
1315 
1316 	if (*identptr == NULL)
1317 		return (ENXIO);
1318 
1319 	return (0);
1320 }
1321 
1322 int
1323 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1324 	const char **vptr)
1325 {
1326 	struct pci_devinfo *dinfo = device_get_ivars(child);
1327 	pcicfgregs *cfg = &dinfo->cfg;
1328 	int i;
1329 
1330 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1331 		pci_read_vpd(device_get_parent(dev), cfg);
1332 
1333 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
1334 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1335 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1336 			/* Found it; return the cached value. */
1337 			*vptr = cfg->vpd.vpd_ros[i].value;
1338 			return (0);
1339 		}
1340 	}
1341 
1342 	*vptr = NULL;
1343 	return (ENXIO);
1344 }
1345 
1346 /*
1347  * Find the offset in config space of the requested capability entry.
1348  * Returns 0 and stores the offset in *capreg; ENXIO/ENOENT on failure.
1349  */
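/*
 * For example (a sketch, not a complete driver), a child's PCIe
 * capability can be located via the pci_find_extcap() wrapper:
 *
 *	int ptr;
 *
 *	if (pci_find_extcap(dev, PCIY_EXPRESS, &ptr) == 0)
 *		... use ptr as the capability's config-space offset ...
 */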
1350 int
1351 pci_find_extcap_method(device_t dev, device_t child, int capability,
1352     int *capreg)
1353 {
1354 	struct pci_devinfo *dinfo = device_get_ivars(child);
1355 	pcicfgregs *cfg = &dinfo->cfg;
1356 	u_int32_t status;
1357 	u_int8_t ptr;
1358 
1359 	/*
1360 	 * Check the CAP_LIST bit of the PCI status register first.
1361 	 */
1362 	status = pci_read_config(child, PCIR_STATUS, 2);
1363 	if (!(status & PCIM_STATUS_CAPPRESENT))
1364 		return (ENXIO);
1365 
1366 	/*
1367 	 * Determine the start pointer of the capabilities list.
1368 	 */
1369 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1370 	case 0:
1371 	case 1:
1372 		ptr = PCIR_CAP_PTR;
1373 		break;
1374 	case 2:
1375 		ptr = PCIR_CAP_PTR_2;
1376 		break;
1377 	default:
1378 		/* XXX: panic? */
1379 		return (ENXIO);		/* no extended capabilities support */
1380 	}
1381 	ptr = pci_read_config(child, ptr, 1);
1382 
1383 	/*
1384 	 * Traverse the capabilities list.
1385 	 */
1386 	while (ptr != 0) {
1387 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1388 			if (capreg != NULL)
1389 				*capreg = ptr;
1390 			return (0);
1391 		}
1392 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1393 	}
1394 
1395 	return (ENOENT);
1396 }
1397 
1398 /*
1399  * Support for MSI-X message interrupts.
1400  */
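/*
 * Each entry in the MSI-X table is 16 bytes: message address low at
 * offset 0, message address high at 4, message data at 8 and vector
 * control at 12, whose low bit masks the vector.
 */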
1401 static void
1402 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1403     uint32_t data)
1404 {
1405 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1406 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1407 	uint32_t offset;
1408 
1409 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1410 	offset = msix->msix_table_offset + index * 16;
1411 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1412 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1413 	bus_write_4(msix->msix_table_res, offset + 8, data);
1414 
1415 	/* Enable MSI -> HT mapping. */
1416 	pci_ht_map_msi(dev, address);
1417 }
1418 
1419 static void
1420 pci_mask_msix_vector(device_t dev, u_int index)
1421 {
1422 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1423 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1424 	uint32_t offset, val;
1425 
1426 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1427 	offset = msix->msix_table_offset + index * 16 + 12;
1428 	val = bus_read_4(msix->msix_table_res, offset);
1429 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1430 		val |= PCIM_MSIX_VCTRL_MASK;
1431 		bus_write_4(msix->msix_table_res, offset, val);
1432 	}
1433 }
1434 
1435 static void
1436 pci_unmask_msix_vector(device_t dev, u_int index)
1437 {
1438 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1439 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1440 	uint32_t offset, val;
1441 
1442 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1443 	offset = msix->msix_table_offset + index * 16 + 12;
1444 	val = bus_read_4(msix->msix_table_res, offset);
1445 	if (val & PCIM_MSIX_VCTRL_MASK) {
1446 		val &= ~PCIM_MSIX_VCTRL_MASK;
1447 		bus_write_4(msix->msix_table_res, offset, val);
1448 	}
1449 }
1450 
1451 int
1452 pci_pending_msix_vector(device_t dev, u_int index)
1453 {
1454 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1455 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1456 	uint32_t offset, bit;
1457 
1458 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1459 	    ("MSI-X is not setup yet"));
1460 
1461 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1462 	offset = msix->msix_pba_offset + (index / 32) * 4;
1463 	bit = 1 << index % 32;
1464 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1465 }
1466 
1467 /*
1468  * Restore MSI-X registers and table during resume.  If MSI-X is
1469  * enabled then walk the virtual table to restore the actual MSI-X
1470  * table.
1471  */
1472 static void
1473 pci_resume_msix(device_t dev)
1474 {
1475 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1476 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1477 
1478 	if (msix->msix_table_res != NULL) {
1479 		const struct msix_vector *mv;
1480 
1481 		pci_mask_msix_allvectors(dev);
1482 
1483 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1484 			u_int vector;
1485 
1486 			if (mv->mv_address == 0)
1487 				continue;
1488 
1489 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1490 			pci_setup_msix_vector(dev, vector,
1491 			    mv->mv_address, mv->mv_data);
1492 			pci_unmask_msix_vector(dev, vector);
1493 		}
1494 	}
1495 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1496 	    msix->msix_ctrl, 2);
1497 }
1498 
1499 /*
1500  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1501  *
1502  * On success, the allocated MSI-X vector's rid is saved in rid0.
1503  */
1504 int
1505 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1506     int *rid0, int cpuid)
1507 {
1508 	struct pci_devinfo *dinfo = device_get_ivars(child);
1509 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1510 	struct msix_vector *mv;
1511 	struct resource_list_entry *rle;
1512 	int error, irq, rid;
1513 
1514 	KASSERT(msix->msix_table_res != NULL &&
1515 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1516 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1517 	KASSERT(vector < msix->msix_msgnum,
1518 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1519 
1520 	if (bootverbose) {
1521 		device_printf(child,
1522 		    "attempting to allocate MSI-X vector #%u (%d supported)\n",
1523 		    vector, msix->msix_msgnum);
1524 	}
1525 
1526 	/* Set rid according to vector number */
1527 	rid = PCI_MSIX_VEC2RID(vector);
1528 
1529 	/* Vector has already been allocated */
1530 	mv = pci_find_msix_vector(child, rid);
1531 	if (mv != NULL)
1532 		return EBUSY;
1533 
1534 	/* Allocate a message. */
1535 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1536 	if (error)
1537 		return error;
1538 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1539 	    irq, irq, 1, cpuid);
1540 
1541 	if (bootverbose) {
1542 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1543 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1544 		    rle->start, cpuid);
1545 	}
1546 
1547 	/* Update counts of alloc'd messages. */
1548 	msix->msix_alloc++;
1549 
1550 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1551 	mv->mv_rid = rid;
1552 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1553 
1554 	*rid0 = rid;
1555 	return 0;
1556 }
1557 
1558 int
1559 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1560 {
1561 	struct pci_devinfo *dinfo = device_get_ivars(child);
1562 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1563 	struct resource_list_entry *rle;
1564 	struct msix_vector *mv;
1565 	int irq, cpuid;
1566 
1567 	KASSERT(msix->msix_table_res != NULL &&
1568 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1569 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1570 	KASSERT(rid > 0, ("invalid rid %d", rid));
1571 
1572 	mv = pci_find_msix_vector(child, rid);
1573 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1574 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not torn down", rid));
1575 
1576 	/* Make sure resource is no longer allocated. */
1577 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1578 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1579 	KASSERT(rle->res == NULL,
1580 	    ("MSI-X resource is still allocated, rid %d", rid));
1581 
1582 	irq = rle->start;
1583 	cpuid = rle->cpuid;
1584 
1585 	/* Free the resource list entries. */
1586 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1587 
1588 	/* Release the IRQ. */
1589 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1590 
1591 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1592 	kfree(mv, M_DEVBUF);
1593 
1594 	msix->msix_alloc--;
1595 	return (0);
1596 }
1597 
1598 /*
1599  * Return the maximum number of MSI-X messages this device supports.
1600  * Basically, assuming the MD code can alloc messages, this function
1601  * should return the maximum value that pci_alloc_msix() can return.
1602  * Thus, it is subject to the tunables, etc.
1603  */
1604 int
1605 pci_msix_count_method(device_t dev, device_t child)
1606 {
1607 	struct pci_devinfo *dinfo = device_get_ivars(child);
1608 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1609 
1610 	if (pci_do_msix && msix->msix_location != 0)
1611 		return (msix->msix_msgnum);
1612 	return (0);
1613 }
1614 
1615 int
1616 pci_setup_msix(device_t dev)
1617 {
1618 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1619 	pcicfgregs *cfg = &dinfo->cfg;
1620 	struct resource_list_entry *rle;
1621 	struct resource *table_res, *pba_res;
1622 
1623 	KASSERT(cfg->msix.msix_table_res == NULL &&
1624 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has already been set up"));
1625 
1626 	/* If rid 0 is allocated, then fail. */
1627 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1628 	if (rle != NULL && rle->res != NULL)
1629 		return (ENXIO);
1630 
1631 	/* Already have allocated MSIs? */
1632 	if (cfg->msi.msi_alloc != 0)
1633 		return (ENXIO);
1634 
1635 	/* If MSI is blacklisted for this system, fail. */
1636 	if (pci_msi_blacklisted())
1637 		return (ENXIO);
1638 
1639 	/* MSI-X capability present? */
1640 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1641 	    !pci_do_msix)
1642 		return (ENODEV);
1643 
1644 	KASSERT(cfg->msix.msix_alloc == 0 &&
1645 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1646 	    ("MSI-X vector has been allocated"));
1647 
1648 	/* Make sure the appropriate BARs are mapped. */
1649 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1650 	    cfg->msix.msix_table_bar);
1651 	if (rle == NULL || rle->res == NULL ||
1652 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1653 		return (ENXIO);
1654 	table_res = rle->res;
1655 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1656 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1657 		    cfg->msix.msix_pba_bar);
1658 		if (rle == NULL || rle->res == NULL ||
1659 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1660 			return (ENXIO);
1661 	}
1662 	pba_res = rle->res;
1663 
1664 	cfg->msix.msix_table_res = table_res;
1665 	cfg->msix.msix_pba_res = pba_res;
1666 
1667 	pci_mask_msix_allvectors(dev);
1668 
1669 	return 0;
1670 }
1671 
1672 void
1673 pci_teardown_msix(device_t dev)
1674 {
1675 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1676 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1677 
1678 	KASSERT(msix->msix_table_res != NULL &&
1679 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1680 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1681 	    ("MSI-X vector is still allocated"));
1682 
1683 	pci_mask_msix_allvectors(dev);
1684 
1685 	msix->msix_table_res = NULL;
1686 	msix->msix_pba_res = NULL;
1687 }
1688 
1689 void
1690 pci_enable_msix(device_t dev)
1691 {
1692 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1693 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1694 
1695 	KASSERT(msix->msix_table_res != NULL &&
1696 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1697 
1698 	/* Update control register to enable MSI-X. */
1699 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1700 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1701 	    msix->msix_ctrl, 2);
1702 }
1703 
1704 void
1705 pci_disable_msix(device_t dev)
1706 {
1707 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1708 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1709 
1710 	KASSERT(msix->msix_table_res != NULL &&
1711 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1712 
1713 	/* Disable MSI -> HT mapping. */
1714 	pci_ht_map_msi(dev, 0);
1715 
1716 	/* Update control register to disable MSI-X. */
1717 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1718 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1719 	    msix->msix_ctrl, 2);
1720 }
1721 
1722 static void
1723 pci_mask_msix_allvectors(device_t dev)
1724 {
1725 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1726 	u_int i;
1727 
1728 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1729 		pci_mask_msix_vector(dev, i);
1730 }
1731 
1732 static struct msix_vector *
1733 pci_find_msix_vector(device_t dev, int rid)
1734 {
1735 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1736 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1737 	struct msix_vector *mv;
1738 
1739 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1740 		if (mv->mv_rid == rid)
1741 			return mv;
1742 	}
1743 	return NULL;
1744 }
1745 
1746 /*
1747  * HyperTransport MSI mapping control
1748  */
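/*
 * The HT MSI mapping capability lets a bridge translate MSI writes
 * aimed at the x86 MSI address window into HyperTransport interrupt
 * messages; the map is only enabled when the requested address falls
 * in the same 1MB window as the mapping base (the >> 20 test below).
 */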
1749 void
1750 pci_ht_map_msi(device_t dev, uint64_t addr)
1751 {
1752 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1753 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1754 
1755 	if (!ht->ht_msimap)
1756 		return;
1757 
1758 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1759 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1760 		/* Enable MSI -> HT mapping. */
1761 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1762 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1763 		    ht->ht_msictrl, 2);
1764 	}
1765 
1766 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1767 		/* Disable MSI -> HT mapping. */
1768 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1769 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1770 		    ht->ht_msictrl, 2);
1771 	}
1772 }
1773 
1774 /*
1775  * Support for MSI (Message Signaled Interrupts).
1776  */
1777 static void
1778 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1779 {
1780 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1781 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1782 
1783 	/* Write data and address values. */
1784 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1785 	    address & 0xffffffff, 4);
1786 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1787 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1788 		    address >> 32, 4);
1789 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1790 		    data, 2);
1791 	} else
1792 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1793 		    2);
1794 
1795 	/* Enable MSI in the control register. */
1796 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1797 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1798 	    2);
1799 
1800 	/* Enable MSI -> HT mapping. */
1801 	pci_ht_map_msi(dev, address);
1802 }
1803 
1804 static void
1805 pci_disable_msi(device_t dev)
1806 {
1807 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1808 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1809 
1810 	/* Disable MSI -> HT mapping. */
1811 	pci_ht_map_msi(dev, 0);
1812 
1813 	/* Disable MSI in the control register. */
1814 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1815 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1816 	    2);
1817 }
1818 
1819 /*
1820  * Restore MSI registers during resume.  If MSI is enabled then
1821  * restore the data and address registers in addition to the control
1822  * register.
1823  */
1824 static void
1825 pci_resume_msi(device_t dev)
1826 {
1827 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1828 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1829 	uint64_t address;
1830 	uint16_t data;
1831 
1832 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1833 		address = msi->msi_addr;
1834 		data = msi->msi_data;
1835 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1836 		    address & 0xffffffff, 4);
1837 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1838 			pci_write_config(dev, msi->msi_location +
1839 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1840 			pci_write_config(dev, msi->msi_location +
1841 			    PCIR_MSI_DATA_64BIT, data, 2);
1842 		} else
1843 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1844 			    data, 2);
1845 	}
1846 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1847 	    2);
1848 }
1849 
1850 /*
1851  * Returns true if the specified device is blacklisted because MSI
1852  * doesn't work.
1853  */
1854 int
1855 pci_msi_device_blacklisted(device_t dev)
1856 {
1857 	struct pci_quirk *q;
1858 
1859 	if (!pci_honor_msi_blacklist)
1860 		return (0);
1861 
1862 	for (q = &pci_quirks[0]; q->devid; q++) {
1863 		if (q->devid == pci_get_devid(dev) &&
1864 		    q->type == PCI_QUIRK_DISABLE_MSI)
1865 			return (1);
1866 	}
1867 	return (0);
1868 }
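
/*
 * A matching quirk table entry might look like the following sketch
 * (hypothetical entry; the field layout of struct pci_quirk is assumed
 * here, but the devid encoding (device << 16) | vendor matches the
 * comparison used for PCI_QUIRK_MAP_REG in pci_add_resources()):
 *
 *	{ 0x74501022, PCI_QUIRK_DISABLE_MSI, 0, 0 },
 *
 * i.e. device 0x7450 of vendor 0x1022, a bridge whose MSI handling is
 * known to be broken.
 */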
1869 
1870 /*
1871  * Determine if MSI is blacklisted globally on this system.  Currently,
1872  * we just check for blacklisted chipsets as represented by the
1873  * host-PCI bridge at device 0:0:0.  In the future, it may become
1874  * necessary to check other system attributes, such as the kenv values
1875  * that give the motherboard manufacturer and model number.
1876  */
1877 static int
1878 pci_msi_blacklisted(void)
1879 {
1880 	device_t dev;
1881 
1882 	if (!pci_honor_msi_blacklist)
1883 		return (0);
1884 
1885 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1886 	if (!(pcie_chipset || pcix_chipset))
1887 		return (1);
1888 
1889 	dev = pci_find_bsf(0, 0, 0);
1890 	if (dev != NULL)
1891 		return (pci_msi_device_blacklisted(dev));
1892 	return (0);
1893 }
1894 
1895 /*
1896  * Attempt to allocate count MSI messages on start_cpuid.
1897  *
1898  * If start_cpuid < 0, then the MSI messages' target CPU will be
1899  * selected automatically.
1900  *
1901  * If the caller explicitly specified the MSI messages' target CPU,
1902  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1903  * messages on the specified CPU; if the allocation fails because the
1904  * MD code does not have enough vectors (EMSGSIZE), then we will try
1905  * the next available CPU, until the allocation has failed on all CPUs.
1906  *
1907  * EMSGSIZE will be returned if none of the available CPUs has enough
1908  * vectors for the requested number of MSI messages.  The caller
1909  * should then either reduce the number of MSI messages requested,
1910  * or simply give up on using MSI.
1911  *
1912  * The rids of the allocated SYS_RES_IRQ resources, which are >= 1,
1913  * are returned in the 'rid' array, if the allocation succeeds.
1914  */
1915 int
1916 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1917     int start_cpuid)
1918 {
1919 	struct pci_devinfo *dinfo = device_get_ivars(child);
1920 	pcicfgregs *cfg = &dinfo->cfg;
1921 	struct resource_list_entry *rle;
1922 	int error, i, irqs[32], cpuid = 0;
1923 	uint16_t ctrl;
1924 
1925 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1926 	    ("invalid MSI count %d", count));
1927 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1928 
1929 	/* If rid 0 is allocated, then fail. */
1930 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1931 	if (rle != NULL && rle->res != NULL)
1932 		return (ENXIO);
1933 
1934 	/* Already have allocated messages? */
1935 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1936 		return (ENXIO);
1937 
1938 	/* If MSI is blacklisted for this system, fail. */
1939 	if (pci_msi_blacklisted())
1940 		return (ENXIO);
1941 
1942 	/* MSI capability present? */
1943 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1944 	    !pci_do_msi)
1945 		return (ENODEV);
1946 
1947 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1948 	    count, cfg->msi.msi_msgnum));
1949 
1950 	if (bootverbose) {
1951 		device_printf(child,
1952 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1953 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1954 	}
1955 
1956 	if (start_cpuid < 0)
1957 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1958 
1959 	error = EINVAL;
1960 	for (i = 0; i < ncpus; ++i) {
1961 		cpuid = (start_cpuid + i) % ncpus;
1962 
1963 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1964 		    cfg->msi.msi_msgnum, irqs, cpuid);
1965 		if (error == 0)
1966 			break;
1967 		else if (error != EMSGSIZE)
1968 			return error;
1969 	}
1970 	if (error)
1971 		return error;
1972 
1973 	/*
1974 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1975 	 * the irqs[] array, so add new resources starting at rid 1.
1976 	 */
1977 	for (i = 0; i < count; i++) {
1978 		rid[i] = i + 1;
1979 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1980 		    irqs[i], irqs[i], 1, cpuid);
1981 	}
1982 
1983 	if (bootverbose) {
1984 		if (count == 1) {
1985 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1986 			    irqs[0], cpuid);
1987 		} else {
1988 			int run;
1989 
1990 			/*
1991 			 * Be fancy and try to print contiguous runs
1992 			 * of IRQ values as ranges.  'run' is true if
1993 			 * we are in a range.
1994 			 */
1995 			device_printf(child, "using IRQs %d", irqs[0]);
1996 			run = 0;
1997 			for (i = 1; i < count; i++) {
1998 
1999 				/* Still in a run? */
2000 				if (irqs[i] == irqs[i - 1] + 1) {
2001 					run = 1;
2002 					continue;
2003 				}
2004 
2005 				/* Finish previous range. */
2006 				if (run) {
2007 					kprintf("-%d", irqs[i - 1]);
2008 					run = 0;
2009 				}
2010 
2011 				/* Start new range. */
2012 				kprintf(",%d", irqs[i]);
2013 			}
2014 
2015 			/* Unfinished range? */
2016 			if (run)
2017 				kprintf("-%d", irqs[count - 1]);
2018 			kprintf(" for MSI on cpu%d\n", cpuid);
2019 		}
2020 	}
2021 
2022 	/* Update control register with count. */
2023 	ctrl = cfg->msi.msi_ctrl;
2024 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2025 	ctrl |= (ffs(count) - 1) << 4;
2026 	cfg->msi.msi_ctrl = ctrl;
2027 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2028 
2029 	/* Update counts of alloc'd messages. */
2030 	cfg->msi.msi_alloc = count;
2031 	cfg->msi.msi_handlers = 0;
2032 	return (0);
2033 }
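
/*
 * Worked example of the MME encoding above: for count == 4,
 * ffs(4) - 1 == 2, so bits 6:4 of the message control register (the
 * Multiple Message Enable field) are set to 010b, which tells the
 * device it may use 4 messages.  A driver would reach this method
 * through the pci_alloc_msi() wrapper, roughly as in the following
 * sketch (error handling elided; this assumes the wrapper mirrors the
 * rid/count/start_cpuid arguments of this method):
 *
 *	int rid[4];
 *
 *	if (pci_alloc_msi(dev, rid, 4, -1) == 0)
 *		...use SYS_RES_IRQ resources rid[0]..rid[3]...
 *
 * where start_cpuid == -1 lets the target CPU be picked automatically.
 */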
2034 
2035 /* Release the MSI messages associated with this device. */
2036 int
2037 pci_release_msi_method(device_t dev, device_t child)
2038 {
2039 	struct pci_devinfo *dinfo = device_get_ivars(child);
2040 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2041 	struct resource_list_entry *rle;
2042 	int i, irqs[32], cpuid = -1;
2043 
2044 	/* Do we have any messages to release? */
2045 	if (msi->msi_alloc == 0)
2046 		return (ENODEV);
2047 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2048 
2049 	/* Make sure none of the resources are allocated. */
2050 	if (msi->msi_handlers > 0)
2051 		return (EBUSY);
2052 	for (i = 0; i < msi->msi_alloc; i++) {
2053 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2054 		KASSERT(rle != NULL, ("missing MSI resource"));
2055 		if (rle->res != NULL)
2056 			return (EBUSY);
2057 		if (i == 0) {
2058 			cpuid = rle->cpuid;
2059 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2060 			    ("invalid MSI target cpuid %d", cpuid));
2061 		} else {
2062 			KASSERT(rle->cpuid == cpuid,
2063 			    ("MSI targets different cpus, "
2064 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2065 		}
2066 		irqs[i] = rle->start;
2067 	}
2068 
2069 	/* Update control register with 0 count. */
2070 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2071 	    ("%s: MSI still enabled", __func__));
2072 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2073 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2074 	    msi->msi_ctrl, 2);
2075 
2076 	/* Release the messages. */
2077 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2078 	    cpuid);
2079 	for (i = 0; i < msi->msi_alloc; i++)
2080 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2081 
2082 	/* Update alloc count. */
2083 	msi->msi_alloc = 0;
2084 	msi->msi_addr = 0;
2085 	msi->msi_data = 0;
2086 	return (0);
2087 }
2088 
2089 /*
2090  * Return the maximum number of MSI messages this device supports.
2091  * Basically, assuming the MD code can alloc messages, this function
2092  * should return the maximum value that pci_alloc_msi() can return.
2093  * Thus, it is subject to the tunables, etc.
2094  */
2095 int
2096 pci_msi_count_method(device_t dev, device_t child)
2097 {
2098 	struct pci_devinfo *dinfo = device_get_ivars(child);
2099 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2100 
2101 	if (pci_do_msi && msi->msi_location != 0)
2102 		return (msi->msi_msgnum);
2103 	return (0);
2104 }
2105 
2106 /* kfree the pcicfgregs structure and all dependent data structures */
2107 
2108 int
2109 pci_freecfg(struct pci_devinfo *dinfo)
2110 {
2111 	struct devlist *devlist_head;
2112 	int i;
2113 
2114 	devlist_head = &pci_devq;
2115 
2116 	if (dinfo->cfg.vpd.vpd_reg) {
2117 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2118 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2119 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2120 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2121 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2122 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2123 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2124 	}
2125 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2126 	kfree(dinfo, M_DEVBUF);
2127 
2128 	/* increment the generation count */
2129 	pci_generation++;
2130 
2131 	/* we're losing one device */
2132 	pci_numdevs--;
2133 	return (0);
2134 }
2135 
2136 /*
2137  * PCI power management
2138  */
2139 int
2140 pci_set_powerstate_method(device_t dev, device_t child, int state)
2141 {
2142 	struct pci_devinfo *dinfo = device_get_ivars(child);
2143 	pcicfgregs *cfg = &dinfo->cfg;
2144 	uint16_t status;
2145 	int oldstate, highest, delay;
2146 
2147 	if (cfg->pp.pp_cap == 0)
2148 		return (EOPNOTSUPP);
2149 
2150 	/*
2151 	 * Optimize away a request for no state change.  While it would be OK to
2152 	 * write to the hardware in theory, some devices have shown odd
2153 	 * behavior when going from D3 -> D3.
2154 	 */
2155 	oldstate = pci_get_powerstate(child);
2156 	if (oldstate == state)
2157 		return (0);
2158 
2159 	/*
2160 	 * The PCI power management specification states that after a state
2161 	 * transition between PCI power states, system software must
2162 	 * guarantee a minimal delay before the function accesses the device.
2163 	 * Compute the worst case delay that we need to guarantee before we
2164 	 * access the device.  Many devices will be responsive much more
2165 	 * quickly than this delay, but there are some that don't respond
2166 	 * instantly to state changes.  Transitions to/from D3 state require
2167 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2168 	 * is done below with DELAY rather than a sleeper function because
2169 	 * this function can be called from contexts where we cannot sleep.
2170 	 */
2171 	highest = (oldstate > state) ? oldstate : state;
2172 	if (highest == PCI_POWERSTATE_D3)
2173 	    delay = 10000;
2174 	else if (highest == PCI_POWERSTATE_D2)
2175 	    delay = 200;
2176 	else
2177 	    delay = 0;
2178 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2179 	    & ~PCIM_PSTAT_DMASK;
2180 	switch (state) {
2181 	case PCI_POWERSTATE_D0:
2182 		status |= PCIM_PSTAT_D0;
2183 		break;
2184 	case PCI_POWERSTATE_D1:
2185 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2186 			return (EOPNOTSUPP);
2187 		status |= PCIM_PSTAT_D1;
2188 		break;
2189 	case PCI_POWERSTATE_D2:
2190 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2191 			return (EOPNOTSUPP);
2192 		status |= PCIM_PSTAT_D2;
2193 		break;
2194 	case PCI_POWERSTATE_D3:
2195 		status |= PCIM_PSTAT_D3;
2196 		break;
2197 	default:
2198 		return (EINVAL);
2199 	}
2200 
2201 	if (bootverbose)
2202 		kprintf(
2203 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2204 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2205 		    dinfo->cfg.func, oldstate, state);
2206 
2207 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2208 	if (delay)
2209 		DELAY(delay);
2210 	return (0);
2211 }
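
/*
 * For example, a driver that wants its device powered down during
 * suspend could call (a sketch; whether D3 is appropriate is
 * device-specific):
 *
 *	pci_set_powerstate(dev, PCI_POWERSTATE_D3);
 *
 * which lands here via the method interface; per the table above, the
 * config space write is then followed by a 10ms DELAY() before we
 * return.
 */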
2212 
2213 int
2214 pci_get_powerstate_method(device_t dev, device_t child)
2215 {
2216 	struct pci_devinfo *dinfo = device_get_ivars(child);
2217 	pcicfgregs *cfg = &dinfo->cfg;
2218 	uint16_t status;
2219 	int result;
2220 
2221 	if (cfg->pp.pp_cap != 0) {
2222 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2223 		switch (status & PCIM_PSTAT_DMASK) {
2224 		case PCIM_PSTAT_D0:
2225 			result = PCI_POWERSTATE_D0;
2226 			break;
2227 		case PCIM_PSTAT_D1:
2228 			result = PCI_POWERSTATE_D1;
2229 			break;
2230 		case PCIM_PSTAT_D2:
2231 			result = PCI_POWERSTATE_D2;
2232 			break;
2233 		case PCIM_PSTAT_D3:
2234 			result = PCI_POWERSTATE_D3;
2235 			break;
2236 		default:
2237 			result = PCI_POWERSTATE_UNKNOWN;
2238 			break;
2239 		}
2240 	} else {
2241 		/* No support, device is always at D0 */
2242 		result = PCI_POWERSTATE_D0;
2243 	}
2244 	return (result);
2245 }
2246 
2247 /*
2248  * Some convenience functions for PCI device drivers.
2249  */
2250 
2251 static __inline void
2252 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2253 {
2254 	uint16_t	command;
2255 
2256 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2257 	command |= bit;
2258 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2259 }
2260 
2261 static __inline void
2262 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2263 {
2264 	uint16_t	command;
2265 
2266 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2267 	command &= ~bit;
2268 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2269 }
2270 
2271 int
2272 pci_enable_busmaster_method(device_t dev, device_t child)
2273 {
2274 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2275 	return (0);
2276 }
2277 
2278 int
2279 pci_disable_busmaster_method(device_t dev, device_t child)
2280 {
2281 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2282 	return (0);
2283 }
2284 
2285 int
2286 pci_enable_io_method(device_t dev, device_t child, int space)
2287 {
2288 	uint16_t command;
2289 	uint16_t bit;
2290 	char *error;
2291 
2292 	bit = 0;
2293 	error = NULL;
2294 
2295 	switch(space) {
2296 	case SYS_RES_IOPORT:
2297 		bit = PCIM_CMD_PORTEN;
2298 		error = "port";
2299 		break;
2300 	case SYS_RES_MEMORY:
2301 		bit = PCIM_CMD_MEMEN;
2302 		error = "memory";
2303 		break;
2304 	default:
2305 		return (EINVAL);
2306 	}
2307 	pci_set_command_bit(dev, child, bit);
2308 	/* Some devices seem to need a brief stall here, what to do? */
2309 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2310 	if (command & bit)
2311 		return (0);
2312 	device_printf(child, "failed to enable %s mapping!\n", error);
2313 	return (ENXIO);
2314 }
2315 
2316 int
2317 pci_disable_io_method(device_t dev, device_t child, int space)
2318 {
2319 	uint16_t command;
2320 	uint16_t bit;
2321 	char *error;
2322 
2323 	bit = 0;
2324 	error = NULL;
2325 
2326 	switch(space) {
2327 	case SYS_RES_IOPORT:
2328 		bit = PCIM_CMD_PORTEN;
2329 		error = "port";
2330 		break;
2331 	case SYS_RES_MEMORY:
2332 		bit = PCIM_CMD_MEMEN;
2333 		error = "memory";
2334 		break;
2335 	default:
2336 		return (EINVAL);
2337 	}
2338 	pci_clear_command_bit(dev, child, bit);
2339 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2340 	if (command & bit) {
2341 		device_printf(child, "failed to disable %s mapping!\n", error);
2342 		return (ENXIO);
2343 	}
2344 	return (0);
2345 }
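
/*
 * Typical use of the helpers above from a driver attach routine (a
 * sketch; a real driver picks SYS_RES_IOPORT or SYS_RES_MEMORY to
 * match the kind of BAR it intends to use):
 *
 *	pci_enable_busmaster(dev);
 *	pci_enable_io(dev, SYS_RES_MEMORY);
 */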
2346 
2347 /*
2348  * New style pci driver.  Parent device is either a pci-host-bridge or a
2349  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2350  */
2351 
2352 void
2353 pci_print_verbose(struct pci_devinfo *dinfo)
2354 {
2355 
2356 	if (bootverbose) {
2357 		pcicfgregs *cfg = &dinfo->cfg;
2358 
2359 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2360 		    cfg->vendor, cfg->device, cfg->revid);
2361 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2362 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2363 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2364 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2365 		    cfg->mfdev);
2366 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2367 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2368 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2369 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2370 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2371 		if (cfg->intpin > 0)
2372 			kprintf("\tintpin=%c, irq=%d\n",
2373 			    cfg->intpin +'a' -1, cfg->intline);
2374 		if (cfg->pp.pp_cap) {
2375 			uint16_t status;
2376 
2377 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2378 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2379 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2380 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2381 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2382 			    status & PCIM_PSTAT_DMASK);
2383 		}
2384 		if (cfg->msi.msi_location) {
2385 			int ctrl;
2386 
2387 			ctrl = cfg->msi.msi_ctrl;
2388 			kprintf("\tMSI supports %d message%s%s%s\n",
2389 			    cfg->msi.msi_msgnum,
2390 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2391 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2392 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2393 		}
2394 		if (cfg->msix.msix_location) {
2395 			kprintf("\tMSI-X supports %d message%s ",
2396 			    cfg->msix.msix_msgnum,
2397 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2398 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2399 				kprintf("in map 0x%x\n",
2400 				    cfg->msix.msix_table_bar);
2401 			else
2402 				kprintf("in maps 0x%x and 0x%x\n",
2403 				    cfg->msix.msix_table_bar,
2404 				    cfg->msix.msix_pba_bar);
2405 		}
2406 		pci_print_verbose_expr(cfg);
2407 	}
2408 }
2409 
2410 static void
2411 pci_print_verbose_expr(const pcicfgregs *cfg)
2412 {
2413 	const struct pcicfg_expr *expr = &cfg->expr;
2414 	const char *port_name;
2415 	uint16_t port_type;
2416 
2417 	if (!bootverbose)
2418 		return;
2419 
2420 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2421 		return;
2422 
2423 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2424 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2425 
2426 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2427 
2428 	switch (port_type) {
2429 	case PCIE_END_POINT:
2430 		port_name = "DEVICE";
2431 		break;
2432 	case PCIE_LEG_END_POINT:
2433 		port_name = "LEGDEV";
2434 		break;
2435 	case PCIE_ROOT_PORT:
2436 		port_name = "ROOT";
2437 		break;
2438 	case PCIE_UP_STREAM_PORT:
2439 		port_name = "UPSTREAM";
2440 		break;
2441 	case PCIE_DOWN_STREAM_PORT:
2442 		port_name = "DOWNSTRM";
2443 		break;
2444 	case PCIE_PCIE2PCI_BRIDGE:
2445 		port_name = "PCIE2PCI";
2446 		break;
2447 	case PCIE_PCI2PCIE_BRIDGE:
2448 		port_name = "PCI2PCIE";
2449 		break;
2450 	case PCIE_ROOT_END_POINT:
2451 		port_name = "ROOTDEV";
2452 		break;
2453 	case PCIE_ROOT_EVT_COLL:
2454 		port_name = "ROOTEVTC";
2455 		break;
2456 	default:
2457 		port_name = NULL;
2458 		break;
2459 	}
2460 	if ((port_type == PCIE_ROOT_PORT ||
2461 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2462 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2463 		port_name = NULL;
2464 	if (port_name != NULL)
2465 		kprintf("[%s]", port_name);
2466 
2467 	if (pcie_slotimpl(cfg)) {
2468 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2469 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2470 			kprintf("[HOTPLUG]");
2471 	}
2472 	kprintf("\n");
2473 }
2474 
2475 static int
2476 pci_porten(device_t pcib, int b, int s, int f)
2477 {
2478 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2479 		& PCIM_CMD_PORTEN) != 0;
2480 }
2481 
2482 static int
2483 pci_memen(device_t pcib, int b, int s, int f)
2484 {
2485 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2486 		& PCIM_CMD_MEMEN) != 0;
2487 }
2488 
2489 /*
2490  * Add a resource based on a pci map register. Return 1 if the map
2491  * register is a 32bit map register or 2 if it is a 64bit register.
2492  */
2493 static int
2494 pci_add_map(device_t pcib, device_t bus, device_t dev,
2495     int b, int s, int f, int reg, struct resource_list *rl, int force,
2496     int prefetch)
2497 {
2498 	uint32_t map;
2499 	uint16_t old_cmd;
2500 	pci_addr_t base;
2501 	pci_addr_t start, end, count;
2502 	uint8_t ln2size;
2503 	uint8_t ln2range;
2504 	uint32_t testval;
2505 	uint16_t cmd;
2506 	int type;
2507 	int barlen;
2508 	struct resource *res;
2509 
2510 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2511 
2512 	/* Disable access to device memory */
2513 	old_cmd = 0;
2514 	if (PCI_BAR_MEM(map)) {
2515 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2516 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2517 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2518 	}
2519 
2520 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2521 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2522 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2523 
2524 	/* Restore memory access mode */
2525 	if (PCI_BAR_MEM(map)) {
2526 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2527 	}
2528 
2529 	if (PCI_BAR_MEM(map)) {
2530 		type = SYS_RES_MEMORY;
2531 		if (map & PCIM_BAR_MEM_PREFETCH)
2532 			prefetch = 1;
2533 	} else
2534 		type = SYS_RES_IOPORT;
2535 	ln2size = pci_mapsize(testval);
2536 	ln2range = pci_maprange(testval);
2537 	base = pci_mapbase(map);
2538 	barlen = ln2range == 64 ? 2 : 1;
2539 
2540 	/*
2541 	 * For I/O registers, if bottom bit is set, and the next bit up
2542 	 * isn't clear, we know we have a BAR that doesn't conform to the
2543 	 * spec, so ignore it.  Also, sanity check the size of the data
2544 	 * area against the type of resource involved.  Memory must be at
2545 	 * least 16 bytes in size, while I/O ranges must be at least 4.
2546 	 */
2547 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2548 		return (barlen);
2549 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2550 	    (type == SYS_RES_IOPORT && ln2size < 2))
2551 		return (barlen);
2552 
2553 	if (ln2range == 64)
2554 		/* Read the other half of a 64bit map register */
2555 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2556 	if (bootverbose) {
2557 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2558 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2559 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2560 			kprintf(", port disabled\n");
2561 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2562 			kprintf(", memory disabled\n");
2563 		else
2564 			kprintf(", enabled\n");
2565 	}
2566 
2567 	/*
2568 	 * If base is 0, then we have problems.  It is best to ignore
2569 	 * such entries for the moment.  These will be allocated later if
2570 	 * the driver specifically requests them.  However, some
2571 	 * removable busses look better when all resources are allocated,
2572 	 * so allow '0' to be overridden.
2573 	 *
2574 	 * Similarly treat maps whose value is the same as the test value
2575 	 * read back.  These maps have had all f's written to them by the
2576 	 * BIOS in an attempt to disable the resources.
2577 	 */
2578 	if (!force && (base == 0 || map == testval))
2579 		return (barlen);
2580 	if ((u_long)base != base) {
2581 		device_printf(bus,
2582 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
2583 		    pci_get_domain(dev), b, s, f, reg);
2584 		return (barlen);
2585 	}
2586 
2587 	/*
2588 	 * This code theoretically does the right thing, but has
2589 	 * undesirable side effects in some cases where peripherals
2590 	 * respond oddly to having these bits enabled.  Let the user
2591 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2592 	 * default).
2593 	 */
2594 	if (pci_enable_io_modes) {
2595 		/* Turn on resources that have been left off by a lazy BIOS */
2596 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2597 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2598 			cmd |= PCIM_CMD_PORTEN;
2599 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2600 		}
2601 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2602 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2603 			cmd |= PCIM_CMD_MEMEN;
2604 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2605 		}
2606 	} else {
2607 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2608 			return (barlen);
2609 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2610 			return (barlen);
2611 	}
2612 
2613 	count = 1 << ln2size;
2614 	if (base == 0 || base == pci_mapbase(testval)) {
2615 		start = 0;	/* Let the parent decide. */
2616 		end = ~0ULL;
2617 	} else {
2618 		start = base;
2619 		end = base + (1 << ln2size) - 1;
2620 	}
2621 	resource_list_add(rl, type, reg, start, end, count, -1);
2622 
2623 	/*
2624 	 * Try to allocate the resource for this BAR from our parent
2625 	 * so that this resource range is already reserved.  The
2626 	 * driver for this device will later inherit this resource in
2627 	 * pci_alloc_resource().
2628 	 */
2629 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2630 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2631 	if (res == NULL) {
2632 		/*
2633 		 * If the allocation fails, delete the resource list
2634 		 * entry to force pci_alloc_resource() to allocate
2635 		 * resources from the parent.
2636 		 */
2637 		resource_list_delete(rl, type, reg);
2638 #ifdef PCI_BAR_CLEAR
2639 		/* Clear the BAR */
2640 		start = 0;
2641 #else	/* !PCI_BAR_CLEAR */
2642 		/*
2643 		 * Don't clear the BAR here.  Some BIOSes list the HPET
2644 		 * as a PCI function; clearing the BAR would cause the
2645 		 * HPET timer to stop ticking.
2646 		 */
2647 		if (bootverbose) {
2648 			kprintf("pci:%d:%d:%d: resource reservation failed "
2649 				"%#jx - %#jx\n", b, s, f,
2650 				(intmax_t)start, (intmax_t)end);
2651 		}
2652 		return (barlen);
2653 #endif	/* PCI_BAR_CLEAR */
2654 	} else {
2655 		start = rman_get_start(res);
2656 	}
2657 	pci_write_config(dev, reg, start, 4);
2658 	if (ln2range == 64)
2659 		pci_write_config(dev, reg + 4, start >> 32, 4);
2660 	return (barlen);
2661 }
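
/*
 * Worked example of the sizing probe above: suppose a 32-bit memory
 * BAR reads back 0xFFF00000 (ignoring the low type bits) after
 * 0xffffffff has been written to it.  The lowest writable address bit
 * is bit 20, so pci_mapsize() returns ln2size == 20 and the BAR
 * decodes 1 << 20 == 1MB.  pci_maprange() inspects the type bits of
 * the original value to tell 32-bit from 64-bit BARs; the latter
 * consume two map registers, hence barlen == 2.
 */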
2662 
2663 /*
2664  * For ATA devices we need to decide early what addressing mode to use.
2665  * Legacy mode demands that the primary and secondary ATA ports sit on
2666  * the same addresses that old ISA hardware did.  This dictates that we
2667  * use those addresses and ignore the BARs if we cannot set PCI native
2668  * addressing mode.
2669  */
2670 static void
2671 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2672     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2673 {
2674 	int rid, type, progif;
2675 #if 0
2676 	/* if this device supports PCI native addressing use it */
2677 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2678 	if ((progif & 0x8a) == 0x8a) {
2679 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2680 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2681 			kprintf("Trying ATA native PCI addressing mode\n");
2682 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2683 		}
2684 	}
2685 #endif
2686 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2687 	type = SYS_RES_IOPORT;
2688 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2689 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2690 		    prefetchmask & (1 << 0));
2691 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2692 		    prefetchmask & (1 << 1));
2693 	} else {
2694 		rid = PCIR_BAR(0);
2695 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2696 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2697 		    0, -1);
2698 		rid = PCIR_BAR(1);
2699 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2700 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2701 		    0, -1);
2702 	}
2703 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2704 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2705 		    prefetchmask & (1 << 2));
2706 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2707 		    prefetchmask & (1 << 3));
2708 	} else {
2709 		rid = PCIR_BAR(2);
2710 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2711 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2712 		    0, -1);
2713 		rid = PCIR_BAR(3);
2714 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2715 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2716 		    0, -1);
2717 	}
2718 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2719 	    prefetchmask & (1 << 4));
2720 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2721 	    prefetchmask & (1 << 5));
2722 }
2723 
2724 static void
2725 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2726 {
2727 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2728 	pcicfgregs *cfg = &dinfo->cfg;
2729 	char tunable_name[64];
2730 	int irq;
2731 
2732 	/* Has to have an intpin to have an interrupt. */
2733 	if (cfg->intpin == 0)
2734 		return;
2735 
2736 	/* Let the user override the IRQ with a tunable. */
2737 	irq = PCI_INVALID_IRQ;
2738 	ksnprintf(tunable_name, sizeof(tunable_name),
2739 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2740 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2741 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2742 		if (irq >= 255 || irq <= 0) {
2743 			irq = PCI_INVALID_IRQ;
2744 		} else {
2745 			if (machintr_legacy_intr_find(irq,
2746 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2747 				device_printf(dev,
2748 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2749 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2750 				    cfg->intpin + 'A' - 1, irq);
2751 				irq = PCI_INVALID_IRQ;
2752 			} else {
2753 				BUS_CONFIG_INTR(bus, dev, irq,
2754 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2755 			}
2756 		}
2757 	}
2758 
2759 	/*
2760 	 * If we didn't get an IRQ via the tunable, then we either use the
2761 	 * IRQ value in the intline register or we ask the bus to route an
2762 	 * interrupt for us.  If force_route is true, then we only use the
2763 	 * value in the intline register if the bus was unable to assign an
2764 	 * IRQ.
2765 	 */
2766 	if (!PCI_INTERRUPT_VALID(irq)) {
2767 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2768 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2769 		if (!PCI_INTERRUPT_VALID(irq))
2770 			irq = cfg->intline;
2771 	}
2772 
2773 	/* If after all that we don't have an IRQ, just bail. */
2774 	if (!PCI_INTERRUPT_VALID(irq))
2775 		return;
2776 
2777 	/* Update the config register if it changed. */
2778 	if (irq != cfg->intline) {
2779 		cfg->intline = irq;
2780 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2781 	}
2782 
2783 	/* Add this IRQ as rid 0 interrupt resource. */
2784 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2785 	    machintr_legacy_intr_cpuid(irq));
2786 }
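
/*
 * Example of the tunable consulted above: to force INTA of the device
 * at domain 0, bus 0, slot 2, function 0 onto IRQ 10 (hypothetical
 * values, purely for illustration), one would set:
 *
 *	hw.pci0.0.2.0.INTA.irq=10
 *
 * in the loader configuration.  Values outside 1-254, and IRQs that
 * the interrupt layer cannot configure as level-triggered/active-low,
 * are rejected above.
 */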
2787 
2788 /* Perform early OHCI takeover from SMM. */
2789 static void
2790 ohci_early_takeover(device_t self)
2791 {
2792 	struct resource *res;
2793 	uint32_t ctl;
2794 	int rid;
2795 	int i;
2796 
2797 	rid = PCIR_BAR(0);
2798 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2799 	if (res == NULL)
2800 		return;
2801 
2802 	ctl = bus_read_4(res, OHCI_CONTROL);
2803 	if (ctl & OHCI_IR) {
2804 		if (bootverbose)
2805 			kprintf("ohci early: "
2806 			    "SMM active, request owner change\n");
2807 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2808 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2809 			DELAY(1000);
2810 			ctl = bus_read_4(res, OHCI_CONTROL);
2811 		}
2812 		if (ctl & OHCI_IR) {
2813 			if (bootverbose)
2814 				kprintf("ohci early: "
2815 				    "SMM does not respond, resetting\n");
2816 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2817 		}
2818 		/* Disable interrupts */
2819 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2820 	}
2821 
2822 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2823 }
2824 
2825 /* Perform early UHCI takeover from SMM. */
2826 static void
2827 uhci_early_takeover(device_t self)
2828 {
2829 	struct resource *res;
2830 	int rid;
2831 
2832 	/*
2833 	 * Set the PIRQD enable bit and switch off all the others.  We don't
2834 	 * want legacy support to interfere with us.  XXX Does this also mean
2835 	 * that the BIOS won't touch the keyboard anymore if it is connected
2836 	 * to the ports of the root hub?
2837 	 */
2838 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2839 
2840 	/* Disable interrupts */
2841 	rid = PCI_UHCI_BASE_REG;
2842 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2843 	if (res != NULL) {
2844 		bus_write_2(res, UHCI_INTR, 0);
2845 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2846 	}
2847 }
2848 
2849 /* Perform early EHCI takeover from SMM. */
2850 static void
2851 ehci_early_takeover(device_t self)
2852 {
2853 	struct resource *res;
2854 	uint32_t cparams;
2855 	uint32_t eec;
2856 	uint32_t eecp;
2857 	uint32_t bios_sem;
2858 	uint32_t offs;
2859 	int rid;
2860 	int i;
2861 
2862 	rid = PCIR_BAR(0);
2863 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2864 	if (res == NULL)
2865 		return;
2866 
2867 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2868 
2869 	/* Synchronise with the BIOS if it owns the controller. */
2870 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2871 	    eecp = EHCI_EECP_NEXT(eec)) {
2872 		eec = pci_read_config(self, eecp, 4);
2873 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2874 			continue;
2875 		}
2876 		bios_sem = pci_read_config(self, eecp +
2877 		    EHCI_LEGSUP_BIOS_SEM, 1);
2878 		if (bios_sem == 0) {
2879 			continue;
2880 		}
2881 		if (bootverbose)
2882 			kprintf("ehci early: "
2883 			    "SMM active, request owner change\n");
2884 
2885 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2886 
2887 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2888 			DELAY(1000);
2889 			bios_sem = pci_read_config(self, eecp +
2890 			    EHCI_LEGSUP_BIOS_SEM, 1);
2891 		}
2892 
2893 		if (bios_sem != 0) {
2894 			if (bootverbose)
2895 				kprintf("ehci early: "
2896 				    "SMM does not respond\n");
2897 		}
2898 		/* Disable interrupts */
2899 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2900 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2901 	}
2902 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2903 }
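
/*
 * Background for the loop above, summarized from the EHCI spec: the
 * EECP field of HCCPARAMS points at a chain of extended capabilities
 * living in PCI config space.  We look for the USB legacy support
 * capability (EHCI_EC_LEGSUP), whose BIOS-owned and OS-owned semaphore
 * bytes implement the ownership handshake: the OS sets its semaphore
 * and waits for SMM to release the BIOS one.
 */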
2904 
2905 /* Perform early XHCI takeover from SMM. */
2906 static void
2907 xhci_early_takeover(device_t self)
2908 {
2909 	struct resource *res;
2910 	uint32_t cparams;
2911 	uint32_t eec;
2912 	uint32_t eecp;
2913 	uint32_t bios_sem;
2914 	uint32_t offs;
2915 	int rid;
2916 	int i;
2917 
2918 	rid = PCIR_BAR(0);
2919 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2920 	if (res == NULL)
2921 		return;
2922 
2923 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2924 
2925 	eec = -1;
2926 
2927 	/* Synchronise with the BIOS if it owns the controller. */
2928 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2929 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2930 		eec = bus_read_4(res, eecp);
2931 
2932 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2933 			continue;
2934 
2935 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2936 
2937 		if (bios_sem == 0) {
2938 			if (bootverbose)
2939 				kprintf("xhci early: xhci is not owned by SMM\n");
2940 
2941 			continue;
2942 		}
2943 
2944 		if (bootverbose)
2945 			kprintf("xhci early: "
2946 			    "SMM active, request owner change\n");
2947 
2948 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
2949 
2950 		/* wait a maximum of 5 seconds */
2951 
2952 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
2953 			DELAY(1000);
2954 
2955 			bios_sem = bus_read_1(res, eecp +
2956 			    XHCI_XECP_BIOS_SEM);
2957 		}
2958 
2959 		if (bios_sem != 0) {
2960 			if (bootverbose) {
2961 				kprintf("xhci early: "
2962 				    "SMM does not respond\n");
2963 				kprintf("xhci early: "
2964 				    "taking xhci by force\n");
2965 			}
2966 			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
2967 		} else {
2968 			if (bootverbose)
2969 				kprintf("xhci early: "
2970 				    "handover successful\n");
2971 		}
2972 
2973 		/* Disable interrupts */
2974 		offs = bus_read_1(res, XHCI_CAPLENGTH);
2975 		bus_write_4(res, offs + XHCI_USBCMD, 0);
2976 		bus_read_4(res, offs + XHCI_USBSTS);
2977 	}
2978 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2979 }
2980 
2981 void
2982 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
2983 {
2984 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2985 	pcicfgregs *cfg = &dinfo->cfg;
2986 	struct resource_list *rl = &dinfo->resources;
2987 	struct pci_quirk *q;
2988 	int b, i, f, s;
2989 
2990 	b = cfg->bus;
2991 	s = cfg->slot;
2992 	f = cfg->func;
2993 
2994 	/* ATA devices need special map treatment */
2995 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2996 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2997 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2998 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2999 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3000 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
3001 	else
3002 		for (i = 0; i < cfg->nummaps;)
3003 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
3004 			    rl, force, prefetchmask & (1 << i));
3005 
3006 	/*
3007 	 * Add additional, quirked resources.
3008 	 */
3009 	for (q = &pci_quirks[0]; q->devid; q++) {
3010 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3011 		    && q->type == PCI_QUIRK_MAP_REG)
3012 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
3013 			  force, 0);
3014 	}
3015 
3016 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3017 		/*
3018 		 * Try to re-route interrupts. Sometimes the BIOS or
3019 		 * firmware may leave bogus values in these registers.
3020 		 * If the re-route fails, then just stick with what we
3021 		 * have.
3022 		 */
3023 		pci_assign_interrupt(bus, dev, 1);
3024 	}
3025 
3026 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3027 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3028 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3029 			xhci_early_takeover(dev);
3030 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3031 			ehci_early_takeover(dev);
3032 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3033 			ohci_early_takeover(dev);
3034 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3035 			uhci_early_takeover(dev);
3036 	}
3037 }
3038 
3039 void
3040 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3041 {
3042 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3043 	device_t pcib = device_get_parent(dev);
3044 	struct pci_devinfo *dinfo;
3045 	int maxslots;
3046 	int s, f, pcifunchigh;
3047 	uint8_t hdrtype;
3048 
3049 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3050 	    ("dinfo_size too small"));
3051 	maxslots = PCIB_MAXSLOTS(pcib);
3052 	for (s = 0; s <= maxslots; s++) {
3053 		pcifunchigh = 0;
3054 		f = 0;
3055 		DELAY(1);
3056 		hdrtype = REG(PCIR_HDRTYPE, 1);
3057 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3058 			continue;
3059 		if (hdrtype & PCIM_MFDEV)
3060 			pcifunchigh = PCI_FUNCMAX;
3061 		for (f = 0; f <= pcifunchigh; f++) {
3062 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3063 			    dinfo_size);
3064 			if (dinfo != NULL) {
3065 				pci_add_child(dev, dinfo);
3066 			}
3067 		}
3068 	}
3069 #undef REG
3070 }
3071 
3072 void
3073 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3074 {
3075 	device_t pcib;
3076 
3077 	pcib = device_get_parent(bus);
3078 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3079 	device_set_ivars(dinfo->cfg.dev, dinfo);
3080 	resource_list_init(&dinfo->resources);
3081 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3082 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3083 	pci_print_verbose(dinfo);
3084 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
3085 }
3086 
3087 static int
3088 pci_probe(device_t dev)
3089 {
3090 	device_set_desc(dev, "PCI bus");
3091 
3092 	/* Allow other subclasses to override this driver. */
3093 	return (-1000);
3094 }
3095 
3096 static int
3097 pci_attach(device_t dev)
3098 {
3099 	int busno, domain;
3100 
3101 	/*
3102 	 * Since there can be multiple independently numbered PCI
3103 	 * busses on systems with multiple PCI domains, we can't use
3104 	 * the unit number to decide which bus we are probing. We ask
3105 	 * the parent pcib what our domain and bus numbers are.
3106 	 */
3107 	domain = pcib_get_domain(dev);
3108 	busno = pcib_get_bus(dev);
3109 	if (bootverbose)
3110 		device_printf(dev, "domain=%d, physical bus=%d\n",
3111 		    domain, busno);
3112 
3113 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3114 
3115 	return (bus_generic_attach(dev));
3116 }
3117 
3118 int
3119 pci_suspend(device_t dev)
3120 {
3121 	int dstate, error, i, numdevs;
3122 	device_t acpi_dev, child, *devlist;
3123 	struct pci_devinfo *dinfo;
3124 
3125 	/*
3126 	 * Save the PCI configuration space for each child and set the
3127 	 * device in the appropriate power state for this sleep state.
3128 	 */
3129 	acpi_dev = NULL;
3130 	if (pci_do_power_resume)
3131 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3132 	device_get_children(dev, &devlist, &numdevs);
3133 	for (i = 0; i < numdevs; i++) {
3134 		child = devlist[i];
3135 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3136 		pci_cfg_save(child, dinfo, 0);
3137 	}
3138 
3139 	/* Suspend devices before potentially powering them down. */
3140 	error = bus_generic_suspend(dev);
3141 	if (error) {
3142 		kfree(devlist, M_TEMP);
3143 		return (error);
3144 	}
3145 
3146 	/*
3147 	 * Always set the device to D3.  If ACPI suggests a different
3148 	 * power state, use it instead.  If ACPI is not present, the
3149 	 * firmware is responsible for managing device power.  Skip
3150 	 * children who aren't attached since they are powered down
3151 	 * separately.  Only manage type 0 devices for now.
3152 	 */
3153 	for (i = 0; acpi_dev && i < numdevs; i++) {
3154 		child = devlist[i];
3155 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3156 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3157 			dstate = PCI_POWERSTATE_D3;
3158 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3159 			pci_set_powerstate(child, dstate);
3160 		}
3161 	}
3162 	kfree(devlist, M_TEMP);
3163 	return (0);
3164 }
3165 
3166 int
3167 pci_resume(device_t dev)
3168 {
3169 	int i, numdevs;
3170 	device_t acpi_dev, child, *devlist;
3171 	struct pci_devinfo *dinfo;
3172 
3173 	/*
3174 	 * Set each child to D0 and restore its PCI configuration space.
3175 	 */
3176 	acpi_dev = NULL;
3177 	if (pci_do_power_resume)
3178 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3179 	device_get_children(dev, &devlist, &numdevs);
3180 	for (i = 0; i < numdevs; i++) {
3181 		/*
3182 		 * Notify ACPI we're going to D0 but ignore the result.  If
3183 		 * ACPI is not present, the firmware is responsible for
3184 		 * managing device power.  Only manage type 0 devices for now.
3185 		 */
3186 		child = devlist[i];
3187 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3188 		if (acpi_dev && device_is_attached(child) &&
3189 		    dinfo->cfg.hdrtype == 0) {
3190 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3191 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3192 		}
3193 
3194 		/* Now the device is powered up, restore its config space. */
3195 		pci_cfg_restore(child, dinfo);
3196 	}
3197 	kfree(devlist, M_TEMP);
3198 	return (bus_generic_resume(dev));
3199 }
3200 
3201 static void
3202 pci_load_vendor_data(void)
3203 {
3204 	caddr_t vendordata, info;
3205 
3206 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3207 		info = preload_search_info(vendordata, MODINFO_ADDR);
3208 		pci_vendordata = *(char **)info;
3209 		info = preload_search_info(vendordata, MODINFO_SIZE);
3210 		pci_vendordata_size = *(size_t *)info;
3211 		/* terminate the database */
3212 		pci_vendordata[pci_vendordata_size] = '\n';
3213 	}
3214 }
3215 
3216 void
3217 pci_driver_added(device_t dev, driver_t *driver)
3218 {
3219 	int numdevs;
3220 	device_t *devlist;
3221 	device_t child;
3222 	struct pci_devinfo *dinfo;
3223 	int i;
3224 
3225 	if (bootverbose)
3226 		device_printf(dev, "driver added\n");
3227 	DEVICE_IDENTIFY(driver, dev);
3228 	device_get_children(dev, &devlist, &numdevs);
3229 	for (i = 0; i < numdevs; i++) {
3230 		child = devlist[i];
3231 		if (device_get_state(child) != DS_NOTPRESENT)
3232 			continue;
3233 		dinfo = device_get_ivars(child);
3234 		pci_print_verbose(dinfo);
3235 		if (bootverbose)
3236 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3237 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3238 			    dinfo->cfg.func);
3239 		pci_cfg_restore(child, dinfo);
3240 		if (device_probe_and_attach(child) != 0)
3241 			pci_cfg_save(child, dinfo, 1);
3242 	}
3243 	kfree(devlist, M_TEMP);
3244 }
3245 
3246 static void
3247 pci_child_detached(device_t parent __unused, device_t child)
3248 {
3249 	/* Turn child's power off */
3250 	pci_cfg_save(child, device_get_ivars(child), 1);
3251 }
3252 
3253 int
3254 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3255     driver_intr_t *intr, void *arg, void **cookiep,
3256     lwkt_serialize_t serializer, const char *desc)
3257 {
3258 	int rid, error;
3259 	void *cookie;
3260 
3261 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3262 	    arg, &cookie, serializer, desc);
3263 	if (error)
3264 		return (error);
3265 
3266 	/* If this is not a direct child, just bail out. */
3267 	if (device_get_parent(child) != dev) {
3268 		*cookiep = cookie;
3269 		return(0);
3270 	}
3271 
3272 	rid = rman_get_rid(irq);
3273 	if (rid == 0) {
3274 		/* Make sure that INTx is enabled */
3275 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3276 	} else {
3277 		struct pci_devinfo *dinfo = device_get_ivars(child);
3278 		uint64_t addr;
3279 		uint32_t data;
3280 
3281 		/*
3282 		 * Check to see if the interrupt is MSI or MSI-X.
3283 		 * Ask our parent to map the MSI and give
3284 		 * us the address and data register values.
3285 		 * If we fail for some reason, teardown the
3286 		 * interrupt handler.
3287 		 */
3288 		if (dinfo->cfg.msi.msi_alloc > 0) {
3289 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3290 
3291 			if (msi->msi_addr == 0) {
3292 				KASSERT(msi->msi_handlers == 0,
3293 			    ("MSI has handlers, but vectors not mapped"));
3294 				    ("MSI has handlers, but vectors not mapped"));
3295 				    child, rman_get_start(irq), &addr, &data,
3296 				    rman_get_cpuid(irq));
3297 				if (error)
3298 					goto bad;
3299 				msi->msi_addr = addr;
3300 				msi->msi_data = data;
3301 				pci_enable_msi(child, addr, data);
3302 			}
3303 			msi->msi_handlers++;
3304 		} else {
3305 			struct msix_vector *mv;
3306 			u_int vector;
3307 
3308 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3309 			    ("No MSI-X or MSI rid %d allocated", rid));
3310 
3311 			mv = pci_find_msix_vector(child, rid);
3312 			KASSERT(mv != NULL,
3313 			    ("MSI-X rid %d is not allocated", rid));
3314 			KASSERT(mv->mv_address == 0,
3315 			    ("MSI-X rid %d has been setup", rid));
3316 
3317 			error = PCIB_MAP_MSI(device_get_parent(dev),
3318 			    child, rman_get_start(irq), &addr, &data,
3319 			    rman_get_cpuid(irq));
3320 			if (error)
3321 				goto bad;
3322 			mv->mv_address = addr;
3323 			mv->mv_data = data;
3324 
3325 			vector = PCI_MSIX_RID2VEC(rid);
3326 			pci_setup_msix_vector(child, vector,
3327 			    mv->mv_address, mv->mv_data);
3328 			pci_unmask_msix_vector(child, vector);
3329 		}
3330 
3331 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3332 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3333 	bad:
3334 		if (error) {
3335 			(void)bus_generic_teardown_intr(dev, child, irq,
3336 			    cookie);
3337 			return (error);
3338 		}
3339 	}
3340 	*cookiep = cookie;
3341 	return (0);
3342 }
3343 
3344 int
3345 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3346     void *cookie)
3347 {
3348 	int rid, error;
3349 
3350 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3351 		return (EINVAL);
3352 
3353 	/* If this isn't a direct child, just bail out */
3354 	if (device_get_parent(child) != dev)
3355 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3356 
3357 	rid = rman_get_rid(irq);
3358 	if (rid == 0) {
3359 		/* Mask INTx */
3360 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3361 	} else {
3362 		struct pci_devinfo *dinfo = device_get_ivars(child);
3363 
3364 		/*
3365 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3366 		 * decrement the appropriate handlers count and mask the
3367 		 * MSI-X message, or disable MSI messages if the count
3368 		 * drops to 0.
3369 		 */
3370 		if (dinfo->cfg.msi.msi_alloc > 0) {
3371 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3372 
3373 			KASSERT(rid <= msi->msi_alloc,
3374 			    ("MSI index too high"));
3375 			KASSERT(msi->msi_handlers > 0,
3376 			    ("MSI rid %d is not setup", rid));
3377 
3378 			msi->msi_handlers--;
3379 			if (msi->msi_handlers == 0)
3380 				pci_disable_msi(child);
3381 		} else {
3382 			struct msix_vector *mv;
3383 
3384 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3385 			    ("No MSI or MSI-X rid %d allocated", rid));
3386 
3387 			mv = pci_find_msix_vector(child, rid);
3388 			KASSERT(mv != NULL,
3389 			    ("MSI-X rid %d is not allocated", rid));
3390 			KASSERT(mv->mv_address != 0,
3391 			    ("MSI-X rid %d has not been setup", rid));
3392 
3393 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3394 			mv->mv_address = 0;
3395 			mv->mv_data = 0;
3396 		}
3397 	}
3398 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3399 	if (rid > 0)
3400 		KASSERT(error == 0,
3401 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3402 	return (error);
3403 }
3404 
3405 int
3406 pci_print_child(device_t dev, device_t child)
3407 {
3408 	struct pci_devinfo *dinfo;
3409 	struct resource_list *rl;
3410 	int retval = 0;
3411 
3412 	dinfo = device_get_ivars(child);
3413 	rl = &dinfo->resources;
3414 
3415 	retval += bus_print_child_header(dev, child);
3416 
3417 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3418 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3419 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3420 	if (device_get_flags(dev))
3421 		retval += kprintf(" flags %#x", device_get_flags(dev));
3422 
3423 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3424 	    pci_get_function(child));
3425 
3426 	retval += bus_print_child_footer(dev, child);
3427 
3428 	return (retval);
3429 }
3430 
3431 static struct
3432 {
3433 	int	class;
3434 	int	subclass;
3435 	char	*desc;
3436 } pci_nomatch_tab[] = {
3437 	{PCIC_OLD,		-1,			"old"},
3438 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3439 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3440 	{PCIC_STORAGE,		-1,			"mass storage"},
3441 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3442 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3443 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3444 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3445 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3446 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3447 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3448 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3449 	{PCIC_NETWORK,		-1,			"network"},
3450 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3451 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3452 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3453 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3454 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3455 	{PCIC_DISPLAY,		-1,			"display"},
3456 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3457 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3458 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3459 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3460 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3461 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3462 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3463 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3464 	{PCIC_MEMORY,		-1,			"memory"},
3465 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3466 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3467 	{PCIC_BRIDGE,		-1,			"bridge"},
3468 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3469 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3470 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3471 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3472 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3473 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3474 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3475 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3476 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3477 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3478 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3479 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3480 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3481 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3482 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3483 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3484 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3485 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3486 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3487 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3488 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3489 	{PCIC_INPUTDEV,		-1,			"input device"},
3490 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3491 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3492 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3493 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3494 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3495 	{PCIC_DOCKING,		-1,			"docking station"},
3496 	{PCIC_PROCESSOR,	-1,			"processor"},
3497 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3498 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3499 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3500 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3501 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3502 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3503 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3504 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3505 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3506 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3507 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3508 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3509 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3510 	{PCIC_SATCOM,		-1,			"satellite communication"},
3511 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3512 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3513 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3514 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3515 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3516 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3517 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3518 	{PCIC_DASP,		-1,			"dasp"},
3519 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3520 	{0, 0,		NULL}
3521 };
3522 
3523 void
3524 pci_probe_nomatch(device_t dev, device_t child)
3525 {
3526 	int	i;
3527 	char	*cp, *scp, *device;
3528 
3529 	/*
3530 	 * Look for a listing for this device in a loaded device database.
3531 	 */
3532 	if ((device = pci_describe_device(child)) != NULL) {
3533 		device_printf(dev, "<%s>", device);
3534 		kfree(device, M_DEVBUF);
3535 	} else {
3536 		/*
3537 		 * Scan the class/subclass descriptions for a general
3538 		 * description.
3539 		 */
3540 		cp = "unknown";
3541 		scp = NULL;
3542 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3543 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3544 				if (pci_nomatch_tab[i].subclass == -1) {
3545 					cp = pci_nomatch_tab[i].desc;
3546 				} else if (pci_nomatch_tab[i].subclass ==
3547 				    pci_get_subclass(child)) {
3548 					scp = pci_nomatch_tab[i].desc;
3549 				}
3550 			}
3551 		}
3552 		device_printf(dev, "<%s%s%s>",
3553 		    cp ? cp : "",
3554 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3555 		    scp ? scp : "");
3556 	}
3557 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3558 		pci_get_vendor(child), pci_get_device(child),
3559 		pci_get_slot(child), pci_get_function(child));
3560 	if (pci_get_intpin(child) > 0) {
3561 		int irq;
3562 
3563 		irq = pci_get_irq(child);
3564 		if (PCI_INTERRUPT_VALID(irq))
3565 			kprintf(" irq %d", irq);
3566 	}
3567 	kprintf("\n");
3568 
3569 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3570 }
3571 
3572 /*
3573  * Parse the PCI device database, if loaded, and return a pointer to a
3574  * description of the device.
3575  *
3576  * The database is flat text formatted as follows:
3577  *
3578  * Any line not in a valid format is ignored.
3579  * Lines are terminated with newline '\n' characters.
3580  *
3581  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3582  * the vendor name.
3583  *
3584  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3585  * - devices cannot be listed without a corresponding VENDOR line.
3586  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3587  * another TAB, then the device name.
3588  */
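
/*
 * A hypothetical excerpt (TABs shown literally, IDs illustrative):
 *
 * 8086	Intel Corporation
 *	100e	82540EM Gigabit Ethernet Controller
 */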
3589 
3590 /*
3591  * Assuming (ptr) points to the beginning of a line in the database,
3592  * return the vendor or device and description of the next entry.
3593  * Whichever of (vendor) or (device) is inappropriate for the entry
3594  * type is set to -1.  Returns nonzero at the end of the database.
3595  *
3596  * Note that this is not entirely robust in the face of corrupt data;
3597  * we attempt to safeguard against that by appending a newline to the
3598  * end of the database when we initialise.
3599  */
3600 static int
3601 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3602 {
3603 	char	*cp = *ptr;
3604 	int	left;
3605 
3606 	*device = -1;
3607 	*vendor = -1;
3608 	**desc = '\0';
3609 	for (;;) {
3610 		left = pci_vendordata_size - (cp - pci_vendordata);
3611 		if (left <= 0) {
3612 			*ptr = cp;
3613 			return(1);
3614 		}
3615 
3616 		/* vendor entry? */
3617 		if (*cp != '\t' &&
3618 		    ksscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
3619 			break;
3620 		/* device entry? */
3621 		if (*cp == '\t' &&
3622 		    ksscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
3623 			break;
3624 
3625 		/* skip to next line */
3626 		while (*cp != '\n' && left > 0) {
3627 			cp++;
3628 			left--;
3629 		}
3630 		if (*cp == '\n') {
3631 			cp++;
3632 			left--;
3633 		}
3634 	}
3635 	/* skip to next line */
3636 	while (*cp != '\n' && left > 0) {
3637 		cp++;
3638 		left--;
3639 	}
3640 	if (*cp == '\n' && left > 0)
3641 		cp++;
3642 	*ptr = cp;
3643 	return(0);
3644 }
3645 
3646 static char *
3647 pci_describe_device(device_t dev)
3648 {
3649 	int	vendor, device;
3650 	char	*desc, *vp, *dp, *line;
3651 
3652 	desc = vp = dp = NULL;
3653 
3654 	/*
3655 	 * If we have no vendor data, we can't do anything.
3656 	 */
3657 	if (pci_vendordata == NULL)
3658 		goto out;
3659 
3660 	/*
3661 	 * Scan the vendor data looking for this device
3662 	 */
3663 	line = pci_vendordata;
3664 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3665 		goto out;
3666 	for (;;) {
3667 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3668 			goto out;
3669 		if (vendor == pci_get_vendor(dev))
3670 			break;
3671 	}
3672 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3673 		goto out;
3674 	for (;;) {
3675 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3676 			*dp = 0;
3677 			break;
3678 		}
3679 		if (vendor != -1) {
3680 			*dp = 0;
3681 			break;
3682 		}
3683 		if (device == pci_get_device(dev))
3684 			break;
3685 	}
3686 	if (dp[0] == '\0')
3687 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3688 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3689 	    NULL)
3690 		ksprintf(desc, "%s, %s", vp, dp);
3691  out:
3692 	if (vp != NULL)
3693 		kfree(vp, M_DEVBUF);
3694 	if (dp != NULL)
3695 		kfree(dp, M_DEVBUF);
3696 	return(desc);
3697 }
3698 
3699 int
3700 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3701 {
3702 	struct pci_devinfo *dinfo;
3703 	pcicfgregs *cfg;
3704 
3705 	dinfo = device_get_ivars(child);
3706 	cfg = &dinfo->cfg;
3707 
3708 	switch (which) {
3709 	case PCI_IVAR_ETHADDR:
3710 		/*
3711 		 * The generic accessor doesn't deal with failure, so
3712 		 * we set the return value, then return an error.
3713 		 */
3714 		*((uint8_t **) result) = NULL;
3715 		return (EINVAL);
3716 	case PCI_IVAR_SUBVENDOR:
3717 		*result = cfg->subvendor;
3718 		break;
3719 	case PCI_IVAR_SUBDEVICE:
3720 		*result = cfg->subdevice;
3721 		break;
3722 	case PCI_IVAR_VENDOR:
3723 		*result = cfg->vendor;
3724 		break;
3725 	case PCI_IVAR_DEVICE:
3726 		*result = cfg->device;
3727 		break;
3728 	case PCI_IVAR_DEVID:
3729 		*result = (cfg->device << 16) | cfg->vendor;
3730 		break;
3731 	case PCI_IVAR_CLASS:
3732 		*result = cfg->baseclass;
3733 		break;
3734 	case PCI_IVAR_SUBCLASS:
3735 		*result = cfg->subclass;
3736 		break;
3737 	case PCI_IVAR_PROGIF:
3738 		*result = cfg->progif;
3739 		break;
3740 	case PCI_IVAR_REVID:
3741 		*result = cfg->revid;
3742 		break;
3743 	case PCI_IVAR_INTPIN:
3744 		*result = cfg->intpin;
3745 		break;
3746 	case PCI_IVAR_IRQ:
3747 		*result = cfg->intline;
3748 		break;
3749 	case PCI_IVAR_DOMAIN:
3750 		*result = cfg->domain;
3751 		break;
3752 	case PCI_IVAR_BUS:
3753 		*result = cfg->bus;
3754 		break;
3755 	case PCI_IVAR_SLOT:
3756 		*result = cfg->slot;
3757 		break;
3758 	case PCI_IVAR_FUNCTION:
3759 		*result = cfg->func;
3760 		break;
3761 	case PCI_IVAR_CMDREG:
3762 		*result = cfg->cmdreg;
3763 		break;
3764 	case PCI_IVAR_CACHELNSZ:
3765 		*result = cfg->cachelnsz;
3766 		break;
3767 	case PCI_IVAR_MINGNT:
3768 		*result = cfg->mingnt;
3769 		break;
3770 	case PCI_IVAR_MAXLAT:
3771 		*result = cfg->maxlat;
3772 		break;
3773 	case PCI_IVAR_LATTIMER:
3774 		*result = cfg->lattimer;
3775 		break;
3776 	case PCI_IVAR_PCIXCAP_PTR:
3777 		*result = cfg->pcix.pcix_ptr;
3778 		break;
3779 	case PCI_IVAR_PCIECAP_PTR:
3780 		*result = cfg->expr.expr_ptr;
3781 		break;
3782 	case PCI_IVAR_VPDCAP_PTR:
3783 		*result = cfg->vpd.vpd_reg;
3784 		break;
3785 	default:
3786 		return (ENOENT);
3787 	}
3788 	return (0);
3789 }
3790 
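/*
 * Illustrative note (not part of this file): drivers normally reach
 * these ivars through the pci_get_*() accessor macros from pcivar.h
 * rather than calling BUS_READ_IVAR() directly; the IDs below are
 * hypothetical:
 *
 *	if (pci_get_vendor(dev) == 0x8086 &&
 *	    pci_get_device(dev) == 0x100e)
 *		return (0);		-- probe match
 */
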
3791 int
3792 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3793 {
3794 	struct pci_devinfo *dinfo;
3795 
3796 	dinfo = device_get_ivars(child);
3797 
3798 	switch (which) {
3799 	case PCI_IVAR_INTPIN:
3800 		dinfo->cfg.intpin = value;
3801 		return (0);
3802 	case PCI_IVAR_ETHADDR:
3803 	case PCI_IVAR_SUBVENDOR:
3804 	case PCI_IVAR_SUBDEVICE:
3805 	case PCI_IVAR_VENDOR:
3806 	case PCI_IVAR_DEVICE:
3807 	case PCI_IVAR_DEVID:
3808 	case PCI_IVAR_CLASS:
3809 	case PCI_IVAR_SUBCLASS:
3810 	case PCI_IVAR_PROGIF:
3811 	case PCI_IVAR_REVID:
3812 	case PCI_IVAR_IRQ:
3813 	case PCI_IVAR_DOMAIN:
3814 	case PCI_IVAR_BUS:
3815 	case PCI_IVAR_SLOT:
3816 	case PCI_IVAR_FUNCTION:
3817 		return (EINVAL);	/* disallow for now */
3818 
3819 	default:
3820 		return (ENOENT);
3821 	}
3822 }
3823 #ifdef notyet
3824 #include "opt_ddb.h"
3825 #ifdef DDB
3826 #include <ddb/ddb.h>
3827 #include <sys/cons.h>
3828 
3829 /*
3830  * List resources based on PCI map registers, for use within DDB.
3831  */
3832 
3833 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3834 {
3835 	struct pci_devinfo *dinfo;
3836 	struct devlist *devlist_head;
3837 	struct pci_conf *p;
3838 	const char *name;
3839 	int i, error, none_count;
3840 
3841 	none_count = 0;
3842 	/* get the head of the device queue */
3843 	devlist_head = &pci_devq;
3844 
3845 	/*
3846 	 * Go through the list of devices and print out devices
3847 	 */
3848 	for (error = 0, i = 0,
3849 	     dinfo = STAILQ_FIRST(devlist_head);
3850 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3851 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3852 
3853 		/* Populate pd_name and pd_unit */
3854 		name = NULL;
3855 		if (dinfo->cfg.dev)
3856 			name = device_get_name(dinfo->cfg.dev);
3857 
3858 		p = &dinfo->conf;
3859 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3860 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3861 			(name && *name) ? name : "none",
3862 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3863 			none_count++,
3864 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3865 			p->pc_sel.pc_func, (p->pc_class << 16) |
3866 			(p->pc_subclass << 8) | p->pc_progif,
3867 			(p->pc_subdevice << 16) | p->pc_subvendor,
3868 			(p->pc_device << 16) | p->pc_vendor,
3869 			p->pc_revid, p->pc_hdr);
3870 	}
3871 }
3872 #endif /* DDB */
3873 #endif	/* notyet */
3874 
3875 static struct resource *
3876 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3877     u_long start, u_long end, u_long count, u_int flags)
3878 {
3879 	struct pci_devinfo *dinfo = device_get_ivars(child);
3880 	struct resource_list *rl = &dinfo->resources;
3881 	struct resource_list_entry *rle;
3882 	struct resource *res;
3883 	pci_addr_t map, testval;
3884 	int mapsize;
3885 
3886 	/*
3887 	 * Weed out the bogons, and figure out how large the BAR/map
3888 	 * is.  BARs that read back 0 here are bogus and unimplemented.
3889 	 * Note: atapci devices in legacy mode are special and handled
3890 	 * elsewhere in the code.  If you have an atapci device in legacy
3891 	 * mode and it fails here, that other code is broken.
3892 	 */
3893 	res = NULL;
3894 	map = pci_read_config(child, *rid, 4);
3895 	pci_write_config(child, *rid, 0xffffffff, 4);
3896 	testval = pci_read_config(child, *rid, 4);
3897 	if (pci_maprange(testval) == 64)
3898 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3899 	if (pci_mapbase(testval) == 0)
3900 		goto out;
3901 
3902 	/*
3903 	 * Restore the original value of the BAR.  We may have reprogrammed
3904 	 * the BAR of the low-level console device and when booting verbose,
3905 	 * we need the console device addressable.
3906 	 */
3907 	pci_write_config(child, *rid, map, 4);
3908 
3909 	if (PCI_BAR_MEM(testval)) {
3910 		if (type != SYS_RES_MEMORY) {
3911 			if (bootverbose)
3912 				device_printf(dev,
3913 				    "child %s requested type %d for rid %#x,"
3914 	    " but the BAR says it is a memio\n",
3915 				    device_get_nameunit(child), type, *rid);
3916 			goto out;
3917 		}
3918 	} else {
3919 		if (type != SYS_RES_IOPORT) {
3920 			if (bootverbose)
3921 				device_printf(dev,
3922 				    "child %s requested type %d for rid %#x,"
3923 				    " but the BAR says it is an ioport\n",
3924 				    device_get_nameunit(child), type, *rid);
3925 			goto out;
3926 		}
3927 	}
3928 	/*
3929 	 * For real BARs, we need to override the size that
3930 	 * the driver requests, because that's what the BAR
3931 	 * actually uses and we would otherwise have a
3932 	 * situation where we might allocate the excess to
3933 	 * another driver, which won't work.
3934 	 */
3935 	mapsize = pci_mapsize(testval);
3936 	count = 1UL << mapsize;
3937 	if (RF_ALIGNMENT(flags) < mapsize)
3938 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3939 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3940 		flags |= RF_PREFETCHABLE;
3941 
3942 	/*
3943 	 * Allocate enough resource, and then write back the
3944 	 * appropriate bar for that resource.
3945 	 */
3946 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3947 	    start, end, count, flags, -1);
3948 	if (res == NULL) {
3949 		device_printf(child,
3950 		    "%#lx bytes of rid %#x type %d failed (%#lx, %#lx).\n",
3951 		    count, *rid, type, start, end);
3952 		goto out;
3953 	}
3954 	resource_list_add(rl, type, *rid, start, end, count, -1);
3955 	rle = resource_list_find(rl, type, *rid);
3956 	if (rle == NULL)
3957 		panic("pci_alloc_map: unexpectedly can't find resource.");
3958 	rle->res = res;
3959 	rle->start = rman_get_start(res);
3960 	rle->end = rman_get_end(res);
3961 	rle->count = count;
3962 	if (bootverbose)
3963 		device_printf(child,
3964 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3965 		    count, *rid, type, rman_get_start(res));
3966 	map = rman_get_start(res);
3967 out:;
3968 	pci_write_config(child, *rid, map, 4);
3969 	if (pci_maprange(testval) == 64)
3970 		pci_write_config(child, *rid + 4, map >> 32, 4);
3971 	return (res);
3972 }
3973 
3974 
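/*
 * Illustrative sketch (not from this file): a typical leaf driver maps
 * its first memory BAR through the path below with something like
 *
 *	rid = PCIR_BAR(0);
 *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
 *
 * which lands in pci_alloc_resource() and, for a BAR that has no
 * resource list entry yet, in pci_alloc_map() above.
 */
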
3975 struct resource *
3976 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3977     u_long start, u_long end, u_long count, u_int flags, int cpuid)
3978 {
3979 	struct pci_devinfo *dinfo = device_get_ivars(child);
3980 	struct resource_list *rl = &dinfo->resources;
3981 	struct resource_list_entry *rle;
3982 	pcicfgregs *cfg = &dinfo->cfg;
3983 
3984 	/*
3985 	 * Perform lazy resource allocation
3986 	 */
3987 	if (device_get_parent(child) == dev) {
3988 		switch (type) {
3989 		case SYS_RES_IRQ:
3990 			/*
3991 			 * Can't alloc legacy interrupt once MSI messages
3992 			 * have been allocated.
3993 			 */
3994 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3995 			    cfg->msix.msix_alloc > 0))
3996 				return (NULL);
3997 			/*
3998 			 * If the child device doesn't have an
3999 			 * interrupt routed and is deserving of an
4000 			 * interrupt, try to assign it one.
4001 			 */
4002 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4003 			    (cfg->intpin != 0))
4004 				pci_assign_interrupt(dev, child, 0);
4005 			break;
4006 		case SYS_RES_IOPORT:
4007 		case SYS_RES_MEMORY:
4008 			if (*rid < PCIR_BAR(cfg->nummaps)) {
4009 				/*
4010 				 * Enable the I/O mode.  We should
4011 				 * also be assigning resources when
4012 				 * none are present.  The
4013 				 * resource_list_alloc path sort of
4014 				 * does this already...
4015 				 */
4016 				if (PCI_ENABLE_IO(dev, child, type))
4017 					return (NULL);
4018 			}
4019 			rle = resource_list_find(rl, type, *rid);
4020 			if (rle == NULL)
4021 				return (pci_alloc_map(dev, child, type, rid,
4022 				    start, end, count, flags));
4023 			break;
4024 		}
4025 		/*
4026 		 * If we've already allocated the resource, then
4027 		 * return it now.  But first we may need to activate
4028 		 * it, since we don't allocate the resource as active
4029 		 * above.  Normally this would be done down in the
4030 		 * nexus, but since we short-circuit that path we have
4031 		 * to do its job here.  Not sure if we should kfree the
4032 		 * resource if it fails to activate.
4033 		 */
4034 		rle = resource_list_find(rl, type, *rid);
4035 		if (rle != NULL && rle->res != NULL) {
4036 			if (bootverbose)
4037 				device_printf(child,
4038 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
4039 				    rman_get_size(rle->res), *rid, type,
4040 				    rman_get_start(rle->res));
4041 			if ((flags & RF_ACTIVE) &&
4042 			    bus_generic_activate_resource(dev, child, type,
4043 			    *rid, rle->res) != 0)
4044 				return (NULL);
4045 			return (rle->res);
4046 		}
4047 	}
4048 	return (resource_list_alloc(rl, dev, child, type, rid,
4049 	    start, end, count, flags, cpuid));
4050 }
4051 
4052 void
4053 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4054 {
4055 	struct pci_devinfo *dinfo;
4056 	struct resource_list *rl;
4057 	struct resource_list_entry *rle;
4058 
4059 	if (device_get_parent(child) != dev)
4060 		return;
4061 
4062 	dinfo = device_get_ivars(child);
4063 	rl = &dinfo->resources;
4064 	rle = resource_list_find(rl, type, rid);
4065 	if (rle) {
4066 		if (rle->res) {
4067 			if (rman_get_device(rle->res) != dev ||
4068 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4069 				device_printf(dev, "delete_resource: "
4070 				    "Resource still owned by child, oops. "
4071 				    "(type=%d, rid=%d, addr=%lx)\n",
4072 				    rle->type, rle->rid,
4073 				    rman_get_start(rle->res));
4074 				return;
4075 			}
4076 			bus_release_resource(dev, type, rid, rle->res);
4077 		}
4078 		resource_list_delete(rl, type, rid);
4079 	}
4080 	/*
4081 	 * Why do we turn off the PCI configuration BAR when we delete a
4082 	 * resource? -- imp
4083 	 */
4084 	pci_write_config(child, rid, 0, 4);
4085 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4086 }
4087 
4088 struct resource_list *
4089 pci_get_resource_list(device_t dev, device_t child)
4090 {
4091 	struct pci_devinfo *dinfo = device_get_ivars(child);
4092 
4093 	if (dinfo == NULL)
4094 		return (NULL);
4095 
4096 	return (&dinfo->resources);
4097 }
4098 
4099 uint32_t
4100 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4101 {
4102 	struct pci_devinfo *dinfo = device_get_ivars(child);
4103 	pcicfgregs *cfg = &dinfo->cfg;
4104 
4105 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4106 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4107 }
4108 
4109 void
4110 pci_write_config_method(device_t dev, device_t child, int reg,
4111     uint32_t val, int width)
4112 {
4113 	struct pci_devinfo *dinfo = device_get_ivars(child);
4114 	pcicfgregs *cfg = &dinfo->cfg;
4115 
4116 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4117 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4118 }
4119 
4120 int
4121 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4122     size_t buflen)
4123 {
4124 
4125 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4126 	    pci_get_function(child));
4127 	return (0);
4128 }
4129 
4130 int
4131 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4132     size_t buflen)
4133 {
4134 	struct pci_devinfo *dinfo;
4135 	pcicfgregs *cfg;
4136 
4137 	dinfo = device_get_ivars(child);
4138 	cfg = &dinfo->cfg;
4139 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4140 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4141 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4142 	    cfg->progif);
4143 	return (0);
4144 }
4145 
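/*
 * For example (hypothetical IDs), the pnpinfo string built above might
 * read (wrapped here for width):
 *
 *	vendor=0x8086 device=0x100e subvendor=0x8086 subdevice=0x001e
 *	class=0x020000
 *
 * where class 0x020000 decodes as base class 0x02 (network), subclass
 * 0x00 (ethernet), progif 0x00.
 */
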
4146 int
4147 pci_assign_interrupt_method(device_t dev, device_t child)
4148 {
4149 	struct pci_devinfo *dinfo = device_get_ivars(child);
4150 	pcicfgregs *cfg = &dinfo->cfg;
4151 
4152 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4153 	    cfg->intpin));
4154 }
4155 
4156 static int
4157 pci_modevent(module_t mod, int what, void *arg)
4158 {
4159 	static struct cdev *pci_cdev;
4160 
4161 	switch (what) {
4162 	case MOD_LOAD:
4163 		STAILQ_INIT(&pci_devq);
4164 		pci_generation = 0;
4165 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4166 				    "pci");
4167 		pci_load_vendor_data();
4168 		break;
4169 
4170 	case MOD_UNLOAD:
4171 		destroy_dev(pci_cdev);
4172 		break;
4173 	}
4174 
4175 	return (0);
4176 }
4177 
4178 void
4179 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4180 {
4181 	int i;
4182 
4183 	/*
4184 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4185 	 * which we know need special treatment.  Type 2 devices are
4186 	 * cardbus bridges which also require special treatment.
4187 	 * Other types are unknown, and we err on the side of safety
4188 	 * by ignoring them.
4189 	 */
4190 	if (dinfo->cfg.hdrtype != 0)
4191 		return;
4192 
4193 	/*
4194 	 * Restore the device to full power mode.  We must do this
4195 	 * before we restore the registers because moving from D3 to
4196 	 * D0 will cause the chip's BARs and some other registers to
4197 	 * be reset to some unknown power on reset values.  Cut down
4198 	 * be reset to some unknown power-on reset values.  Cut down
4199 	 * state D0.
4200 	 */
4201 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
4202 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4203 	}
4204 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4205 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4206 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4207 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4208 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4209 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4210 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4211 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4212 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4213 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4214 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4215 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4216 
4217 	/* Restore MSI and MSI-X configurations if they are present. */
4218 	if (dinfo->cfg.msi.msi_location != 0)
4219 		pci_resume_msi(dev);
4220 	if (dinfo->cfg.msix.msix_location != 0)
4221 		pci_resume_msix(dev);
4222 }
4223 
4224 void
4225 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4226 {
4227 	int i;
4228 	uint32_t cls;
4229 	int ps;
4230 
4231 	/*
4232 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4233 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4234 	 * which also require special treatment.  Other types are unknown, and
4235 	 * we err on the side of safety by ignoring them.  Powering down
4236 	 * bridges should not be undertaken lightly.
4237 	 */
4238 	if (dinfo->cfg.hdrtype != 0)
4239 		return;
4240 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4241 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4242 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4243 
4244 	/*
4245 	 * Some drivers apparently write to these registers w/o updating our
4246 	 * cached copy.  No harm happens if we update the copy, so do so here
4247 	 * so we can restore them.  The COMMAND register is modified by the
4248 	 * bus w/o updating the cache.  This should represent the normally
4249 	 * writable portion of the 'defined' part of type 0 headers.  In
4250 	 * theory we also need to save/restore the PCI capability structures
4251 	 * we know about, but apart from power we don't know any that are
4252 	 * writable.
4253 	 */
4254 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4255 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4256 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4257 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4258 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4259 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4260 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4261 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4262 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4263 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4264 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4265 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4266 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4267 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4268 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4269 
4270 	/*
4271 	 * Don't set the state for display devices, base peripherals, and
4272 	 * memory devices since bad things happen when they are powered down.
4273 	 * We should (a) have drivers that can easily detach and (b) use
4274 	 * generic drivers for these devices so that some device actually
4275 	 * attaches.  We need to make sure that when we implement (a) we don't
4276 	 * power the device down on a reattach.
4277 	 */
4278 	cls = pci_get_class(dev);
4279 	if (!setstate)
4280 		return;
4281 	switch (pci_do_power_nodriver)
4282 	{
4283 		case 0:		/* NO powerdown at all */
4284 			return;
4285 		case 1:		/* Conservative about what to power down */
4286 			if (cls == PCIC_STORAGE)
4287 				return;
4288 			/*FALLTHROUGH*/
4289 		case 2:		/* Aggressive about what to power down */
4290 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4291 			    cls == PCIC_BASEPERIPH)
4292 				return;
4293 			/*FALLTHROUGH*/
4294 		case 3:		/* Power down everything */
4295 			break;
4296 	}
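	/*
	 * Illustrative note: pci_do_power_nodriver is normally controlled
	 * by a tunable/sysctl (assumed to be hw.pci.do_power_nodriver,
	 * e.g. set from /boot/loader.conf).
	 */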
4297 	/*
4298 	 * PCI spec says we can only go into D3 state from D0 state.
4299 	 * Transition from D[12] into D0 before going to D3 state.
4300 	 */
4301 	ps = pci_get_powerstate(dev);
4302 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4303 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4304 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4305 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4306 }
4307 
4308 int
4309 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4310 {
4311 	int rid, type;
4312 	u_int flags;
4313 
4314 	rid = 0;
4315 	type = PCI_INTR_TYPE_LEGACY;
4316 	flags = RF_SHAREABLE | RF_ACTIVE;
4317 
4318 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4319 	if (msi_enable) {
4320 		int cpu;
4321 
4322 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4323 		if (cpu >= ncpus)
4324 			cpu = ncpus - 1;
4325 
4326 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4327 			flags &= ~RF_SHAREABLE;
4328 			type = PCI_INTR_TYPE_MSI;
4329 		}
4330 	}
4331 
4332 	*rid0 = rid;
4333 	*flags0 = flags;
4334 
4335 	return type;
4336 }
4337 
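/*
 * Illustrative sketch (not from this file) of how a driver might
 * consume pci_alloc_1intr(); the softc fields are hypothetical:
 *
 *	sc->irq_type = pci_alloc_1intr(dev, msi_enable,
 *	    &sc->irq_rid, &sc->irq_flags);
 *	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *	    &sc->irq_rid, sc->irq_flags);
 *
 * and, on detach after releasing the resource:
 *
 *	if (sc->irq_type == PCI_INTR_TYPE_MSI)
 *		pci_release_msi(dev);
 */
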
4338 /* Wrapper APIs suitable for device driver use. */
4339 void
4340 pci_save_state(device_t dev)
4341 {
4342 	struct pci_devinfo *dinfo;
4343 
4344 	dinfo = device_get_ivars(dev);
4345 	pci_cfg_save(dev, dinfo, 0);
4346 }
4347 
4348 void
4349 pci_restore_state(device_t dev)
4350 {
4351 	struct pci_devinfo *dinfo;
4352 
4353 	dinfo = device_get_ivars(dev);
4354 	pci_cfg_restore(dev, dinfo);
4355 }
4356
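/*
 * Illustrative sketch (not from this file): hypothetical driver
 * suspend/resume methods built on these wrappers:
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *		pci_save_state(dev);
 *		return (bus_generic_suspend(dev));
 *	}
 *
 *	static int
 *	foo_resume(device_t dev)
 *	{
 *		pci_restore_state(dev);
 *		return (bus_generic_resume(dev));
 *	}
 */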