xref: /dragonfly/sys/bus/pci/pci.c (revision 9f47dde1)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/linker.h>
38 #include <sys/fcntl.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/endian.h>
44 #include <sys/machintr.h>
45 
46 #include <machine/msi_machdep.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 #include <sys/device.h>
55 
56 #include <sys/pciio.h>
57 #include <bus/pci/pcireg.h>
58 #include <bus/pci/pcivar.h>
59 #include <bus/pci/pci_private.h>
60 
61 #include <bus/u4b/controller/xhcireg.h>
62 #include <bus/u4b/controller/ehcireg.h>
63 #include <bus/u4b/controller/ohcireg.h>
64 #include <bus/u4b/controller/uhcireg.h>
65 
66 #include "pcib_if.h"
67 #include "pci_if.h"
68 
69 #ifdef __HAVE_ACPI
70 #include <contrib/dev/acpica/acpi.h>
71 #include "acpi_if.h"
72 #else
73 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
74 #endif
75 
76 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
77 
78 static uint32_t		pci_mapbase(unsigned mapreg);
79 static const char	*pci_maptype(unsigned mapreg);
80 static int		pci_mapsize(unsigned testval);
81 static int		pci_maprange(unsigned mapreg);
82 static void		pci_fixancient(pcicfgregs *cfg);
83 
84 static int		pci_porten(device_t pcib, int b, int s, int f);
85 static int		pci_memen(device_t pcib, int b, int s, int f);
86 static void		pci_assign_interrupt(device_t bus, device_t dev,
87 			    int force_route);
88 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
89 			    int b, int s, int f, int reg,
90 			    struct resource_list *rl, int force, int prefetch);
91 static int		pci_probe(device_t dev);
92 static int		pci_attach(device_t dev);
93 static void		pci_child_detached(device_t, device_t);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_setup_msix_vector(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix_vector(device_t dev, u_int index);
115 static void		pci_unmask_msix_vector(device_t dev, u_int index);
116 static void		pci_mask_msix_allvectors(device_t dev);
117 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pcie_slotimpl(const pcicfgregs *);
122 static void		pci_print_verbose_expr(const pcicfgregs *);
123 
124 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_subvendor(device_t, int, int,
130 			    pcicfgregs *);
131 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
132 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
133 
134 static device_method_t pci_methods[] = {
135 	/* Device interface */
136 	DEVMETHOD(device_probe,		pci_probe),
137 	DEVMETHOD(device_attach,	pci_attach),
138 	DEVMETHOD(device_detach,	bus_generic_detach),
139 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
140 	DEVMETHOD(device_suspend,	pci_suspend),
141 	DEVMETHOD(device_resume,	pci_resume),
142 
143 	/* Bus interface */
144 	DEVMETHOD(bus_print_child,	pci_print_child),
145 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
146 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
147 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
148 	DEVMETHOD(bus_driver_added,	pci_driver_added),
149 	DEVMETHOD(bus_child_detached,	pci_child_detached),
150 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
151 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
152 
153 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
154 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
155 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
156 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
157 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
158 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
159 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
160 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
161 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
162 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
163 
164 	/* PCI interface */
165 	DEVMETHOD(pci_read_config,	pci_read_config_method),
166 	DEVMETHOD(pci_write_config,	pci_write_config_method),
167 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
168 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
169 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
170 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
171 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
172 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
173 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
174 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
175 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
176 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
177 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
179 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
180 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
181 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
182 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
183 
184 	DEVMETHOD_END
185 };
186 
187 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
188 
189 static devclass_t pci_devclass;
190 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
191 MODULE_VERSION(pci, 1);
192 
193 static char	*pci_vendordata;
194 static size_t	pci_vendordata_size;
195 
196 
197 static const struct pci_read_cap {
198 	int		cap;
199 	pci_read_cap_t	read_cap;
200 } pci_read_caps[] = {
201 	{ PCIY_PMG,		pci_read_cap_pmgt },
202 	{ PCIY_HT,		pci_read_cap_ht },
203 	{ PCIY_MSI,		pci_read_cap_msi },
204 	{ PCIY_MSIX,		pci_read_cap_msix },
205 	{ PCIY_VPD,		pci_read_cap_vpd },
206 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
207 	{ PCIY_PCIX,		pci_read_cap_pcix },
208 	{ PCIY_EXPRESS,		pci_read_cap_express },
209 	{ 0, NULL } /* required last entry */
210 };
211 
212 struct pci_quirk {
213 	uint32_t devid;	/* Vendor/device of the card */
214 	int	type;
215 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
216 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
217 #define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
218 	int	arg1;
219 	int	arg2;
220 };
221 
222 struct pci_quirk pci_quirks[] = {
223 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
224 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
225 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
227 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
228 
229 	/*
230 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
231 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
232 	 */
233 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 
236 	/*
237 	 * MSI doesn't work on earlier Intel chipsets including
238 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
239 	 */
240 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 
248 	/*
249 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
250 	 * bridge.
251 	 */
252 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
253 
254 	/*
255 	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
256 	 * a bug whereby the MSI interrupt does not assert if the
257 	 * PCIM_CMD_INTxDIS bit of the command register is set.
258 	 */
259 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
260 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
261 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
262 	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
263 	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
264 
265 	{ 0 }
266 };
267 
268 /* map register information */
269 #define	PCI_MAPMEM	0x01	/* memory map */
270 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
271 #define	PCI_MAPPORT	0x04	/* port map */
272 
273 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
274 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
275 
276 struct devlist pci_devq;
277 uint32_t pci_generation;
278 uint32_t pci_numdevs = 0;
279 static int pcie_chipset, pcix_chipset;
280 
281 /* sysctl vars */
282 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
283 
284 static int pci_enable_io_modes = 1;
285 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
287     &pci_enable_io_modes, 1,
288     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
289 enable these bits correctly.  We'd like to do this all the time, but there\n\
290 are some peripherals that this causes problems with.");
291 
292 static int pci_do_power_nodriver = 0;
293 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
294 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
295     &pci_do_power_nodriver, 0,
296   "Place a function into D3 state when no driver attaches to it.  0 means\n\
297 disable.  1 means conservatively place devices into D3 state.  2 means\n\
298 aggressively place devices into D3 state.  3 means put absolutely everything\n\
299 in D3 state.");
300 
301 static int pci_do_power_resume = 1;
302 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
303 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
304     &pci_do_power_resume, 1,
305   "Transition from D3 -> D0 on resume.");
306 
307 static int pci_do_msi = 1;
308 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
309 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
310     "Enable support for MSI interrupts");
311 
312 static int pci_do_msix = 1;
313 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
314 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
315     "Enable support for MSI-X interrupts");
316 
317 static int pci_honor_msi_blacklist = 1;
318 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
319 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
320     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
321 
322 #if defined(__x86_64__)
323 static int pci_usb_takeover = 1;
324 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
325 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
326     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
327 Disable this if you depend on BIOS emulation of USB devices, that is,\
328 you use USB devices (e.g. a keyboard or mouse) but do not load USB drivers.");
329 #endif
330 
331 static int pci_msi_cpuid;
332 
333 static int
334 pci_has_quirk(uint32_t devid, int quirk)
335 {
336 	const struct pci_quirk *q;
337 
338 	for (q = &pci_quirks[0]; q->devid; q++) {
339 		if (q->devid == devid && q->type == quirk)
340 			return (1);
341 	}
342 	return (0);
343 }
344 
345 /* Find a device_t by bus/slot/function in domain 0 */
346 
347 device_t
348 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
349 {
350 
351 	return (pci_find_dbsf(0, bus, slot, func));
352 }
353 
354 /* Find a device_t by domain/bus/slot/function */
355 
356 device_t
357 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
358 {
359 	struct pci_devinfo *dinfo;
360 
361 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
362 		if ((dinfo->cfg.domain == domain) &&
363 		    (dinfo->cfg.bus == bus) &&
364 		    (dinfo->cfg.slot == slot) &&
365 		    (dinfo->cfg.func == func)) {
366 			return (dinfo->cfg.dev);
367 		}
368 	}
369 
370 	return (NULL);
371 }
372 
373 /* Find a device_t by vendor/device ID */
374 
375 device_t
376 pci_find_device(uint16_t vendor, uint16_t device)
377 {
378 	struct pci_devinfo *dinfo;
379 
380 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
381 		if ((dinfo->cfg.vendor == vendor) &&
382 		    (dinfo->cfg.device == device)) {
383 			return (dinfo->cfg.dev);
384 		}
385 	}
386 
387 	return (NULL);
388 }
389 
390 device_t
391 pci_find_class(uint8_t class, uint8_t subclass)
392 {
393 	struct pci_devinfo *dinfo;
394 
395 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
396 		if (dinfo->cfg.baseclass == class &&
397 		    dinfo->cfg.subclass == subclass) {
398 			return (dinfo->cfg.dev);
399 		}
400 	}
401 
402 	return (NULL);
403 }
404 
405 device_t
406 pci_iterate_class(struct pci_devinfo **dinfop, uint8_t class, uint8_t subclass)
407 {
408 	struct pci_devinfo *dinfo;
409 
410 	if (*dinfop)
411 		dinfo = STAILQ_NEXT(*dinfop, pci_links);
412 	else
413 		dinfo = STAILQ_FIRST(&pci_devq);
414 
415 	while (dinfo) {
416 		if (dinfo->cfg.baseclass == class &&
417 		    dinfo->cfg.subclass == subclass) {
418 			*dinfop = dinfo;
419 			return (dinfo->cfg.dev);
420 		}
421 		dinfo = STAILQ_NEXT(dinfo, pci_links);
422 	}
423 	*dinfop = NULL;
424 	return (NULL);
425 }
426 
427 /* return base address of memory or port map */
428 
429 static uint32_t
430 pci_mapbase(uint32_t mapreg)
431 {
432 
433 	if (PCI_BAR_MEM(mapreg))
434 		return (mapreg & PCIM_BAR_MEM_BASE);
435 	else
436 		return (mapreg & PCIM_BAR_IO_BASE);
437 }
438 
439 /* return map type of memory or port map */
440 
441 static const char *
442 pci_maptype(unsigned mapreg)
443 {
444 
445 	if (PCI_BAR_IO(mapreg))
446 		return ("I/O Port");
447 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
448 		return ("Prefetchable Memory");
449 	return ("Memory");
450 }
451 
452 /* return log2 of map size decoded for memory or port map */
453 
454 static int
455 pci_mapsize(uint32_t testval)
456 {
457 	int ln2size;
458 
459 	testval = pci_mapbase(testval);
460 	ln2size = 0;
461 	if (testval != 0) {
462 		while ((testval & 1) == 0) {
464 			ln2size++;
465 			testval >>= 1;
466 		}
467 	}
468 	return (ln2size);
469 }
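
/*
 * Worked example (illustrative): a BAR is sized by writing all 1s to the
 * register and reading back the decoded mask.  If the read-back value for
 * a 32-bit memory BAR is 0xfffff000, pci_mapbase() clears the low type
 * bits, the lowest set bit is bit 12, and pci_mapsize() returns 12,
 * i.e. a 1 << 12 = 4KB decode range.
 */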
470 
471 /* return log2 of address range supported by map register */
472 
473 static int
474 pci_maprange(unsigned mapreg)
475 {
476 	int ln2range = 0;
477 
478 	if (PCI_BAR_IO(mapreg))
479 		ln2range = 32;
480 	else
481 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
482 		case PCIM_BAR_MEM_32:
483 			ln2range = 32;
484 			break;
485 		case PCIM_BAR_MEM_1MB:
486 			ln2range = 20;
487 			break;
488 		case PCIM_BAR_MEM_64:
489 			ln2range = 64;
490 			break;
491 		}
492 	return (ln2range);
493 }
494 
495 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
496 
497 static void
498 pci_fixancient(pcicfgregs *cfg)
499 {
500 	if (cfg->hdrtype != 0)
501 		return;
502 
503 	/* PCI to PCI bridges use header type 1 */
504 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
505 		cfg->hdrtype = 1;
506 }
507 
508 /* extract header type specific config data */
509 
510 static void
511 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
512 {
513 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
514 	switch (cfg->hdrtype) {
515 	case 0:
516 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
517 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
518 		cfg->nummaps	    = PCI_MAXMAPS_0;
519 		break;
520 	case 1:
521 		cfg->nummaps	    = PCI_MAXMAPS_1;
522 		break;
523 	case 2:
524 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
525 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
526 		cfg->nummaps	    = PCI_MAXMAPS_2;
527 		break;
528 	}
529 #undef REG
530 }
531 
532 /* read configuration header into pcicfgregs structure */
533 struct pci_devinfo *
534 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
535 {
536 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
537 	pcicfgregs *cfg = NULL;
538 	struct pci_devinfo *devlist_entry;
539 	struct devlist *devlist_head;
540 
541 	devlist_head = &pci_devq;
542 
543 	devlist_entry = NULL;
544 
545 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
546 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
547 
548 		cfg = &devlist_entry->cfg;
549 
550 		cfg->domain		= d;
551 		cfg->bus		= b;
552 		cfg->slot		= s;
553 		cfg->func		= f;
554 		cfg->vendor		= REG(PCIR_VENDOR, 2);
555 		cfg->device		= REG(PCIR_DEVICE, 2);
556 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
557 		cfg->statreg		= REG(PCIR_STATUS, 2);
558 		cfg->baseclass		= REG(PCIR_CLASS, 1);
559 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
560 		cfg->progif		= REG(PCIR_PROGIF, 1);
561 		cfg->revid		= REG(PCIR_REVID, 1);
562 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
563 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
564 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
565 		cfg->intpin		= REG(PCIR_INTPIN, 1);
566 		cfg->intline		= REG(PCIR_INTLINE, 1);
567 
568 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
569 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
570 
571 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
572 		cfg->hdrtype		&= ~PCIM_MFDEV;
573 
574 		pci_fixancient(cfg);
575 		pci_hdrtypedata(pcib, b, s, f, cfg);
576 
577 		pci_read_capabilities(pcib, cfg);
578 
579 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
580 
581 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
582 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
583 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
584 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
585 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
586 
587 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
588 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
589 		devlist_entry->conf.pc_vendor = cfg->vendor;
590 		devlist_entry->conf.pc_device = cfg->device;
591 
592 		devlist_entry->conf.pc_class = cfg->baseclass;
593 		devlist_entry->conf.pc_subclass = cfg->subclass;
594 		devlist_entry->conf.pc_progif = cfg->progif;
595 		devlist_entry->conf.pc_revid = cfg->revid;
596 
597 		pci_numdevs++;
598 		pci_generation++;
599 	}
600 	return (devlist_entry);
601 #undef REG
602 }
603 
604 static int
605 pci_fixup_nextptr(int *nextptr0)
606 {
607 	int nextptr = *nextptr0;
608 
609 	/* "Next pointer" is only one byte */
610 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
611 
612 	if (nextptr & 0x3) {
613 		/*
614 		 * PCI local bus spec 3.0:
615 		 *
616 		 * "... The bottom two bits of all pointers are reserved
617 		 *  and must be implemented as 00b although software must
618 		 *  mask them to allow for future uses of these bits ..."
619 		 */
620 		if (bootverbose) {
621 			kprintf("Illegal PCI extended capability "
622 				"offset, fixup 0x%02x -> 0x%02x\n",
623 				nextptr, nextptr & ~0x3);
624 		}
625 		nextptr &= ~0x3;
626 	}
627 	*nextptr0 = nextptr;
628 
629 	if (nextptr < 0x40) {
630 		if (nextptr != 0) {
631 			kprintf("Illegal PCI extended capability "
632 				"offset 0x%02x", nextptr);
633 		}
634 		return 0;
635 	}
636 	return 1;
637 }
638 
639 static void
640 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
641 {
642 #define REG(n, w)	\
643 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
644 
645 	struct pcicfg_pp *pp = &cfg->pp;
646 
647 	if (pp->pp_cap)
648 		return;
649 
650 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
651 	pp->pp_status = ptr + PCIR_POWER_STATUS;
652 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
653 
654 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
655 		/*
656 		 * XXX
657 		 * We should write to data_select and read back from
658 		 * data_scale to determine whether data register is
659 		 * implemented.
660 		 */
661 #ifdef foo
662 		pp->pp_data = ptr + PCIR_POWER_DATA;
663 #else
664 		pp->pp_data = 0;
665 #endif
666 	}
667 
668 #undef REG
669 }
670 
671 static void
672 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
673 {
674 #if defined(__x86_64__)
675 
676 #define REG(n, w)	\
677 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
678 
679 	struct pcicfg_ht *ht = &cfg->ht;
680 	uint64_t addr;
681 	uint32_t val;
682 
683 	/* Determine HT-specific capability type. */
684 	val = REG(ptr + PCIR_HT_COMMAND, 2);
685 
686 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
687 		cfg->ht.ht_slave = ptr;
688 
689 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
690 		return;
691 
692 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
693 		/* Sanity check the mapping window. */
694 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
695 		addr <<= 32;
696 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
697 		if (addr != MSI_X86_ADDR_BASE) {
698 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
699 				"has non-default MSI window 0x%llx\n",
700 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
701 				(long long)addr);
702 		}
703 	} else {
704 		addr = MSI_X86_ADDR_BASE;
705 	}
706 
707 	ht->ht_msimap = ptr;
708 	ht->ht_msictrl = val;
709 	ht->ht_msiaddr = addr;
710 
711 #undef REG
712 
713 #endif	/* __x86_64__ */
714 }
715 
716 static void
717 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
718 {
719 #define REG(n, w)	\
720 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
721 
722 	struct pcicfg_msi *msi = &cfg->msi;
723 
724 	msi->msi_location = ptr;
725 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
726 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
727 
728 #undef REG
729 }
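
/*
 * For example (illustrative), an MSI control word whose MMC field
 * (PCIM_MSICTRL_MMC_MASK, bits 3:1) holds 010b gives
 * msi_msgnum = 1 << 2 = 4 messages; 000b gives a single message.
 */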
730 
731 static void
732 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
733 {
734 #define REG(n, w)	\
735 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
736 
737 	struct pcicfg_msix *msix = &cfg->msix;
738 	uint32_t val;
739 
740 	msix->msix_location = ptr;
741 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
742 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
743 
744 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
745 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
746 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
747 
748 	val = REG(ptr + PCIR_MSIX_PBA, 4);
749 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
750 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
751 
752 	TAILQ_INIT(&msix->msix_vectors);
753 
754 #undef REG
755 }
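
/*
 * For example (illustrative), a PCIR_MSIX_TABLE value of 0x00002003
 * selects BIR 3, so msix_table_bar = PCIR_BAR(3) and the vector table
 * lives at offset 0x2000 within that BAR.  Each table entry is 16 bytes
 * (address low/high, data, vector control), as used by
 * pci_setup_msix_vector() below.
 */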
756 
757 static void
758 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
759 {
760 	cfg->vpd.vpd_reg = ptr;
761 }
762 
763 static void
764 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
765 {
766 #define REG(n, w)	\
767 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
768 
769 	/* Should always be true. */
770 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
771 		uint32_t val;
772 
773 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
774 		cfg->subvendor = val & 0xffff;
775 		cfg->subdevice = val >> 16;
776 	}
777 
778 #undef REG
779 }
780 
781 static void
782 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
783 {
784 	/*
785 	 * Assume we have a PCI-X chipset if we have
786 	 * at least one PCI-PCI bridge with a PCI-X
787 	 * capability.  Note that some systems with
788 	 * PCI-express or HT chipsets might match on
789 	 * this check as well.
790 	 */
791 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
792 		pcix_chipset = 1;
793 
794 	cfg->pcix.pcix_ptr = ptr;
795 }
796 
797 static int
798 pcie_slotimpl(const pcicfgregs *cfg)
799 {
800 	const struct pcicfg_expr *expr = &cfg->expr;
801 	uint16_t port_type;
802 
803 	/*
804 	 * - Slot implemented bit is meaningful iff current port is
805 	 *   root port or downstream port.
806 	 * - Testing for root port or downstream port is meaningful
807 	 *   iff the PCI configuration has a type 1 header.
808 	 */
809 
810 	if (cfg->hdrtype != 1)
811 		return 0;
812 
813 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
814 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
815 		return 0;
816 
817 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
818 		return 0;
819 
820 	return 1;
821 }
822 
823 static void
824 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
825 {
826 #define REG(n, w)	\
827 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
828 
829 	struct pcicfg_expr *expr = &cfg->expr;
830 
831 	/*
832 	 * Assume we have a PCI-express chipset if we have
833 	 * at least one PCI-express device.
834 	 */
835 	pcie_chipset = 1;
836 
837 	expr->expr_ptr = ptr;
838 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
839 
840 	/*
841 	 * Read slot capabilities.  Slot capabilities exist iff the
842 	 * current port's slot is implemented.
843 	 */
844 	if (pcie_slotimpl(cfg))
845 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
846 
847 #undef REG
848 }
849 
850 static void
851 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
852 {
853 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
854 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
855 
856 	uint32_t val;
857 	int nextptr, ptrptr;
858 
859 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
860 		/* No capabilities */
861 		return;
862 	}
863 
864 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
865 	case 0:
866 	case 1:
867 		ptrptr = PCIR_CAP_PTR;
868 		break;
869 	case 2:
870 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
871 		break;
872 	default:
873 		return;				/* no capabilities support */
874 	}
875 	nextptr = REG(ptrptr, 1);	/* sanity check? */
876 
877 	/*
878 	 * Read capability entries.
879 	 */
880 	while (pci_fixup_nextptr(&nextptr)) {
881 		const struct pci_read_cap *rc;
882 		int ptr = nextptr;
883 
884 		/* Find the next entry */
885 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
886 
887 		/* Process this entry */
888 		val = REG(ptr + PCICAP_ID, 1);
889 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
890 			if (rc->cap == val) {
891 				rc->read_cap(pcib, ptr, nextptr, cfg);
892 				break;
893 			}
894 		}
895 	}
896 
897 #if defined(__x86_64__)
898 	/*
899 	 * Enable the MSI mapping window for all HyperTransport
900 	 * slaves.  PCI-PCI bridges have their windows enabled via
901 	 * PCIB_MAP_MSI().
902 	 */
903 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
904 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
905 		device_printf(pcib,
906 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
907 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
908 		cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
909 		WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
910 		    2);
911 	}
912 #endif
913 
914 /* The REG and WREG macros carry through to the functions below */
915 }
916 
917 /*
918  * PCI Vital Product Data
919  */
920 
921 #define	PCI_VPD_TIMEOUT		1000000
922 
923 static int
924 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
925 {
926 	int count = PCI_VPD_TIMEOUT;
927 
928 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
929 
930 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
931 
932 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
933 		if (--count < 0)
934 			return (ENXIO);
935 		DELAY(1);	/* limit looping */
936 	}
937 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
938 
939 	return (0);
940 }
941 
942 #if 0
943 static int
944 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
945 {
946 	int count = PCI_VPD_TIMEOUT;
947 
948 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
949 
950 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
951 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
952 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
953 		if (--count < 0)
954 			return (ENXIO);
955 		DELAY(1);	/* limit looping */
956 	}
957 
958 	return (0);
959 }
960 #endif
961 
962 #undef PCI_VPD_TIMEOUT
963 
964 struct vpd_readstate {
965 	device_t	pcib;
966 	pcicfgregs	*cfg;
967 	uint32_t	val;
968 	int		bytesinval;
969 	int		off;
970 	uint8_t		cksum;
971 };
972 
973 static int
974 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
975 {
976 	uint32_t reg;
977 	uint8_t byte;
978 
979 	if (vrs->bytesinval == 0) {
980 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
981 			return (ENXIO);
982 		vrs->val = le32toh(reg);
983 		vrs->off += 4;
984 		byte = vrs->val & 0xff;
985 		vrs->bytesinval = 3;
986 	} else {
987 		vrs->val = vrs->val >> 8;
988 		byte = vrs->val & 0xff;
989 		vrs->bytesinval--;
990 	}
991 
992 	vrs->cksum += byte;
993 	*data = byte;
994 	return (0);
995 }
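
/*
 * Example: the first vpd_nextbyte() call fetches a 32-bit little-endian
 * window via pci_read_vpd_reg() and returns its lowest byte; the next
 * three calls shift the cached value right, so one config-space read
 * serves four byte fetches.
 */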
996 
997 int
998 pcie_slot_implemented(device_t dev)
999 {
1000 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1001 
1002 	return pcie_slotimpl(&dinfo->cfg);
1003 }
1004 
1005 void
1006 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
1007 {
1008 	uint8_t expr_ptr;
1009 	uint16_t val;
1010 
1011 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
1012 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
1013 		panic("%s: invalid max read request size 0x%02x",
1014 		      device_get_nameunit(dev), rqsize);
1015 	}
1016 
1017 	expr_ptr = pci_get_pciecap_ptr(dev);
1018 	if (!expr_ptr)
1019 		panic("%s: not PCIe device", device_get_nameunit(dev));
1020 
1021 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1022 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
1023 		if (bootverbose)
1024 			device_printf(dev, "adjust device control 0x%04x", val);
1025 
1026 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
1027 		val |= rqsize;
1028 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
1029 
1030 		if (bootverbose)
1031 			kprintf(" -> 0x%04x\n", val);
1032 	}
1033 }
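
#if 0
/*
 * Illustrative usage sketch only (hypothetical driver code): clamp a
 * device's max read request size during attach.  The constant
 * PCIEM_DEVCTL_MAX_READRQ_2048 is assumed to be defined in pcireg.h
 * alongside the _4096 variant used above.
 */
static int
foo_attach(device_t dev)
{
	if (pci_get_pciecap_ptr(dev) != 0)
		pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_2048);
	return (0);
}
#endif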
1034 
1035 uint16_t
1036 pcie_get_max_readrq(device_t dev)
1037 {
1038 	uint8_t expr_ptr;
1039 	uint16_t val;
1040 
1041 	expr_ptr = pci_get_pciecap_ptr(dev);
1042 	if (!expr_ptr)
1043 		panic("%s: not PCIe device", device_get_nameunit(dev));
1044 
1045 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1046 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1047 }
1048 
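/*
 * Parse VPD with a small state machine: state 0 reads a resource item
 * header, 1 copies the identifier string, 2/3 handle a VPD-R keyword
 * header/value, 4 skips data, and 5/6 handle a VPD-W keyword
 * header/value.  State -1 ends parsing cleanly; anything below -1
 * indicates a read error.
 */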
1049 static void
1050 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1051 {
1052 	struct vpd_readstate vrs;
1053 	int state;
1054 	int name;
1055 	int remain;
1056 	int i;
1057 	int alloc, off;		/* alloc/off for RO/W arrays */
1058 	int cksumvalid;
1059 	int dflen;
1060 	uint8_t byte;
1061 	uint8_t byte2;
1062 
1063 	/* init vpd reader */
1064 	vrs.bytesinval = 0;
1065 	vrs.off = 0;
1066 	vrs.pcib = pcib;
1067 	vrs.cfg = cfg;
1068 	vrs.cksum = 0;
1069 
1070 	state = 0;
1071 	name = remain = i = 0;	/* shut up stupid gcc */
1072 	alloc = off = 0;	/* shut up stupid gcc */
1073 	dflen = 0;		/* shut up stupid gcc */
1074 	cksumvalid = -1;
1075 	while (state >= 0) {
1076 		if (vpd_nextbyte(&vrs, &byte)) {
1077 			state = -2;
1078 			break;
1079 		}
1080 #if 0
1081 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1082 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1083 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1084 #endif
1085 		switch (state) {
1086 		case 0:		/* item name */
1087 			if (byte & 0x80) {
1088 				if (vpd_nextbyte(&vrs, &byte2)) {
1089 					state = -2;
1090 					break;
1091 				}
1092 				remain = byte2;
1093 				if (vpd_nextbyte(&vrs, &byte2)) {
1094 					state = -2;
1095 					break;
1096 				}
1097 				remain |= byte2 << 8;
1098 				if (remain > (0x7f*4 - vrs.off)) {
1099 					state = -1;
1100 					kprintf(
1101 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1102 					    cfg->domain, cfg->bus, cfg->slot,
1103 					    cfg->func, remain);
1104 				}
1105 				name = byte & 0x7f;
1106 			} else {
1107 				remain = byte & 0x7;
1108 				name = (byte >> 3) & 0xf;
1109 			}
1110 			switch (name) {
1111 			case 0x2:	/* String */
1112 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1113 				    M_DEVBUF, M_WAITOK);
1114 				i = 0;
1115 				state = 1;
1116 				break;
1117 			case 0xf:	/* End */
1118 				state = -1;
1119 				break;
1120 			case 0x10:	/* VPD-R */
1121 				alloc = 8;
1122 				off = 0;
1123 				cfg->vpd.vpd_ros = kmalloc(alloc *
1124 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1125 				    M_WAITOK | M_ZERO);
1126 				state = 2;
1127 				break;
1128 			case 0x11:	/* VPD-W */
1129 				alloc = 8;
1130 				off = 0;
1131 				cfg->vpd.vpd_w = kmalloc(alloc *
1132 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1133 				    M_WAITOK | M_ZERO);
1134 				state = 5;
1135 				break;
1136 			default:	/* Invalid data, abort */
1137 				state = -1;
1138 				break;
1139 			}
1140 			break;
1141 
1142 		case 1:	/* Identifier String */
1143 			cfg->vpd.vpd_ident[i++] = byte;
1144 			remain--;
1145 			if (remain == 0)  {
1146 				cfg->vpd.vpd_ident[i] = '\0';
1147 				state = 0;
1148 			}
1149 			break;
1150 
1151 		case 2:	/* VPD-R Keyword Header */
1152 			if (off == alloc) {
1153 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1154 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1155 				    M_DEVBUF, M_WAITOK | M_ZERO);
1156 			}
1157 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1158 			if (vpd_nextbyte(&vrs, &byte2)) {
1159 				state = -2;
1160 				break;
1161 			}
1162 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1163 			if (vpd_nextbyte(&vrs, &byte2)) {
1164 				state = -2;
1165 				break;
1166 			}
1167 			dflen = byte2;
1168 			if (dflen == 0 &&
1169 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1170 			    2) == 0) {
1171 				/*
1172 				 * if this happens, we can't trust the rest
1173 				 * of the VPD.
1174 				 */
1175 				kprintf(
1176 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1177 				    cfg->domain, cfg->bus, cfg->slot,
1178 				    cfg->func, dflen);
1179 				cksumvalid = 0;
1180 				state = -1;
1181 				break;
1182 			} else if (dflen == 0) {
1183 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1184 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1185 				    M_DEVBUF, M_WAITOK);
1186 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1187 			} else
1188 				cfg->vpd.vpd_ros[off].value = kmalloc(
1189 				    (dflen + 1) *
1190 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1191 				    M_DEVBUF, M_WAITOK);
1192 			remain -= 3;
1193 			i = 0;
1194 			/* keep in sync w/ state 3's transitions */
1195 			if (dflen == 0 && remain == 0)
1196 				state = 0;
1197 			else if (dflen == 0)
1198 				state = 2;
1199 			else
1200 				state = 3;
1201 			break;
1202 
1203 		case 3:	/* VPD-R Keyword Value */
1204 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1205 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1206 			    "RV", 2) == 0 && cksumvalid == -1) {
1207 				if (vrs.cksum == 0)
1208 					cksumvalid = 1;
1209 				else {
1210 					if (bootverbose)
1211 						kprintf(
1212 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1213 						    cfg->domain, cfg->bus,
1214 						    cfg->slot, cfg->func,
1215 						    vrs.cksum);
1216 					cksumvalid = 0;
1217 					state = -1;
1218 					break;
1219 				}
1220 			}
1221 			dflen--;
1222 			remain--;
1223 			/* keep in sync w/ state 2's transitions */
1224 			if (dflen == 0)
1225 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1226 			if (dflen == 0 && remain == 0) {
1227 				cfg->vpd.vpd_rocnt = off;
1228 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1229 				    off * sizeof(*cfg->vpd.vpd_ros),
1230 				    M_DEVBUF, M_WAITOK | M_ZERO);
1231 				state = 0;
1232 			} else if (dflen == 0)
1233 				state = 2;
1234 			break;
1235 
1236 		case 4:
1237 			remain--;
1238 			if (remain == 0)
1239 				state = 0;
1240 			break;
1241 
1242 		case 5:	/* VPD-W Keyword Header */
1243 			if (off == alloc) {
1244 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1245 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1246 				    M_DEVBUF, M_WAITOK | M_ZERO);
1247 			}
1248 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1249 			if (vpd_nextbyte(&vrs, &byte2)) {
1250 				state = -2;
1251 				break;
1252 			}
1253 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1254 			if (vpd_nextbyte(&vrs, &byte2)) {
1255 				state = -2;
1256 				break;
1257 			}
1258 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1259 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1260 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1261 			    sizeof(*cfg->vpd.vpd_w[off].value),
1262 			    M_DEVBUF, M_WAITOK);
1263 			remain -= 3;
1264 			i = 0;
1265 			/* keep in sync w/ state 6's transitions */
1266 			if (dflen == 0 && remain == 0)
1267 				state = 0;
1268 			else if (dflen == 0)
1269 				state = 5;
1270 			else
1271 				state = 6;
1272 			break;
1273 
1274 		case 6:	/* VPD-W Keyword Value */
1275 			cfg->vpd.vpd_w[off].value[i++] = byte;
1276 			dflen--;
1277 			remain--;
1278 			/* keep in sync w/ state 5's transitions */
1279 			if (dflen == 0)
1280 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1281 			if (dflen == 0 && remain == 0) {
1282 				cfg->vpd.vpd_wcnt = off;
1283 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1284 				    off * sizeof(*cfg->vpd.vpd_w),
1285 				    M_DEVBUF, M_WAITOK | M_ZERO);
1286 				state = 0;
1287 			} else if (dflen == 0)
1288 				state = 5;
1289 			break;
1290 
1291 		default:
1292 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1293 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1294 			    state);
1295 			state = -1;
1296 			break;
1297 		}
1298 	}
1299 
1300 	if (cksumvalid == 0 || state < -1) {
1301 		/* read-only data bad, clean up */
1302 		if (cfg->vpd.vpd_ros != NULL) {
1303 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1304 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1305 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1306 			cfg->vpd.vpd_ros = NULL;
1307 		}
1308 	}
1309 	if (state < -1) {
1310 		/* I/O error, clean up */
1311 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1312 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1313 		if (cfg->vpd.vpd_ident != NULL) {
1314 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1315 			cfg->vpd.vpd_ident = NULL;
1316 		}
1317 		if (cfg->vpd.vpd_w != NULL) {
1318 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1319 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1320 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1321 			cfg->vpd.vpd_w = NULL;
1322 		}
1323 	}
1324 	cfg->vpd.vpd_cached = 1;
1325 #undef REG
1326 #undef WREG
1327 }
1328 
1329 int
1330 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1331 {
1332 	struct pci_devinfo *dinfo = device_get_ivars(child);
1333 	pcicfgregs *cfg = &dinfo->cfg;
1334 
1335 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1336 		pci_read_vpd(device_get_parent(dev), cfg);
1337 
1338 	*identptr = cfg->vpd.vpd_ident;
1339 
1340 	if (*identptr == NULL)
1341 		return (ENXIO);
1342 
1343 	return (0);
1344 }
1345 
1346 int
1347 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1348 	const char **vptr)
1349 {
1350 	struct pci_devinfo *dinfo = device_get_ivars(child);
1351 	pcicfgregs *cfg = &dinfo->cfg;
1352 	int i;
1353 
1354 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1355 		pci_read_vpd(device_get_parent(dev), cfg);
1356 
1357 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1358 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1359 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1360 			*vptr = cfg->vpd.vpd_ros[i].value;
1361 			return (0);
1362 		}
1365 
1366 	*vptr = NULL;
1367 	return (ENXIO);
1368 }
1369 
1370 /*
1371  * Return the offset in configuration space of the requested extended
1372  * capability entry or 0 if the specified capability was not found.
1373  */
1374 int
1375 pci_find_extcap_method(device_t dev, device_t child, int capability,
1376     int *capreg)
1377 {
1378 	struct pci_devinfo *dinfo = device_get_ivars(child);
1379 	pcicfgregs *cfg = &dinfo->cfg;
1380 	u_int32_t status;
1381 	u_int8_t ptr;
1382 
1383 	/*
1384 	 * Check the CAP_LIST bit of the PCI status register first.
1385 	 */
1386 	status = pci_read_config(child, PCIR_STATUS, 2);
1387 	if (!(status & PCIM_STATUS_CAPPRESENT))
1388 		return (ENXIO);
1389 
1390 	/*
1391 	 * Determine the start pointer of the capabilities list.
1392 	 */
1393 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1394 	case 0:
1395 	case 1:
1396 		ptr = PCIR_CAP_PTR;
1397 		break;
1398 	case 2:
1399 		ptr = PCIR_CAP_PTR_2;
1400 		break;
1401 	default:
1402 		/* XXX: panic? */
1403 		return (ENXIO);		/* no extended capabilities support */
1404 	}
1405 	ptr = pci_read_config(child, ptr, 1);
1406 
1407 	/*
1408 	 * Traverse the capabilities list.
1409 	 */
1410 	while (ptr != 0) {
1411 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1412 			if (capreg != NULL)
1413 				*capreg = ptr;
1414 			return (0);
1415 		}
1416 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1417 	}
1418 
1419 	return (ENOENT);
1420 }
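
#if 0
/*
 * Illustrative usage sketch only (hypothetical caller): locate the PCI
 * Express capability of a device and read its device control word via
 * the pci_find_extcap() wrapper for this method.
 */
{
	int ptr;
	uint16_t devctrl;

	if (pci_find_extcap(dev, PCIY_EXPRESS, &ptr) == 0)
		devctrl = pci_read_config(dev, ptr + PCIER_DEVCTRL, 2);
}
#endif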
1421 
1422 /*
1423  * Support for MSI-X message interrupts.
1424  */
1425 static void
1426 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1427     uint32_t data)
1428 {
1429 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431 	uint32_t offset;
1432 
1433 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1434 	offset = msix->msix_table_offset + index * 16;
1435 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1436 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1437 	bus_write_4(msix->msix_table_res, offset + 8, data);
1438 
1439 	/* Enable MSI -> HT mapping. */
1440 	pci_ht_map_msi(dev, address);
1441 }
1442 
1443 static void
1444 pci_mask_msix_vector(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, val;
1449 
1450 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1451 	offset = msix->msix_table_offset + index * 16 + 12;
1452 	val = bus_read_4(msix->msix_table_res, offset);
1453 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1454 		val |= PCIM_MSIX_VCTRL_MASK;
1455 		bus_write_4(msix->msix_table_res, offset, val);
1456 	}
1457 }
1458 
1459 static void
1460 pci_unmask_msix_vector(device_t dev, u_int index)
1461 {
1462 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1463 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1464 	uint32_t offset, val;
1465 
1466 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1467 	offset = msix->msix_table_offset + index * 16 + 12;
1468 	val = bus_read_4(msix->msix_table_res, offset);
1469 	if (val & PCIM_MSIX_VCTRL_MASK) {
1470 		val &= ~PCIM_MSIX_VCTRL_MASK;
1471 		bus_write_4(msix->msix_table_res, offset, val);
1472 	}
1473 }
1474 
1475 int
1476 pci_pending_msix_vector(device_t dev, u_int index)
1477 {
1478 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1479 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1480 	uint32_t offset, bit;
1481 
1482 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1483 	    ("MSI-X is not setup yet"));
1484 
1485 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1486 	offset = msix->msix_pba_offset + (index / 32) * 4;
1487 	bit = 1 << index % 32;
1488 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1489 }
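
/*
 * For example, vector 37 maps to PBA dword 1 (offset msix_pba_offset + 4),
 * bit 5; a non-zero return from pci_pending_msix_vector() means that
 * vector's interrupt is pending.
 */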
1490 
1491 /*
1492  * Restore MSI-X registers and table during resume.  If MSI-X is
1493  * enabled then walk the virtual table to restore the actual MSI-X
1494  * table.
1495  */
1496 static void
1497 pci_resume_msix(device_t dev)
1498 {
1499 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1500 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1501 
1502 	if (msix->msix_table_res != NULL) {
1503 		const struct msix_vector *mv;
1504 
1505 		pci_mask_msix_allvectors(dev);
1506 
1507 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1508 			u_int vector;
1509 
1510 			if (mv->mv_address == 0)
1511 				continue;
1512 
1513 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1514 			pci_setup_msix_vector(dev, vector,
1515 			    mv->mv_address, mv->mv_data);
1516 			pci_unmask_msix_vector(dev, vector);
1517 		}
1518 	}
1519 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1520 	    msix->msix_ctrl, 2);
1521 }
1522 
1523 /*
1524  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1525  *
1526  * After this function returns, the MSI-X's rid will be saved in rid0.
1527  */
1528 int
1529 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1530     int *rid0, int cpuid)
1531 {
1532 	struct pci_devinfo *dinfo = device_get_ivars(child);
1533 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534 	struct msix_vector *mv;
1535 	struct resource_list_entry *rle;
1536 	int error, irq, rid;
1537 
1538 	KASSERT(msix->msix_table_res != NULL &&
1539 	    msix->msix_pba_res != NULL, ("MSI-X is not set up yet"));
1540 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1541 	KASSERT(vector < msix->msix_msgnum,
1542 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1543 
1544 	if (bootverbose) {
1545 		device_printf(child,
1546 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1547 		    vector, msix->msix_msgnum);
1548 	}
1549 
1550 	/* Set rid according to vector number */
1551 	rid = PCI_MSIX_VEC2RID(vector);
1552 
1553 	/* Vector has already been allocated */
1554 	mv = pci_find_msix_vector(child, rid);
1555 	if (mv != NULL)
1556 		return EBUSY;
1557 
1558 	/* Allocate a message. */
1559 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1560 	if (error)
1561 		return error;
1562 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1563 	    irq, irq, 1, cpuid);
1564 
1565 	if (bootverbose) {
1566 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1567 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1568 		    rle->start, cpuid);
1569 	}
1570 
1571 	/* Update counts of alloc'd messages. */
1572 	msix->msix_alloc++;
1573 
1574 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1575 	mv->mv_rid = rid;
1576 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1577 
1578 	*rid0 = rid;
1579 	return 0;
1580 }
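
#if 0
/*
 * Illustrative usage sketch only (hypothetical driver code, assuming the
 * pci_alloc_msix_vector() wrapper for the method above): map the table
 * and PBA BARs first, then set up MSI-X, allocate vector 0 on CPU 0 and
 * enable MSI-X.  Error unwinding is omitted for brevity.
 */
{
	int rid;

	if (pci_setup_msix(dev) == 0 &&
	    pci_alloc_msix_vector(dev, 0, &rid, 0) == 0)
		pci_enable_msix(dev);
}
#endif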
1581 
1582 int
1583 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1584 {
1585 	struct pci_devinfo *dinfo = device_get_ivars(child);
1586 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1587 	struct resource_list_entry *rle;
1588 	struct msix_vector *mv;
1589 	int irq, cpuid;
1590 
1591 	KASSERT(msix->msix_table_res != NULL &&
1592 	    msix->msix_pba_res != NULL, ("MSI-X is not set up yet"));
1593 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1594 	KASSERT(rid > 0, ("invalid rid %d", rid));
1595 
1596 	mv = pci_find_msix_vector(child, rid);
1597 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1598 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not torn down", rid));
1599 
1600 	/* Make sure resource is no longer allocated. */
1601 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1602 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1603 	KASSERT(rle->res == NULL,
1604 	    ("MSI-X resource is still allocated, rid %d", rid));
1605 
1606 	irq = rle->start;
1607 	cpuid = rle->cpuid;
1608 
1609 	/* Free the resource list entries. */
1610 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1611 
1612 	/* Release the IRQ. */
1613 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1614 
1615 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1616 	kfree(mv, M_DEVBUF);
1617 
1618 	msix->msix_alloc--;
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Return the max supported MSI-X messages this device supports.
1624  * Basically, assuming the MD code can alloc messages, this function
1625  * should return the maximum value that pci_alloc_msix() can return.
1626  * Thus, it is subject to the tunables, etc.
1627  */
1628 int
1629 pci_msix_count_method(device_t dev, device_t child)
1630 {
1631 	struct pci_devinfo *dinfo = device_get_ivars(child);
1632 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1633 
1634 	if (pci_do_msix && msix->msix_location != 0)
1635 		return (msix->msix_msgnum);
1636 	return (0);
1637 }
1638 
1639 int
1640 pci_setup_msix(device_t dev)
1641 {
1642 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1643 	pcicfgregs *cfg = &dinfo->cfg;
1644 	struct resource_list_entry *rle;
1645 	struct resource *table_res, *pba_res;
1646 
1647 	KASSERT(cfg->msix.msix_table_res == NULL &&
1648 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has already been set up"));
1649 
1650 	/* If rid 0 is allocated, then fail. */
1651 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1652 	if (rle != NULL && rle->res != NULL)
1653 		return (ENXIO);
1654 
1655 	/* Already have allocated MSIs? */
1656 	if (cfg->msi.msi_alloc != 0)
1657 		return (ENXIO);
1658 
1659 	/* If MSI is blacklisted for this system, fail. */
1660 	if (pci_msi_blacklisted())
1661 		return (ENXIO);
1662 
1663 	/* MSI-X capability present? */
1664 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1665 	    !pci_do_msix)
1666 		return (ENODEV);
1667 
1668 	KASSERT(cfg->msix.msix_alloc == 0 &&
1669 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1670 	    ("MSI-X vector has been allocated"));
1671 
1672 	/* Make sure the appropriate BARs are mapped. */
1673 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1674 	    cfg->msix.msix_table_bar);
1675 	if (rle == NULL || rle->res == NULL ||
1676 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1677 		return (ENXIO);
1678 	table_res = rle->res;
1679 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1680 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1681 		    cfg->msix.msix_pba_bar);
1682 		if (rle == NULL || rle->res == NULL ||
1683 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1684 			return (ENXIO);
1685 	}
1686 	pba_res = rle->res;
1687 
1688 	cfg->msix.msix_table_res = table_res;
1689 	cfg->msix.msix_pba_res = pba_res;
1690 
1691 	pci_mask_msix_allvectors(dev);
1692 
1693 	return 0;
1694 }
1695 
1696 void
1697 pci_teardown_msix(device_t dev)
1698 {
1699 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1701 
1702 	KASSERT(msix->msix_table_res != NULL &&
1703 	    msix->msix_pba_res != NULL, ("MSI-X is not set up yet"));
1704 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1705 	    ("MSI-X vector is still allocated"));
1706 
1707 	pci_mask_msix_allvectors(dev);
1708 
1709 	msix->msix_table_res = NULL;
1710 	msix->msix_pba_res = NULL;
1711 }
1712 
1713 void
1714 pci_enable_msix(device_t dev)
1715 {
1716 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1717 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1718 
1719 	KASSERT(msix->msix_table_res != NULL &&
1720 	    msix->msix_pba_res != NULL, ("MSI-X is not set up yet"));
1721 
1722 	/* Update control register to enable MSI-X. */
1723 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1724 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1725 	    msix->msix_ctrl, 2);
1726 }
1727 
1728 void
1729 pci_disable_msix(device_t dev)
1730 {
1731 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1732 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1733 
1734 	KASSERT(msix->msix_table_res != NULL &&
1735 	    msix->msix_pba_res != NULL, ("MSI-X is not set up yet"));
1736 
1737 	/* Disable MSI -> HT mapping. */
1738 	pci_ht_map_msi(dev, 0);
1739 
1740 	/* Update control register to disable MSI-X. */
1741 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1742 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1743 	    msix->msix_ctrl, 2);
1744 }
1745 
1746 static void
1747 pci_mask_msix_allvectors(device_t dev)
1748 {
1749 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1750 	u_int i;
1751 
1752 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1753 		pci_mask_msix_vector(dev, i);
1754 }
1755 
1756 static struct msix_vector *
1757 pci_find_msix_vector(device_t dev, int rid)
1758 {
1759 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1760 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1761 	struct msix_vector *mv;
1762 
1763 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1764 		if (mv->mv_rid == rid)
1765 			return mv;
1766 	}
1767 	return NULL;
1768 }
1769 
1770 /*
1771  * HyperTransport MSI mapping control
1772  */
1773 void
1774 pci_ht_map_msi(device_t dev, uint64_t addr)
1775 {
1776 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1777 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1778 
1779 	if (!ht->ht_msimap)
1780 		return;
1781 
1782 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
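	/*
	 * Note: comparing addr >> 20 checks that the target address lies
	 * in the same 1MB-aligned window as the mapping base (normally
	 * the x86 MSI window at MSI_X86_ADDR_BASE, 0xfee00000).
	 */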
1783 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1784 		/* Enable MSI -> HT mapping. */
1785 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1786 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1787 		    ht->ht_msictrl, 2);
1788 	}
1789 
1790 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1791 		/* Disable MSI -> HT mapping. */
1792 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1793 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1794 		    ht->ht_msictrl, 2);
1795 	}
1796 }
1797 
1798 /*
1799  * Support for MSI message signalled interrupts.
1800  */
1801 static void
1802 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1803 {
1804 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1805 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1806 
1807 	/* Write data and address values. */
1808 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1809 	    address & 0xffffffff, 4);
1810 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1811 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1812 		    address >> 32, 4);
1813 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1814 		    data, 2);
1815 	} else
1816 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1817 		    2);
1818 
1819 	/* Enable MSI in the control register. */
1820 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1821 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1822 	    2);
1823 
1824 	/* Enable MSI -> HT mapping. */
1825 	pci_ht_map_msi(dev, address);
1826 }
1827 
1828 static void
1829 pci_disable_msi(device_t dev)
1830 {
1831 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1832 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1833 
1834 	/* Disable MSI -> HT mapping. */
1835 	pci_ht_map_msi(dev, 0);
1836 
1837 	/* Disable MSI in the control register. */
1838 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1839 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1840 	    2);
1841 }
1842 
1843 /*
1844  * Restore MSI registers during resume.  If MSI is enabled then
1845  * restore the data and address registers in addition to the control
1846  * register.
1847  */
1848 static void
1849 pci_resume_msi(device_t dev)
1850 {
1851 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1852 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1853 	uint64_t address;
1854 	uint16_t data;
1855 
1856 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1857 		address = msi->msi_addr;
1858 		data = msi->msi_data;
1859 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1860 		    address & 0xffffffff, 4);
1861 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1862 			pci_write_config(dev, msi->msi_location +
1863 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1864 			pci_write_config(dev, msi->msi_location +
1865 			    PCIR_MSI_DATA_64BIT, data, 2);
1866 		} else
1867 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1868 			    data, 2);
1869 	}
1870 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1871 	    2);
1872 }
1873 
1874 /*
1875  * Returns true if the specified device is blacklisted because MSI
1876  * doesn't work.
1877  */
1878 int
1879 pci_msi_device_blacklisted(device_t dev)
1880 {
1881 	struct pci_quirk *q;
1882 
1883 	if (!pci_honor_msi_blacklist)
1884 		return (0);
1885 
1886 	for (q = &pci_quirks[0]; q->devid; q++) {
1887 		if (q->devid == pci_get_devid(dev) &&
1888 		    q->type == PCI_QUIRK_DISABLE_MSI)
1889 			return (1);
1890 	}
1891 	return (0);
1892 }
1893 
1894 /*
1895  * Determine if MSI is blacklisted globally on this system.  Currently,
1896  * we just check for blacklisted chipsets as represented by the
1897  * host-PCI bridge at device 0:0:0.  In the future, it may become
1898  * necessary to check other system attributes, such as the kenv values
1899  * that give the motherboard manufacturer and model number.
1900  */
1901 static int
1902 pci_msi_blacklisted(void)
1903 {
1904 	device_t dev;
1905 
1906 	if (!pci_honor_msi_blacklist)
1907 		return (0);
1908 
1909 	/*
1910 	 * Always assume that MSI-X works in virtual machines.  This is,
1911 	 * for example, needed for most (or all) QEMU-based setups, since
1912 	 * the emulated chipsets tend to be very old.
1913 	 */
1914 	if (vmm_guest != VMM_GUEST_NONE)
1915 		return (0);
1916 
1917 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1918 	if (!(pcie_chipset || pcix_chipset))
1919 		return (1);
1920 
1921 	dev = pci_find_bsf(0, 0, 0);
1922 	if (dev != NULL)
1923 		return (pci_msi_device_blacklisted(dev));
1924 	return (0);
1925 }
1926 
1927 /*
1928  * Attempt to allocate count MSI messages on start_cpuid.
1929  *
1930  * If start_cpuid < 0, then the MSI messages' target CPU will be
1931  * selected automatically.
1932  *
1933  * If the caller explicitly specified the MSI messages' target CPU,
1934  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1935  * messages on the specified CPU.  If the allocation fails because the
1936  * MD code does not have enough vectors (EMSGSIZE), then we will try
1937  * the next available CPU, until the allocation has failed on all CPUs.
1938  *
1939  * EMSGSIZE will be returned if none of the available CPUs has enough
1940  * vectors for the requested number of MSI messages.  The caller should
1941  * either reduce the number of MSI messages to be requested, or simply
1942  * give up on using MSI.
1943  *
1944  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1945  * returned in the 'rid' array, if the allocation succeeds.
1946  */
1947 int
1948 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1949     int start_cpuid)
1950 {
1951 	struct pci_devinfo *dinfo = device_get_ivars(child);
1952 	pcicfgregs *cfg = &dinfo->cfg;
1953 	struct resource_list_entry *rle;
1954 	int error, i, irqs[32], cpuid = 0;
1955 	uint16_t ctrl;
1956 
1957 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1958 	    ("invalid MSI count %d", count));
1959 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1960 
1961 	/* If rid 0 is allocated, then fail. */
1962 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1963 	if (rle != NULL && rle->res != NULL)
1964 		return (ENXIO);
1965 
1966 	/* Already have allocated messages? */
1967 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1968 		return (ENXIO);
1969 
1970 	/* If MSI is blacklisted for this system, fail. */
1971 	if (pci_msi_blacklisted())
1972 		return (ENXIO);
1973 
1974 	/* MSI capability present? */
1975 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1976 	    !pci_do_msi)
1977 		return (ENODEV);
1978 
1979 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1980 	    count, cfg->msi.msi_msgnum));
1981 
1982 	if (bootverbose) {
1983 		device_printf(child,
1984 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1985 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1986 	}
1987 
1988 	if (start_cpuid < 0)
1989 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1990 
1991 	error = EINVAL;
1992 	for (i = 0; i < ncpus; ++i) {
1993 		cpuid = (start_cpuid + i) % ncpus;
1994 
1995 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1996 		    cfg->msi.msi_msgnum, irqs, cpuid);
1997 		if (error == 0)
1998 			break;
1999 		else if (error != EMSGSIZE)
2000 			return (error);
2001 	}
2002 	if (error)
2003 		return (error);
2004 
2005 	/*
2006 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
2007 	 * the irqs[] array, so add new resources starting at rid 1.
2008 	 */
2009 	for (i = 0; i < count; i++) {
2010 		rid[i] = i + 1;
2011 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2012 		    irqs[i], irqs[i], 1, cpuid);
2013 	}
2014 
2015 	if (bootverbose) {
2016 		if (count == 1) {
2017 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
2018 			    irqs[0], cpuid);
2019 		} else {
2020 			int run;
2021 
2022 			/*
2023 			 * Be fancy and try to print contiguous runs
2024 			 * of IRQ values as ranges.  'run' is true if
2025 			 * we are in a range.
2026 			 */
2027 			device_printf(child, "using IRQs %d", irqs[0]);
2028 			run = 0;
2029 			for (i = 1; i < count; i++) {
2030 
2031 				/* Still in a run? */
2032 				if (irqs[i] == irqs[i - 1] + 1) {
2033 					run = 1;
2034 					continue;
2035 				}
2036 
2037 				/* Finish previous range. */
2038 				if (run) {
2039 					kprintf("-%d", irqs[i - 1]);
2040 					run = 0;
2041 				}
2042 
2043 				/* Start new range. */
2044 				kprintf(",%d", irqs[i]);
2045 			}
2046 
2047 			/* Unfinished range? */
2048 			if (run)
2049 				kprintf("-%d", irqs[count - 1]);
2050 			kprintf(" for MSI on cpu%d\n", cpuid);
2051 		}
2052 	}
2053 
2054 	/* Update control register with count. */
2055 	ctrl = cfg->msi.msi_ctrl;
2056 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
2057 	ctrl |= (ffs(count) - 1) << 4;
2058 	cfg->msi.msi_ctrl = ctrl;
2059 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2060 
2061 	/* Update counts of alloc'd messages. */
2062 	cfg->msi.msi_alloc = count;
2063 	cfg->msi.msi_handlers = 0;
2064 	return (0);
2065 }
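
/*
 * Worked example of the MME encoding above (count assumed): for
 * count == 4, ffs(4) - 1 == 2, so the multiple-message-enable field
 * (bits 6:4 of the MSI control register) is set to 2 and the device
 * is granted 2^2 = 4 messages; the field holds log2 of the message
 * count.
 */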
2066 
2067 /* Release the MSI messages associated with this device. */
2068 int
2069 pci_release_msi_method(device_t dev, device_t child)
2070 {
2071 	struct pci_devinfo *dinfo = device_get_ivars(child);
2072 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2073 	struct resource_list_entry *rle;
2074 	int i, irqs[32], cpuid = -1;
2075 
2076 	/* Do we have any messages to release? */
2077 	if (msi->msi_alloc == 0)
2078 		return (ENODEV);
2079 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2080 
2081 	/* Make sure none of the resources are allocated. */
2082 	if (msi->msi_handlers > 0)
2083 		return (EBUSY);
2084 	for (i = 0; i < msi->msi_alloc; i++) {
2085 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2086 		KASSERT(rle != NULL, ("missing MSI resource"));
2087 		if (rle->res != NULL)
2088 			return (EBUSY);
2089 		if (i == 0) {
2090 			cpuid = rle->cpuid;
2091 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2092 			    ("invalid MSI target cpuid %d", cpuid));
2093 		} else {
2094 			KASSERT(rle->cpuid == cpuid,
2095 			    ("MSI targets different cpus, "
2096 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2097 		}
2098 		irqs[i] = rle->start;
2099 	}
2100 
2101 	/* Update control register with 0 count. */
2102 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2103 	    ("%s: MSI still enabled", __func__));
2104 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2105 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2106 	    msi->msi_ctrl, 2);
2107 
2108 	/* Release the messages. */
2109 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2110 	    cpuid);
2111 	for (i = 0; i < msi->msi_alloc; i++)
2112 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2113 
2114 	/* Update alloc count. */
2115 	msi->msi_alloc = 0;
2116 	msi->msi_addr = 0;
2117 	msi->msi_data = 0;
2118 	return (0);
2119 }
2120 
2121 /*
2122  * Return the maximum number of MSI messages this device supports.
2123  * Basically, assuming the MD code can alloc messages, this function
2124  * should return the maximum value that pci_alloc_msi() can return.
2125  * Thus, it is subject to the tunables, etc.
2126  */
2127 int
2128 pci_msi_count_method(device_t dev, device_t child)
2129 {
2130 	struct pci_devinfo *dinfo = device_get_ivars(child);
2131 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2132 
2133 	if (pci_do_msi && msi->msi_location != 0)
2134 		return (msi->msi_msgnum);
2135 	return (0);
2136 }
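
/*
 * Usage note (numbers assumed): for a function whose MSI capability
 * advertises 16 messages, pci_msi_count() returns 16 when MSI is
 * enabled via the pci_do_msi tunable.  pci_alloc_msi() may still grant
 * fewer, so drivers should treat this value only as an upper bound.
 */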
2137 
2138 /* kfree the pcicfgregs structure and all dependent data structures */
2139 
2140 int
2141 pci_freecfg(struct pci_devinfo *dinfo)
2142 {
2143 	struct devlist *devlist_head;
2144 	int i;
2145 
2146 	devlist_head = &pci_devq;
2147 
2148 	if (dinfo->cfg.vpd.vpd_reg) {
2149 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2150 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2151 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2152 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2153 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2154 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2155 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2156 	}
2157 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2158 	kfree(dinfo, M_DEVBUF);
2159 
2160 	/* increment the generation count */
2161 	pci_generation++;
2162 
2163 	/* we're losing one device */
2164 	pci_numdevs--;
2165 	return (0);
2166 }
2167 
2168 /*
2169  * PCI power management
2170  */
2171 int
2172 pci_set_powerstate_method(device_t dev, device_t child, int state)
2173 {
2174 	struct pci_devinfo *dinfo = device_get_ivars(child);
2175 	pcicfgregs *cfg = &dinfo->cfg;
2176 	uint16_t status;
2177 	int oldstate, highest, delay;
2178 
2179 	if (cfg->pp.pp_cap == 0)
2180 		return (EOPNOTSUPP);
2181 
2182 	/*
2183 	 * Optimize away a request that involves no state change.  While it
2184 	 * would be OK in theory to write to the hardware, some devices have
2185 	 * shown odd behavior when going from D3 -> D3.
2186 	 */
2187 	oldstate = pci_get_powerstate(child);
2188 	if (oldstate == state)
2189 		return (0);
2190 
2191 	/*
2192 	 * The PCI power management specification states that after a state
2193 	 * transition between PCI power states, system software must
2194 	 * guarantee a minimal delay before the function accesses the device.
2195 	 * Compute the worst case delay that we need to guarantee before we
2196 	 * access the device.  Many devices will be responsive much more
2197 	 * quickly than this delay, but there are some that don't respond
2198 	 * instantly to state changes.  Transitions to/from D3 state require
2199 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2200 	 * is done below with DELAY rather than a sleeper function because
2201 	 * this function can be called from contexts where we cannot sleep.
2202 	 */
2203 	highest = (oldstate > state) ? oldstate : state;
2204 	if (highest == PCI_POWERSTATE_D3)
2205 		delay = 10000;
2206 	else if (highest == PCI_POWERSTATE_D2)
2207 		delay = 200;
2208 	else
2209 		delay = 0;
2210 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2211 	    & ~PCIM_PSTAT_DMASK;
2212 	switch (state) {
2213 	case PCI_POWERSTATE_D0:
2214 		status |= PCIM_PSTAT_D0;
2215 		break;
2216 	case PCI_POWERSTATE_D1:
2217 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2218 			return (EOPNOTSUPP);
2219 		status |= PCIM_PSTAT_D1;
2220 		break;
2221 	case PCI_POWERSTATE_D2:
2222 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2223 			return (EOPNOTSUPP);
2224 		status |= PCIM_PSTAT_D2;
2225 		break;
2226 	case PCI_POWERSTATE_D3:
2227 		status |= PCIM_PSTAT_D3;
2228 		break;
2229 	default:
2230 		return (EINVAL);
2231 	}
2232 
2233 	if (bootverbose)
2234 		kprintf(
2235 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2236 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2237 		    dinfo->cfg.func, oldstate, state);
2238 
2239 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2240 	if (delay)
2241 		DELAY(delay);
2242 	return (0);
2243 }
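
/*
 * Minimal usage sketch (under #if 0, not compiled in): a driver's
 * suspend method parking its device in D3 through the
 * pci_set_powerstate() wrapper that lands in the method above.  The
 * driver name and the decision to ignore EOPNOTSUPP are assumptions
 * for illustration.
 */
#if 0
static int
foo_suspend(device_t dev)
{
	int error;

	error = pci_set_powerstate(dev, PCI_POWERSTATE_D3);
	if (error == EOPNOTSUPP)
		error = 0;	/* no PM capability; nothing to do */
	return (error);
}
#endif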
2244 
2245 int
2246 pci_get_powerstate_method(device_t dev, device_t child)
2247 {
2248 	struct pci_devinfo *dinfo = device_get_ivars(child);
2249 	pcicfgregs *cfg = &dinfo->cfg;
2250 	uint16_t status;
2251 	int result;
2252 
2253 	if (cfg->pp.pp_cap != 0) {
2254 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2255 		switch (status & PCIM_PSTAT_DMASK) {
2256 		case PCIM_PSTAT_D0:
2257 			result = PCI_POWERSTATE_D0;
2258 			break;
2259 		case PCIM_PSTAT_D1:
2260 			result = PCI_POWERSTATE_D1;
2261 			break;
2262 		case PCIM_PSTAT_D2:
2263 			result = PCI_POWERSTATE_D2;
2264 			break;
2265 		case PCIM_PSTAT_D3:
2266 			result = PCI_POWERSTATE_D3;
2267 			break;
2268 		default:
2269 			result = PCI_POWERSTATE_UNKNOWN;
2270 			break;
2271 		}
2272 	} else {
2273 		/* No support, device is always at D0 */
2274 		result = PCI_POWERSTATE_D0;
2275 	}
2276 	return (result);
2277 }
2278 
2279 /*
2280  * Some convenience functions for PCI device drivers.
2281  */
2282 
2283 static __inline void
2284 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2285 {
2286 	uint16_t	command;
2287 
2288 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2289 	command |= bit;
2290 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2291 }
2292 
2293 static __inline void
2294 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2295 {
2296 	uint16_t	command;
2297 
2298 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2299 	command &= ~bit;
2300 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2301 }
2302 
2303 int
2304 pci_enable_busmaster_method(device_t dev, device_t child)
2305 {
2306 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2307 	return (0);
2308 }
2309 
2310 int
2311 pci_disable_busmaster_method(device_t dev, device_t child)
2312 {
2313 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2314 	return (0);
2315 }
2316 
2317 int
2318 pci_enable_io_method(device_t dev, device_t child, int space)
2319 {
2320 	uint16_t command;
2321 	uint16_t bit;
2322 	char *error;
2323 
2324 	bit = 0;
2325 	error = NULL;
2326 
2327 	switch(space) {
2328 	case SYS_RES_IOPORT:
2329 		bit = PCIM_CMD_PORTEN;
2330 		error = "port";
2331 		break;
2332 	case SYS_RES_MEMORY:
2333 		bit = PCIM_CMD_MEMEN;
2334 		error = "memory";
2335 		break;
2336 	default:
2337 		return (EINVAL);
2338 	}
2339 	pci_set_command_bit(dev, child, bit);
2340 	/* Some devices seem to need a brief stall here; what to do? */
2341 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2342 	if (command & bit)
2343 		return (0);
2344 	device_printf(child, "failed to enable %s mapping!\n", error);
2345 	return (ENXIO);
2346 }
2347 
2348 int
2349 pci_disable_io_method(device_t dev, device_t child, int space)
2350 {
2351 	uint16_t command;
2352 	uint16_t bit;
2353 	char *error;
2354 
2355 	bit = 0;
2356 	error = NULL;
2357 
2358 	switch(space) {
2359 	case SYS_RES_IOPORT:
2360 		bit = PCIM_CMD_PORTEN;
2361 		error = "port";
2362 		break;
2363 	case SYS_RES_MEMORY:
2364 		bit = PCIM_CMD_MEMEN;
2365 		error = "memory";
2366 		break;
2367 	default:
2368 		return (EINVAL);
2369 	}
2370 	pci_clear_command_bit(dev, child, bit);
2371 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2372 	if (command & bit) {
2373 		device_printf(child, "failed to disable %s mapping!\n", error);
2374 		return (ENXIO);
2375 	}
2376 	return (0);
2377 }
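
/*
 * Hedged attach-time sketch (under #if 0, not compiled in) of the
 * convenience functions above: enable bus mastering and memory
 * decoding before touching a device's BARs.  The driver name is
 * hypothetical; pci_enable_busmaster() and pci_enable_io() are the
 * wrappers that resolve to the methods above.
 */
#if 0
static int
foo_attach(device_t dev)
{
	pci_enable_busmaster(dev);
	if (pci_enable_io(dev, SYS_RES_MEMORY) != 0)
		return (ENXIO);
	/* ... map BARs, set up interrupts, etc ... */
	return (0);
}
#endif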
2378 
2379 /*
2380  * New style pci driver.  Parent device is either a pci-host-bridge or a
2381  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2382  */
2383 
2384 void
2385 pci_print_verbose(struct pci_devinfo *dinfo)
2386 {
2387 
2388 	if (bootverbose) {
2389 		pcicfgregs *cfg = &dinfo->cfg;
2390 
2391 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2392 		    cfg->vendor, cfg->device, cfg->revid);
2393 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2394 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2395 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2396 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2397 		    cfg->mfdev);
2398 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2399 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2400 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2401 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2402 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2403 		if (cfg->intpin > 0)
2404 			kprintf("\tintpin=%c, irq=%d\n",
2405 			    cfg->intpin +'a' -1, cfg->intline);
2406 		if (cfg->pp.pp_cap) {
2407 			uint16_t status;
2408 
2409 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2410 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2411 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2412 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2413 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2414 			    status & PCIM_PSTAT_DMASK);
2415 		}
2416 		if (cfg->msi.msi_location) {
2417 			int ctrl;
2418 
2419 			ctrl = cfg->msi.msi_ctrl;
2420 			kprintf("\tMSI supports %d message%s%s%s\n",
2421 			    cfg->msi.msi_msgnum,
2422 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2423 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2424 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2425 		}
2426 		if (cfg->msix.msix_location) {
2427 			kprintf("\tMSI-X supports %d message%s ",
2428 			    cfg->msix.msix_msgnum,
2429 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2430 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2431 				kprintf("in map 0x%x\n",
2432 				    cfg->msix.msix_table_bar);
2433 			else
2434 				kprintf("in maps 0x%x and 0x%x\n",
2435 				    cfg->msix.msix_table_bar,
2436 				    cfg->msix.msix_pba_bar);
2437 		}
2438 		pci_print_verbose_expr(cfg);
2439 	}
2440 }
2441 
2442 static void
2443 pci_print_verbose_expr(const pcicfgregs *cfg)
2444 {
2445 	const struct pcicfg_expr *expr = &cfg->expr;
2446 	const char *port_name;
2447 	uint16_t port_type;
2448 
2449 	if (!bootverbose)
2450 		return;
2451 
2452 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2453 		return;
2454 
2455 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2456 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2457 
2458 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2459 
2460 	switch (port_type) {
2461 	case PCIE_END_POINT:
2462 		port_name = "DEVICE";
2463 		break;
2464 	case PCIE_LEG_END_POINT:
2465 		port_name = "LEGDEV";
2466 		break;
2467 	case PCIE_ROOT_PORT:
2468 		port_name = "ROOT";
2469 		break;
2470 	case PCIE_UP_STREAM_PORT:
2471 		port_name = "UPSTREAM";
2472 		break;
2473 	case PCIE_DOWN_STREAM_PORT:
2474 		port_name = "DOWNSTRM";
2475 		break;
2476 	case PCIE_PCIE2PCI_BRIDGE:
2477 		port_name = "PCIE2PCI";
2478 		break;
2479 	case PCIE_PCI2PCIE_BRIDGE:
2480 		port_name = "PCI2PCIE";
2481 		break;
2482 	case PCIE_ROOT_END_POINT:
2483 		port_name = "ROOTDEV";
2484 		break;
2485 	case PCIE_ROOT_EVT_COLL:
2486 		port_name = "ROOTEVTC";
2487 		break;
2488 	default:
2489 		port_name = NULL;
2490 		break;
2491 	}
2492 	if ((port_type == PCIE_ROOT_PORT ||
2493 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2494 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2495 		port_name = NULL;
2496 	if (port_name != NULL)
2497 		kprintf("[%s]", port_name);
2498 
2499 	if (pcie_slotimpl(cfg)) {
2500 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2501 		if (expr->expr_slotcap & PCIEM_SLOTCAP_HP_CAP)
2502 			kprintf("[HOTPLUG]");
2503 	}
2504 	kprintf("\n");
2505 }
2506 
2507 static int
2508 pci_porten(device_t pcib, int b, int s, int f)
2509 {
2510 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2511 		& PCIM_CMD_PORTEN) != 0;
2512 }
2513 
2514 static int
2515 pci_memen(device_t pcib, int b, int s, int f)
2516 {
2517 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2518 		& PCIM_CMD_MEMEN) != 0;
2519 }
2520 
2521 /*
2522  * Add a resource based on a PCI map register.  Return 1 if the map
2523  * register is a 32-bit map register, or 2 if it is a 64-bit register.
2524  */
2525 static int
2526 pci_add_map(device_t pcib, device_t bus, device_t dev,
2527     int b, int s, int f, int reg, struct resource_list *rl, int force,
2528     int prefetch)
2529 {
2530 	uint32_t map;
2531 	uint16_t old_cmd;
2532 	pci_addr_t base;
2533 	pci_addr_t start, end, count;
2534 	uint8_t ln2size;
2535 	uint8_t ln2range;
2536 	uint32_t testval;
2537 	uint16_t cmd;
2538 	int type;
2539 	int barlen;
2540 	struct resource *res;
2541 
2542 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2543 
2544 	/* Disable access to device memory */
2545 	old_cmd = 0;
2546 	if (PCI_BAR_MEM(map)) {
2547 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2548 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2549 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2550 	}
2551 
2552 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2553 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2554 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2555 
2556 	/* Restore memory access mode */
2557 	if (PCI_BAR_MEM(map)) {
2558 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2559 	}
2560 
2561 	if (PCI_BAR_MEM(map)) {
2562 		type = SYS_RES_MEMORY;
2563 		if (map & PCIM_BAR_MEM_PREFETCH)
2564 			prefetch = 1;
2565 	} else
2566 		type = SYS_RES_IOPORT;
2567 	ln2size = pci_mapsize(testval);
2568 	ln2range = pci_maprange(testval);
2569 	base = pci_mapbase(map);
2570 	barlen = ln2range == 64 ? 2 : 1;
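
	/*
	 * Worked example (values assumed, not from real hardware): for a
	 * 64KB memory BAR, the 0xffffffff write above reads back as
	 * testval = 0xffff0000, so ln2size = pci_mapsize(testval) == 16
	 * and the BAR decodes 1 << 16 == 65536 bytes starting at
	 * pci_mapbase(map).
	 */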
2571 
2572 	/*
2573 	 * For I/O registers, if bottom bit is set, and the next bit up
2574 	 * isn't clear, we know we have a BAR that doesn't conform to the
2575 	 * spec, so ignore it.  Also, sanity check the size of the data
2576 	 * area against the type of resource involved.  Memory must be at
2577 	 * least 16 bytes in size, while I/O ranges must be at least 4.
2578 	 */
2579 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2580 		return (barlen);
2581 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2582 	    (type == SYS_RES_IOPORT && ln2size < 2))
2583 		return (barlen);
2584 
2585 	if (ln2range == 64)
2586 		/* Read the other half of a 64bit map register */
2587 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2588 	if (bootverbose) {
2589 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2590 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2591 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2592 			kprintf(", port disabled\n");
2593 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2594 			kprintf(", memory disabled\n");
2595 		else
2596 			kprintf(", enabled\n");
2597 	}
2598 
2599 	/*
2600 	 * If base is 0, then we have problems.  It is best to ignore
2601 	 * such entries for the moment.  These will be allocated later if
2602 	 * the driver specifically requests them.  However, some
2603 	 * removable busses look better when all resources are allocated,
2604 	 * so allow '0' to be overridden.
2605 	 *
2606 	 * Similarly treat maps whose value is the same as the test value
2607 	 * read back.  These maps have had all f's written to them by the
2608 	 * BIOS in an attempt to disable the resources.
2609 	 */
2610 	if (!force && (base == 0 || map == testval))
2611 		return (barlen);
2612 	if ((u_long)base != base) {
2613 		device_printf(bus,
2614 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
2615 		    pci_get_domain(dev), b, s, f, reg);
2616 		return (barlen);
2617 	}
2618 
2619 	/*
2620 	 * This code theoretically does the right thing, but has
2621 	 * undesirable side effects in some cases where peripherals
2622 	 * respond oddly to having these bits enabled.  Let the user
2623 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2624 	 * default).
2625 	 */
2626 	if (pci_enable_io_modes) {
2627 		/* Turn on resources that have been left off by a lazy BIOS */
2628 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2629 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2630 			cmd |= PCIM_CMD_PORTEN;
2631 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2632 		}
2633 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2634 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2635 			cmd |= PCIM_CMD_MEMEN;
2636 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2637 		}
2638 	} else {
2639 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2640 			return (barlen);
2641 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2642 			return (barlen);
2643 	}
2644 
2645 	count = 1 << ln2size;
2646 	if (base == 0 || base == pci_mapbase(testval)) {
2647 		start = 0;	/* Let the parent decide. */
2648 		end = ~0ULL;
2649 	} else {
2650 		start = base;
2651 		end = base + (1 << ln2size) - 1;
2652 	}
2653 	resource_list_add(rl, type, reg, start, end, count, -1);
2654 
2655 	/*
2656 	 * Try to allocate the resource for this BAR from our parent
2657 	 * so that this resource range is already reserved.  The
2658 	 * driver for this device will later inherit this resource in
2659 	 * pci_alloc_resource().
2660 	 */
2661 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2662 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2663 	if (res == NULL) {
2664 		/*
2665 		 * If the allocation fails, delete the resource list
2666 		 * entry to force pci_alloc_resource() to allocate
2667 		 * resources from the parent.
2668 		 */
2669 		resource_list_delete(rl, type, reg);
2670 #ifdef PCI_BAR_CLEAR
2671 		/* Clear the BAR */
2672 		start = 0;
2673 #else	/* !PCI_BAR_CLEAR */
2674 		/*
2675 		 * Don't clear the BAR here.  Some BIOSes list the HPET
2676 		 * as a PCI function; clearing the BAR causes the HPET
2677 		 * timer to stop ticking.
2678 		 */
2679 		if (bootverbose) {
2680 			kprintf("pci:%d:%d:%d: resource reservation failed "
2681 				"%#jx - %#jx\n", b, s, f,
2682 				(intmax_t)start, (intmax_t)end);
2683 		}
2684 		return (barlen);
2685 #endif	/* PCI_BAR_CLEAR */
2686 	} else {
2687 		start = rman_get_start(res);
2688 	}
2689 	pci_write_config(dev, reg, start, 4);
2690 	if (ln2range == 64)
2691 		pci_write_config(dev, reg + 4, start >> 32, 4);
2692 	return (barlen);
2693 }
2694 
2695 /*
2696  * For ATA devices we need to decide early what addressing mode to use.
2697  * Legacy mode demands that the primary and secondary ATA ports sit on
2698  * the same addresses that old ISA hardware did.  This dictates that we
2699  * use those addresses and ignore the BARs if we cannot set PCI native
2700  * addressing mode.
2701  */
2702 static void
2703 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2704     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2705 {
2706 	int rid, type, progif;
2707 #if 0
2708 	/* if this device supports PCI native addressing use it */
2709 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2710 	if ((progif & 0x8a) == 0x8a) {
2711 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2712 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2713 			kprintf("Trying ATA native PCI addressing mode\n");
2714 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2715 		}
2716 	}
2717 #endif
2718 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2719 	type = SYS_RES_IOPORT;
2720 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2721 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2722 		    prefetchmask & (1 << 0));
2723 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2724 		    prefetchmask & (1 << 1));
2725 	} else {
2726 		rid = PCIR_BAR(0);
2727 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2728 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2729 		    0, -1);
2730 		rid = PCIR_BAR(1);
2731 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2732 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2733 		    0, -1);
2734 	}
2735 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2736 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2737 		    prefetchmask & (1 << 2));
2738 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2739 		    prefetchmask & (1 << 3));
2740 	} else {
2741 		rid = PCIR_BAR(2);
2742 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2743 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2744 		    0, -1);
2745 		rid = PCIR_BAR(3);
2746 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2747 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2748 		    0, -1);
2749 	}
2750 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2751 	    prefetchmask & (1 << 4));
2752 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2753 	    prefetchmask & (1 << 5));
2754 }
2755 
2756 static void
2757 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2758 {
2759 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2760 	pcicfgregs *cfg = &dinfo->cfg;
2761 	char tunable_name[64];
2762 	int irq;
2763 
2764 	/* Has to have an intpin to have an interrupt. */
2765 	if (cfg->intpin == 0)
2766 		return;
2767 
2768 	/* Let the user override the IRQ with a tunable. */
2769 	irq = PCI_INVALID_IRQ;
2770 	ksnprintf(tunable_name, sizeof(tunable_name),
2771 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2772 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2773 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2774 		if (irq >= 255 || irq <= 0) {
2775 			irq = PCI_INVALID_IRQ;
2776 		} else {
2777 			if (machintr_legacy_intr_find(irq,
2778 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2779 				device_printf(dev,
2780 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2781 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2782 				    cfg->intpin + 'A' - 1, irq);
2783 				irq = PCI_INVALID_IRQ;
2784 			} else {
2785 				BUS_CONFIG_INTR(bus, dev, irq,
2786 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2787 			}
2788 		}
2789 	}
2790 
2791 	/*
2792 	 * If we didn't get an IRQ via the tunable, then we either use the
2793 	 * IRQ value in the intline register or we ask the bus to route an
2794 	 * interrupt for us.  If force_route is true, then we only use the
2795 	 * value in the intline register if the bus was unable to assign an
2796 	 * IRQ.
2797 	 */
2798 	if (!PCI_INTERRUPT_VALID(irq)) {
2799 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2800 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2801 		if (!PCI_INTERRUPT_VALID(irq))
2802 			irq = cfg->intline;
2803 	}
2804 
2805 	/* If after all that we don't have an IRQ, just bail. */
2806 	if (!PCI_INTERRUPT_VALID(irq))
2807 		return;
2808 
2809 	/* Update the config register if it changed. */
2810 	if (irq != cfg->intline) {
2811 		cfg->intline = irq;
2812 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2813 	}
2814 
2815 	/* Add this IRQ as rid 0 interrupt resource. */
2816 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2817 	    machintr_legacy_intr_cpuid(irq));
2818 }
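
/*
 * Example of the override tunable constructed above (the device
 * location is hypothetical): setting
 *
 *	hw.pci0.2.12.0.INTA.irq="11"
 *
 * in the loader environment forces INTA of the function at domain 0,
 * bus 2, slot 12, function 0 onto IRQ 11, provided IRQ 11 can be found
 * as a level-triggered, active-low legacy interrupt.
 */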
2819 
2820 /* Perform early OHCI takeover from SMM. */
2821 static void
2822 ohci_early_takeover(device_t self)
2823 {
2824 	struct resource *res;
2825 	uint32_t ctl;
2826 	int rid;
2827 	int i;
2828 
2829 	rid = PCIR_BAR(0);
2830 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2831 	if (res == NULL)
2832 		return;
2833 
2834 	ctl = bus_read_4(res, OHCI_CONTROL);
2835 	if (ctl & OHCI_IR) {
2836 		if (bootverbose)
2837 			kprintf("ohci early: "
2838 			    "SMM active, request owner change\n");
2839 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2840 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2841 			DELAY(1000);
2842 			ctl = bus_read_4(res, OHCI_CONTROL);
2843 		}
2844 		if (ctl & OHCI_IR) {
2845 			if (bootverbose)
2846 				kprintf("ohci early: "
2847 				    "SMM does not respond, resetting\n");
2848 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2849 		}
2850 		/* Disable interrupts */
2851 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2852 	}
2853 
2854 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2855 }
2856 
2857 /* Perform early UHCI takeover from SMM. */
2858 static void
2859 uhci_early_takeover(device_t self)
2860 {
2861 	struct resource *res;
2862 	int rid;
2863 
2864 	/*
2865 	 * Set the PIRQD enable bit and switch off all the others. We don't
2866 	 * want legacy support to interfere with us.  XXX Does this also mean
2867 	 * that the BIOS won't touch the keyboard anymore if it is connected
2868 	 * to the ports of the root hub?
2869 	 */
2870 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2871 
2872 	/* Disable interrupts */
2873 	rid = PCI_UHCI_BASE_REG;
2874 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2875 	if (res != NULL) {
2876 		bus_write_2(res, UHCI_INTR, 0);
2877 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2878 	}
2879 }
2880 
2881 /* Perform early EHCI takeover from SMM. */
2882 static void
2883 ehci_early_takeover(device_t self)
2884 {
2885 	struct resource *res;
2886 	uint32_t cparams;
2887 	uint32_t eec;
2888 	uint32_t eecp;
2889 	uint32_t bios_sem;
2890 	uint32_t offs;
2891 	int rid;
2892 	int i;
2893 
2894 	rid = PCIR_BAR(0);
2895 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2896 	if (res == NULL)
2897 		return;
2898 
2899 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2900 
2901 	/* Synchronise with the BIOS if it owns the controller. */
2902 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2903 	    eecp = EHCI_EECP_NEXT(eec)) {
2904 		eec = pci_read_config(self, eecp, 4);
2905 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2906 			continue;
2907 		}
2908 		bios_sem = pci_read_config(self, eecp +
2909 		    EHCI_LEGSUP_BIOS_SEM, 1);
2910 		if (bios_sem == 0) {
2911 			continue;
2912 		}
2913 		if (bootverbose)
2914 			kprintf("ehci early: "
2915 			    "SMM active, request owner change\n");
2916 
2917 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2918 
2919 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2920 			DELAY(1000);
2921 			bios_sem = pci_read_config(self, eecp +
2922 			    EHCI_LEGSUP_BIOS_SEM, 1);
2923 		}
2924 
2925 		if (bios_sem != 0) {
2926 			if (bootverbose)
2927 				kprintf("ehci early: "
2928 				    "SMM does not respond\n");
2929 		}
2930 		/* Disable interrupts */
2931 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2932 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2933 	}
2934 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2935 }
2936 
2937 /* Perform early XHCI takeover from SMM. */
2938 static void
2939 xhci_early_takeover(device_t self)
2940 {
2941 	struct resource *res;
2942 	uint32_t cparams;
2943 	uint32_t eec;
2944 	uint32_t eecp;
2945 	uint32_t bios_sem;
2946 	uint32_t offs;
2947 	int rid;
2948 	int i;
2949 
2950 	rid = PCIR_BAR(0);
2951 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2952 	if (res == NULL)
2953 		return;
2954 
2955 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2956 
2957 	eec = -1;
2958 
2959 	/* Synchronise with the BIOS if it owns the controller. */
2960 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2961 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2962 		eec = bus_read_4(res, eecp);
2963 
2964 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2965 			continue;
2966 
2967 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2968 
2969 		if (bios_sem == 0) {
2970 			if (bootverbose)
2971 				kprintf("xhci early: xhci is not owned by SMM\n");
2972 
2973 			continue;
2974 		}
2975 
2976 		if (bootverbose)
2977 			kprintf("xhci early: "
2978 			    "SMM active, request owner change\n");
2979 
2980 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
2981 
2982 		/* wait a maximum of 5 seconds */
2983 
2984 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
2985 			DELAY(1000);
2986 
2987 			bios_sem = bus_read_1(res, eecp +
2988 			    XHCI_XECP_BIOS_SEM);
2989 		}
2990 
2991 		if (bios_sem != 0) {
2992 			if (bootverbose) {
2993 				kprintf("xhci early: "
2994 				    "SMM does not respond\n");
2995 				kprintf("xhci early: "
2996 				    "taking xhci by force\n");
2997 			}
2998 			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
2999 		} else {
3000 			if (bootverbose)
3001 				kprintf("xhci early: "
3002 				    "handover successful\n");
3003 		}
3004 
3005 		/* Disable interrupts */
3006 		offs = bus_read_1(res, XHCI_CAPLENGTH);
3007 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3008 		bus_read_4(res, offs + XHCI_USBSTS);
3009 	}
3010 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3011 }
3012 
3013 void
3014 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
3015 {
3016 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3017 	pcicfgregs *cfg = &dinfo->cfg;
3018 	struct resource_list *rl = &dinfo->resources;
3019 	struct pci_quirk *q;
3020 	int b, i, f, s;
3021 
3022 	b = cfg->bus;
3023 	s = cfg->slot;
3024 	f = cfg->func;
3025 
3026 	/* ATA devices need special map treatment */
3027 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3028 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3029 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3030 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3031 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3032 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
3033 	else
3034 		for (i = 0; i < cfg->nummaps;)
3035 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
3036 			    rl, force, prefetchmask & (1 << i));
3037 
3038 	/*
3039 	 * Add additional, quirked resources.
3040 	 */
3041 	for (q = &pci_quirks[0]; q->devid; q++) {
3042 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3043 		    && q->type == PCI_QUIRK_MAP_REG)
3044 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
3045 			  force, 0);
3046 	}
3047 
3048 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3049 		/*
3050 		 * Try to re-route interrupts. Sometimes the BIOS or
3051 		 * firmware may leave bogus values in these registers.
3052 		 * If the re-route fails, then just stick with what we
3053 		 * have.
3054 		 */
3055 		pci_assign_interrupt(bus, dev, 1);
3056 	}
3057 
3058 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3059 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3060 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3061 			xhci_early_takeover(dev);
3062 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3063 			ehci_early_takeover(dev);
3064 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3065 			ohci_early_takeover(dev);
3066 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3067 			uhci_early_takeover(dev);
3068 	}
3069 }
3070 
3071 void
3072 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3073 {
3074 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3075 	device_t pcib = device_get_parent(dev);
3076 	struct pci_devinfo *dinfo;
3077 	int maxslots;
3078 	int s, f, pcifunchigh;
3079 	uint8_t hdrtype;
3080 
3081 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3082 	    ("dinfo_size too small"));
3083 	maxslots = PCIB_MAXSLOTS(pcib);
3084 	for (s = 0; s <= maxslots; s++) {
3085 		pcifunchigh = 0;
3086 		f = 0;
3087 		DELAY(1);
3088 		hdrtype = REG(PCIR_HDRTYPE, 1);
3089 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3090 			continue;
3091 		if (hdrtype & PCIM_MFDEV)
3092 			pcifunchigh = PCI_FUNCMAX;
3093 		for (f = 0; f <= pcifunchigh; f++) {
3094 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3095 			    dinfo_size);
3096 			if (dinfo != NULL) {
3097 				pci_add_child(dev, dinfo);
3098 			}
3099 		}
3100 	}
3101 #undef REG
3102 }
3103 
3104 void
3105 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3106 {
3107 	device_t pcib;
3108 
3109 	pcib = device_get_parent(bus);
3110 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3111 	device_set_ivars(dinfo->cfg.dev, dinfo);
3112 	resource_list_init(&dinfo->resources);
3113 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3114 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3115 	pci_print_verbose(dinfo);
3116 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
3117 }
3118 
3119 static int
3120 pci_probe(device_t dev)
3121 {
3122 	device_set_desc(dev, "PCI bus");
3123 
3124 	/* Allow other subclasses to override this driver. */
3125 	return (-1000);
3126 }
3127 
3128 static int
3129 pci_attach(device_t dev)
3130 {
3131 	int busno, domain;
3132 
3133 	/*
3134 	 * Since there can be multiple independently numbered PCI
3135 	 * busses on systems with multiple PCI domains, we can't use
3136 	 * the unit number to decide which bus we are probing. We ask
3137 	 * the parent pcib what our domain and bus numbers are.
3138 	 */
3139 	domain = pcib_get_domain(dev);
3140 	busno = pcib_get_bus(dev);
3141 	if (bootverbose)
3142 		device_printf(dev, "domain=%d, physical bus=%d\n",
3143 		    domain, busno);
3144 
3145 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3146 
3147 	return (bus_generic_attach(dev));
3148 }
3149 
3150 int
3151 pci_suspend(device_t dev)
3152 {
3153 	int dstate, error, i, numdevs;
3154 	device_t acpi_dev, child, *devlist;
3155 	struct pci_devinfo *dinfo;
3156 
3157 	/*
3158 	 * Save the PCI configuration space for each child and set the
3159 	 * device in the appropriate power state for this sleep state.
3160 	 */
3161 	acpi_dev = NULL;
3162 	if (pci_do_power_resume)
3163 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3164 	device_get_children(dev, &devlist, &numdevs);
3165 	for (i = 0; i < numdevs; i++) {
3166 		child = devlist[i];
3167 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3168 		pci_cfg_save(child, dinfo, 0);
3169 	}
3170 
3171 	/* Suspend devices before potentially powering them down. */
3172 	error = bus_generic_suspend(dev);
3173 	if (error) {
3174 		kfree(devlist, M_TEMP);
3175 		return (error);
3176 	}
3177 
3178 	/*
3179 	 * Always set the device to D3.  If ACPI suggests a different
3180 	 * power state, use it instead.  If ACPI is not present, the
3181 	 * firmware is responsible for managing device power.  Skip
3182 	 * children who aren't attached since they are powered down
3183 	 * separately.  Only manage type 0 devices for now.
3184 	 */
3185 	for (i = 0; acpi_dev && i < numdevs; i++) {
3186 		child = devlist[i];
3187 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3188 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3189 			dstate = PCI_POWERSTATE_D3;
3190 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3191 			pci_set_powerstate(child, dstate);
3192 		}
3193 	}
3194 	kfree(devlist, M_TEMP);
3195 	return (0);
3196 }
3197 
3198 int
3199 pci_resume(device_t dev)
3200 {
3201 	int i, numdevs;
3202 	device_t acpi_dev, child, *devlist;
3203 	struct pci_devinfo *dinfo;
3204 
3205 	/*
3206 	 * Set each child to D0 and restore its PCI configuration space.
3207 	 */
3208 	acpi_dev = NULL;
3209 	if (pci_do_power_resume)
3210 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3211 	device_get_children(dev, &devlist, &numdevs);
3212 	for (i = 0; i < numdevs; i++) {
3213 		/*
3214 		 * Notify ACPI we're going to D0 but ignore the result.  If
3215 		 * ACPI is not present, the firmware is responsible for
3216 		 * managing device power.  Only manage type 0 devices for now.
3217 		 */
3218 		child = devlist[i];
3219 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3220 		if (acpi_dev && device_is_attached(child) &&
3221 		    dinfo->cfg.hdrtype == 0) {
3222 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3223 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3224 		}
3225 
3226 		/* Now the device is powered up, restore its config space. */
3227 		pci_cfg_restore(child, dinfo);
3228 	}
3229 	kfree(devlist, M_TEMP);
3230 	return (bus_generic_resume(dev));
3231 }
3232 
3233 static void
3234 pci_load_vendor_data(void)
3235 {
3236 	caddr_t vendordata, info;
3237 
3238 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3239 		info = preload_search_info(vendordata, MODINFO_ADDR);
3240 		pci_vendordata = *(char **)info;
3241 		info = preload_search_info(vendordata, MODINFO_SIZE);
3242 		pci_vendordata_size = *(size_t *)info;
3243 		/* terminate the database */
3244 		pci_vendordata[pci_vendordata_size] = '\n';
3245 	}
3246 }
3247 
3248 void
3249 pci_driver_added(device_t dev, driver_t *driver)
3250 {
3251 	int numdevs;
3252 	device_t *devlist;
3253 	device_t child;
3254 	struct pci_devinfo *dinfo;
3255 	int i;
3256 
3257 	if (bootverbose)
3258 		device_printf(dev, "driver added\n");
3259 	DEVICE_IDENTIFY(driver, dev);
3260 	device_get_children(dev, &devlist, &numdevs);
3261 	for (i = 0; i < numdevs; i++) {
3262 		child = devlist[i];
3263 		if (device_get_state(child) != DS_NOTPRESENT)
3264 			continue;
3265 		dinfo = device_get_ivars(child);
3266 		pci_print_verbose(dinfo);
3267 		if (bootverbose)
3268 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3269 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3270 			    dinfo->cfg.func);
3271 		pci_cfg_restore(child, dinfo);
3272 		if (device_probe_and_attach(child) != 0)
3273 			pci_cfg_save(child, dinfo, 1);
3274 	}
3275 	kfree(devlist, M_TEMP);
3276 }
3277 
3278 static void
3279 pci_child_detached(device_t parent __unused, device_t child)
3280 {
3281 	/* Turn child's power off */
3282 	pci_cfg_save(child, device_get_ivars(child), 1);
3283 }
3284 
3285 int
3286 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3287     driver_intr_t *intr, void *arg, void **cookiep,
3288     lwkt_serialize_t serializer, const char *desc)
3289 {
3290 	int rid, error;
3291 	void *cookie;
3292 
3293 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3294 	    arg, &cookie, serializer, desc);
3295 	if (error)
3296 		return (error);
3297 
3298 	/* If this is not a direct child, just bail out. */
3299 	if (device_get_parent(child) != dev) {
3300 		*cookiep = cookie;
3301 		return(0);
3302 	}
3303 
3304 	rid = rman_get_rid(irq);
3305 	if (rid == 0) {
3306 		/* Make sure that INTx is enabled */
3307 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3308 	} else {
3309 		struct pci_devinfo *dinfo = device_get_ivars(child);
3310 		uint64_t addr;
3311 		uint32_t data;
3312 
3313 		/*
3314 		 * Check to see if the interrupt is MSI or MSI-X.
3315 		 * Ask our parent to map the MSI and give
3316 		 * us the address and data register values.
3317 		 * If we fail for some reason, teardown the
3318 		 * interrupt handler.
3319 		 */
3320 		if (dinfo->cfg.msi.msi_alloc > 0) {
3321 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3322 
3323 			if (msi->msi_addr == 0) {
3324 				KASSERT(msi->msi_handlers == 0,
3325 				    ("MSI has handlers, but vectors not mapped"));
3326 				error = PCIB_MAP_MSI(device_get_parent(dev),
3327 				    child, rman_get_start(irq), &addr, &data,
3328 				    rman_get_cpuid(irq));
3329 				if (error)
3330 					goto bad;
3331 				msi->msi_addr = addr;
3332 				msi->msi_data = data;
3333 				pci_enable_msi(child, addr, data);
3334 			}
3335 			msi->msi_handlers++;
3336 		} else {
3337 			struct msix_vector *mv;
3338 			u_int vector;
3339 
3340 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3341 			    ("No MSI-X or MSI rid %d allocated", rid));
3342 
3343 			mv = pci_find_msix_vector(child, rid);
3344 			KASSERT(mv != NULL,
3345 			    ("MSI-X rid %d is not allocated", rid));
3346 			KASSERT(mv->mv_address == 0,
3347 			    ("MSI-X rid %d has been setup", rid));
3348 
3349 			error = PCIB_MAP_MSI(device_get_parent(dev),
3350 			    child, rman_get_start(irq), &addr, &data,
3351 			    rman_get_cpuid(irq));
3352 			if (error)
3353 				goto bad;
3354 			mv->mv_address = addr;
3355 			mv->mv_data = data;
3356 
3357 			vector = PCI_MSIX_RID2VEC(rid);
3358 			pci_setup_msix_vector(child, vector,
3359 			    mv->mv_address, mv->mv_data);
3360 			pci_unmask_msix_vector(child, vector);
3361 		}
3362 
3363 		/*
3364 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
3365 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
3366 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
3367 		 */
3368 		if (!pci_has_quirk(pci_get_devid(child),
3369 		    PCI_QUIRK_MSI_INTX_BUG))
3370 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3371 		else
3372 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3373 	bad:
3374 		if (error) {
3375 			(void)bus_generic_teardown_intr(dev, child, irq,
3376 			    cookie);
3377 			return (error);
3378 		}
3379 	}
3380 	*cookiep = cookie;
3381 	return (0);
3382 }
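
/*
 * Hedged driver-side sketch (under #if 0, not compiled in) of the path
 * that ends up in pci_setup_intr() above: allocate one MSI message,
 * allocate the matching SYS_RES_IRQ resource and wire up a handler.
 * The softc layout, the handler and the exact pci_alloc_msi() /
 * bus_setup_intr() argument lists are assumptions for illustration.
 */
#if 0
static int
foo_setup_irq(device_t dev, struct foo_softc *sc)
{
	int error, rid;

	/* MSI rids start at 1; rid 0 is the legacy INTx interrupt. */
	error = pci_alloc_msi(dev, &rid, 1, -1);
	if (error)
		rid = 0;		/* fall back to INTx */

	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
	    RF_ACTIVE);
	if (sc->irq_res == NULL)
		return (ENXIO);
	return (bus_setup_intr(dev, sc->irq_res, INTR_MPSAFE, foo_intr,
	    sc, &sc->irq_handle, NULL));
}
#endif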
3383 
3384 int
3385 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3386     void *cookie)
3387 {
3388 	int rid, error;
3389 
3390 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3391 		return (EINVAL);
3392 
3393 	/* If this isn't a direct child, just bail out */
3394 	if (device_get_parent(child) != dev)
3395 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3396 
3397 	rid = rman_get_rid(irq);
3398 	if (rid == 0) {
3399 		/* Mask INTx */
3400 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3401 	} else {
3402 		struct pci_devinfo *dinfo = device_get_ivars(child);
3403 
3404 		/*
3405 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3406 		 * decrement the appropriate handlers count and mask the
3407 		 * MSI-X message, or disable MSI messages if the count
3408 		 * drops to 0.
3409 		 */
3410 		if (dinfo->cfg.msi.msi_alloc > 0) {
3411 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3412 
3413 			KASSERT(rid <= msi->msi_alloc,
3414 			    ("MSI rid too high"));
3415 			KASSERT(msi->msi_handlers > 0,
3416 			    ("MSI rid %d is not setup", rid));
3417 
3418 			msi->msi_handlers--;
3419 			if (msi->msi_handlers == 0)
3420 				pci_disable_msi(child);
3421 		} else {
3422 			struct msix_vector *mv;
3423 
3424 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3425 			    ("No MSI or MSI-X rid %d allocated", rid));
3426 
3427 			mv = pci_find_msix_vector(child, rid);
3428 			KASSERT(mv != NULL,
3429 			    ("MSI-X rid %d is not allocated", rid));
3430 			KASSERT(mv->mv_address != 0,
3431 			    ("MSI-X rid %d has not been setup", rid));
3432 
3433 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3434 			mv->mv_address = 0;
3435 			mv->mv_data = 0;
3436 		}
3437 	}
3438 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3439 	if (rid > 0)
3440 		KASSERT(error == 0,
3441 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3442 	return (error);
3443 }
3444 
3445 int
3446 pci_print_child(device_t dev, device_t child)
3447 {
3448 	struct pci_devinfo *dinfo;
3449 	struct resource_list *rl;
3450 	int retval = 0;
3451 
3452 	dinfo = device_get_ivars(child);
3453 	rl = &dinfo->resources;
3454 
3455 	retval += bus_print_child_header(dev, child);
3456 
3457 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3458 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3459 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3460 	if (device_get_flags(dev))
3461 		retval += kprintf(" flags %#x", device_get_flags(dev));
3462 
3463 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3464 	    pci_get_function(child));
3465 
3466 	retval += bus_print_child_footer(dev, child);
3467 
3468 	return (retval);
3469 }
3470 
3471 static struct
3472 {
3473 	int	class;
3474 	int	subclass;
3475 	char	*desc;
3476 } pci_nomatch_tab[] = {
3477 	{PCIC_OLD,		-1,			"old"},
3478 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3479 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3480 	{PCIC_STORAGE,		-1,			"mass storage"},
3481 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3482 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3483 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3484 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3485 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3486 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3487 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3488 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3489 	{PCIC_NETWORK,		-1,			"network"},
3490 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3491 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3492 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3493 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3494 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3495 	{PCIC_DISPLAY,		-1,			"display"},
3496 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3497 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3498 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3499 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3500 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3501 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3502 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3503 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3504 	{PCIC_MEMORY,		-1,			"memory"},
3505 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3506 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3507 	{PCIC_BRIDGE,		-1,			"bridge"},
3508 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3509 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3510 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3511 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3512 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3513 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3514 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3515 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3516 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3517 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3518 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3519 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3520 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3521 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3522 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3523 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3524 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3525 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3526 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3527 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3528 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3529 	{PCIC_INPUTDEV,		-1,			"input device"},
3530 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3531 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3532 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3533 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3534 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3535 	{PCIC_DOCKING,		-1,			"docking station"},
3536 	{PCIC_PROCESSOR,	-1,			"processor"},
3537 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3538 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3539 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3540 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3541 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3542 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3543 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3544 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3545 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3546 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3547 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3548 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3549 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3550 	{PCIC_SATCOM,		-1,			"satellite communication"},
3551 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3552 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3553 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3554 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3555 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3556 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3557 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3558 	{PCIC_DASP,		-1,			"dasp"},
3559 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3560 	{0, 0,		NULL}
3561 };
3562 
3563 void
3564 pci_probe_nomatch(device_t dev, device_t child)
3565 {
3566 	int	i;
3567 	char	*cp, *scp, *device;
3568 
3569 	/*
3570 	 * Look for a listing for this device in a loaded device database.
3571 	 */
3572 	if ((device = pci_describe_device(child)) != NULL) {
3573 		device_printf(dev, "<%s>", device);
3574 		kfree(device, M_DEVBUF);
3575 	} else {
3576 		/*
3577 		 * Scan the class/subclass descriptions for a general
3578 		 * description.
3579 		 */
3580 		cp = "unknown";
3581 		scp = NULL;
3582 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3583 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3584 				if (pci_nomatch_tab[i].subclass == -1) {
3585 					cp = pci_nomatch_tab[i].desc;
3586 				} else if (pci_nomatch_tab[i].subclass ==
3587 				    pci_get_subclass(child)) {
3588 					scp = pci_nomatch_tab[i].desc;
3589 				}
3590 			}
3591 		}
3592 		device_printf(dev, "<%s%s%s>",
3593 		    cp ? cp : "",
3594 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3595 		    scp ? scp : "");
3596 	}
3597 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3598 		pci_get_vendor(child), pci_get_device(child),
3599 		pci_get_slot(child), pci_get_function(child));
3600 	if (pci_get_intpin(child) > 0) {
3601 		int irq;
3602 
3603 		irq = pci_get_irq(child);
3604 		if (PCI_INTERRUPT_VALID(irq))
3605 			kprintf(" irq %d", irq);
3606 	}
3607 	kprintf("\n");
3608 
3609 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3610 }
3611 
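/*
 * Editor's note (illustrative, not from the original source): for an
 * unclaimed ethernet device the routine above emits a console line
 * along the lines of
 *
 *	pci0: <network, ethernet> (vendor 0x8086, dev 0x100e) at device 2.0 irq 11
 *
 * where the vendor/device IDs are hypothetical example values.
 */
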
3612 /*
3613  * Parse the PCI device database, if loaded, and return a pointer to a
3614  * description of the device.
3615  *
3616  * The database is flat text formatted as follows:
3617  *
3618  * Any line not in a valid format is ignored.
3619  * Lines are terminated with newline '\n' characters.
3620  *
3621  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3622  * the vendor name.
3623  *
3624  * A DEVICE line is entered immediately below the corresponding VENDOR line.
3625  * - devices cannot be listed without a corresponding VENDOR line.
3626  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3627  * another TAB, then the device name.
3628  */
3629 
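/*
 * As a concrete (illustrative) excerpt of the format described above,
 * one vendor with two devices would be listed as:
 *
 *	8086	Intel Corporation
 *		1229	82557/8/9 EtherExpress Pro/100(B) Ethernet
 *		100e	82540EM Gigabit Ethernet
 *
 * The TAB-separated layout is what pci_describe_parse_line() below
 * keys on; the entries themselves are examples only.
 */
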
3630 /*
3631  * Assuming (ptr) points to the beginning of a line in the database,
3632  * return the vendor or device and description of the next entry.
3633  * Whichever of (vendor) or (device) is inappropriate for the entry
3634  * type is set to -1.  Returns nonzero at the end of the database.
3635  *
3636  * Note that this is not entirely robust in the face of corrupt data;
3637  * we attempt to safeguard against this by spamming the end of the
3638  * database with a newline when we initialise.
3639  */
3640 static int
3641 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3642 {
3643 	char	*cp = *ptr;
3644 	int	left;
3645 
3646 	*device = -1;
3647 	*vendor = -1;
3648 	**desc = '\0';
3649 	for (;;) {
3650 		left = pci_vendordata_size - (cp - pci_vendordata);
3651 		if (left <= 0) {
3652 			*ptr = cp;
3653 			return(1);
3654 		}
3655 
3656 		/* vendor entry? */
3657 		if (*cp != '\t' &&
3658 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3659 			break;
3660 		/* device entry? */
3661 		if (*cp == '\t' &&
3662 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3663 			break;
3664 
3665 		/* skip to next line */
3666 		while (*cp != '\n' && left > 0) {
3667 			cp++;
3668 			left--;
3669 		}
3670 		if (*cp == '\n') {
3671 			cp++;
3672 			left--;
3673 		}
3674 	}
3675 	/* skip to next line */
3676 	while (*cp != '\n' && left > 0) {
3677 		cp++;
3678 		left--;
3679 	}
3680 	if (*cp == '\n' && left > 0)
3681 		cp++;
3682 	*ptr = cp;
3683 	return(0);
3684 }
3685 
3686 static char *
3687 pci_describe_device(device_t dev)
3688 {
3689 	int	vendor, device;
3690 	char	*desc, *vp, *dp, *line;
3691 
3692 	desc = vp = dp = NULL;
3693 
3694 	/*
3695 	 * If we have no vendor data, we can't do anything.
3696 	 */
3697 	if (pci_vendordata == NULL)
3698 		goto out;
3699 
3700 	/*
3701 	 * Scan the vendor data looking for this device
3702 	 */
3703 	line = pci_vendordata;
3704 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3705 		goto out;
3706 	for (;;) {
3707 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3708 			goto out;
3709 		if (vendor == pci_get_vendor(dev))
3710 			break;
3711 	}
3712 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3713 		goto out;
3714 	for (;;) {
3715 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3716 			*dp = 0;
3717 			break;
3718 		}
3719 		if (vendor != -1) {
3720 			*dp = 0;
3721 			break;
3722 		}
3723 		if (device == pci_get_device(dev))
3724 			break;
3725 	}
3726 	if (dp[0] == '\0')
3727 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3728 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3729 	    NULL)
3730 		ksprintf(desc, "%s, %s", vp, dp);
3731  out:
3732 	if (vp != NULL)
3733 		kfree(vp, M_DEVBUF);
3734 	if (dp != NULL)
3735 		kfree(dp, M_DEVBUF);
3736 	return(desc);
3737 }
3738 
3739 int
3740 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3741 {
3742 	struct pci_devinfo *dinfo;
3743 	pcicfgregs *cfg;
3744 
3745 	dinfo = device_get_ivars(child);
3746 	cfg = &dinfo->cfg;
3747 
3748 	switch (which) {
3749 	case PCI_IVAR_ETHADDR:
3750 		/*
3751 		 * The generic accessor doesn't deal with failure, so
3752 		 * we set the return value, then return an error.
3753 		 */
3754 		*((uint8_t **) result) = NULL;
3755 		return (EINVAL);
3756 	case PCI_IVAR_SUBVENDOR:
3757 		*result = cfg->subvendor;
3758 		break;
3759 	case PCI_IVAR_SUBDEVICE:
3760 		*result = cfg->subdevice;
3761 		break;
3762 	case PCI_IVAR_VENDOR:
3763 		*result = cfg->vendor;
3764 		break;
3765 	case PCI_IVAR_DEVICE:
3766 		*result = cfg->device;
3767 		break;
3768 	case PCI_IVAR_DEVID:
3769 		*result = (cfg->device << 16) | cfg->vendor;
3770 		break;
3771 	case PCI_IVAR_CLASS:
3772 		*result = cfg->baseclass;
3773 		break;
3774 	case PCI_IVAR_SUBCLASS:
3775 		*result = cfg->subclass;
3776 		break;
3777 	case PCI_IVAR_PROGIF:
3778 		*result = cfg->progif;
3779 		break;
3780 	case PCI_IVAR_REVID:
3781 		*result = cfg->revid;
3782 		break;
3783 	case PCI_IVAR_INTPIN:
3784 		*result = cfg->intpin;
3785 		break;
3786 	case PCI_IVAR_IRQ:
3787 		*result = cfg->intline;
3788 		break;
3789 	case PCI_IVAR_DOMAIN:
3790 		*result = cfg->domain;
3791 		break;
3792 	case PCI_IVAR_BUS:
3793 		*result = cfg->bus;
3794 		break;
3795 	case PCI_IVAR_SLOT:
3796 		*result = cfg->slot;
3797 		break;
3798 	case PCI_IVAR_FUNCTION:
3799 		*result = cfg->func;
3800 		break;
3801 	case PCI_IVAR_CMDREG:
3802 		*result = cfg->cmdreg;
3803 		break;
3804 	case PCI_IVAR_CACHELNSZ:
3805 		*result = cfg->cachelnsz;
3806 		break;
3807 	case PCI_IVAR_MINGNT:
3808 		*result = cfg->mingnt;
3809 		break;
3810 	case PCI_IVAR_MAXLAT:
3811 		*result = cfg->maxlat;
3812 		break;
3813 	case PCI_IVAR_LATTIMER:
3814 		*result = cfg->lattimer;
3815 		break;
3816 	case PCI_IVAR_PCIXCAP_PTR:
3817 		*result = cfg->pcix.pcix_ptr;
3818 		break;
3819 	case PCI_IVAR_PCIECAP_PTR:
3820 		*result = cfg->expr.expr_ptr;
3821 		break;
3822 	case PCI_IVAR_VPDCAP_PTR:
3823 		*result = cfg->vpd.vpd_reg;
3824 		break;
3825 	default:
3826 		return (ENOENT);
3827 	}
3828 	return (0);
3829 }
3830 
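/*
 * Editor's sketch (illustrative, not part of the original source):
 * drivers normally reach this method through the pci_get_*() accessor
 * macros in pcivar.h rather than calling pci_read_ivar() directly:
 *
 *	uint16_t vid = pci_get_vendor(dev);	(PCI_IVAR_VENDOR)
 *	uint16_t did = pci_get_device(dev);	(PCI_IVAR_DEVICE)
 *
 * A probe routine would then compare these against its match table.
 */
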
3831 int
3832 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3833 {
3834 	struct pci_devinfo *dinfo;
3835 
3836 	dinfo = device_get_ivars(child);
3837 
3838 	switch (which) {
3839 	case PCI_IVAR_INTPIN:
3840 		dinfo->cfg.intpin = value;
3841 		return (0);
3842 	case PCI_IVAR_ETHADDR:
3843 	case PCI_IVAR_SUBVENDOR:
3844 	case PCI_IVAR_SUBDEVICE:
3845 	case PCI_IVAR_VENDOR:
3846 	case PCI_IVAR_DEVICE:
3847 	case PCI_IVAR_DEVID:
3848 	case PCI_IVAR_CLASS:
3849 	case PCI_IVAR_SUBCLASS:
3850 	case PCI_IVAR_PROGIF:
3851 	case PCI_IVAR_REVID:
3852 	case PCI_IVAR_IRQ:
3853 	case PCI_IVAR_DOMAIN:
3854 	case PCI_IVAR_BUS:
3855 	case PCI_IVAR_SLOT:
3856 	case PCI_IVAR_FUNCTION:
3857 		return (EINVAL);	/* disallow for now */
3858 
3859 	default:
3860 		return (ENOENT);
3861 	}
3862 }
3863 #ifdef notyet
3864 #include "opt_ddb.h"
3865 #ifdef DDB
3866 #include <ddb/ddb.h>
3867 #include <sys/cons.h>
3868 
3869 /*
3870  * List resources based on PCI map registers, for use within DDB
3871  */
3872 
3873 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3874 {
3875 	struct pci_devinfo *dinfo;
3876 	struct devlist *devlist_head;
3877 	struct pci_conf *p;
3878 	const char *name;
3879 	int i, error, none_count;
3880 
3881 	none_count = 0;
3882 	/* get the head of the device queue */
3883 	devlist_head = &pci_devq;
3884 
3885 	/*
3886 	 * Go through the list of devices and print them out
3887 	 */
3888 	for (error = 0, i = 0,
3889 	     dinfo = STAILQ_FIRST(devlist_head);
3890 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3891 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3892 
3893 		/* Populate pd_name and pd_unit */
3894 		name = NULL;
3895 		if (dinfo->cfg.dev)
3896 			name = device_get_name(dinfo->cfg.dev);
3897 
3898 		p = &dinfo->conf;
3899 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3900 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3901 			(name && *name) ? name : "none",
3902 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3903 			none_count++,
3904 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3905 			p->pc_sel.pc_func, (p->pc_class << 16) |
3906 			(p->pc_subclass << 8) | p->pc_progif,
3907 			(p->pc_subdevice << 16) | p->pc_subvendor,
3908 			(p->pc_device << 16) | p->pc_vendor,
3909 			p->pc_revid, p->pc_hdr);
3910 	}
3911 }
3912 #endif /* DDB */
3913 #endif
3914 
3915 static struct resource *
3916 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3917     u_long start, u_long end, u_long count, u_int flags)
3918 {
3919 	struct pci_devinfo *dinfo = device_get_ivars(child);
3920 	struct resource_list *rl = &dinfo->resources;
3921 	struct resource_list_entry *rle;
3922 	struct resource *res;
3923 	pci_addr_t map, testval;
3924 	int mapsize;
3925 
3926 	/*
3927 	 * Weed out the bogons, and figure out how large the BAR/map
3928 	 * is.  BARs that read back 0 here are bogus and unimplemented.
3929 	 * Note: atapci devices in legacy mode are special and handled
3930 	 * elsewhere in the code.  If you have an atapci device in legacy
3931 	 * mode and it fails here, that other code is broken.
3932 	 */
3933 	res = NULL;
3934 	map = pci_read_config(child, *rid, 4);
3935 	pci_write_config(child, *rid, 0xffffffff, 4);
3936 	testval = pci_read_config(child, *rid, 4);
3937 	if (pci_maprange(testval) == 64)
3938 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3939 	if (pci_mapbase(testval) == 0)
3940 		goto out;
3941 
3942 	/*
3943 	 * Restore the original value of the BAR.  We may have reprogrammed
3944 	 * the BAR of the low-level console device and when booting verbose,
3945 	 * we need the console device addressable.
3946 	 */
3947 	pci_write_config(child, *rid, map, 4);
3948 
3949 	if (PCI_BAR_MEM(testval)) {
3950 		if (type != SYS_RES_MEMORY) {
3951 			if (bootverbose)
3952 				device_printf(dev,
3953 				    "child %s requested type %d for rid %#x,"
3954 				    " but the BAR says it is a memio\n",
3955 				    device_get_nameunit(child), type, *rid);
3956 			goto out;
3957 		}
3958 	} else {
3959 		if (type != SYS_RES_IOPORT) {
3960 			if (bootverbose)
3961 				device_printf(dev,
3962 				    "child %s requested type %d for rid %#x,"
3963 				    " but the BAR says it is an ioport\n",
3964 				    device_get_nameunit(child), type, *rid);
3965 			goto out;
3966 		}
3967 	}
3968 	/*
3969 	 * For real BARs, we need to override the size that
3970 	 * the driver requests, because that's what the BAR
3971 	 * actually uses and we would otherwise have a
3972 	 * situation where we might allocate the excess to
3973 	 * another driver, which won't work.
3974 	 */
3975 	mapsize = pci_mapsize(testval);
3976 	count = 1UL << mapsize;
3977 	if (RF_ALIGNMENT(flags) < mapsize)
3978 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3979 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3980 		flags |= RF_PREFETCHABLE;
3981 
3982 	/*
3983 	 * Allocate a large enough resource, and then write back
3984 	 * the appropriate BAR for that resource.
3985 	 */
3986 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3987 	    start, end, count, flags, -1);
3988 	if (res == NULL) {
3989 		device_printf(child,
3990 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3991 		    count, *rid, type, start, end);
3992 		goto out;
3993 	}
3994 	resource_list_add(rl, type, *rid, start, end, count, -1);
3995 	rle = resource_list_find(rl, type, *rid);
3996 	if (rle == NULL)
3997 		panic("pci_alloc_map: unexpectedly can't find resource.");
3998 	rle->res = res;
3999 	rle->start = rman_get_start(res);
4000 	rle->end = rman_get_end(res);
4001 	rle->count = count;
4002 	if (bootverbose)
4003 		device_printf(child,
4004 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
4005 		    count, *rid, type, rman_get_start(res));
4006 	map = rman_get_start(res);
4007 out:
4008 	pci_write_config(child, *rid, map, 4);
4009 	if (pci_maprange(testval) == 64)
4010 		pci_write_config(child, *rid + 4, map >> 32, 4);
4011 	return (res);
4012 }
4013 
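/*
 * Editor's worked example of the sizing probe above (assumed values,
 * for illustration only): writing 0xffffffff to a 32-bit memory BAR
 * and reading back, say, 0xfffe0000 means the low 17 address bits are
 * hard-wired to zero, so pci_mapsize() yields 17 and the decoded
 * window is 1 << 17 = 128KB.  Equivalently:
 *
 *	base = testval & 0xfffffff0;	(mask off the memory flag bits)
 *	size = base & -base;		(lowest set bit is the window size)
 */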
4014 
4015 struct resource *
4016 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4017     u_long start, u_long end, u_long count, u_int flags, int cpuid)
4018 {
4019 	struct pci_devinfo *dinfo = device_get_ivars(child);
4020 	struct resource_list *rl = &dinfo->resources;
4021 	struct resource_list_entry *rle;
4022 	pcicfgregs *cfg = &dinfo->cfg;
4023 
4024 	/*
4025 	 * Perform lazy resource allocation
4026 	 */
4027 	if (device_get_parent(child) == dev) {
4028 		switch (type) {
4029 		case SYS_RES_IRQ:
4030 			/*
4031 			 * Can't alloc legacy interrupt once MSI messages
4032 			 * have been allocated.
4033 			 */
4034 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4035 			    cfg->msix.msix_alloc > 0))
4036 				return (NULL);
4037 			/*
4038 			 * If the child device doesn't have an
4039 			 * interrupt routed and is deserving of an
4040 			 * interrupt, try to assign it one.
4041 			 */
4042 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4043 			    (cfg->intpin != 0))
4044 				pci_assign_interrupt(dev, child, 0);
4045 			break;
4046 		case SYS_RES_IOPORT:
4047 		case SYS_RES_MEMORY:
4048 			if (*rid < PCIR_BAR(cfg->nummaps)) {
4049 				/*
4050 				 * Enable the I/O mode.  We should
4051 				 * also be assigning resources when
4052 				 * none are present.
4053 				 * resource_list_alloc() only partially
4054 				 * does this for us...
4055 				 */
4056 				if (PCI_ENABLE_IO(dev, child, type))
4057 					return (NULL);
4058 			}
4059 			rle = resource_list_find(rl, type, *rid);
4060 			if (rle == NULL)
4061 				return (pci_alloc_map(dev, child, type, rid,
4062 				    start, end, count, flags));
4063 			break;
4064 		}
4065 		/*
4066 		 * If we've already allocated the resource, then
4067 		 * return it now.  But first we may need to activate
4068 		 * it, since we don't allocate the resource as active
4069 		 * above.  Normally this would be done down in the
4070 		 * nexus, but since we short-circuit that path we have
4071 		 * to do its job here.  Not sure if we should kfree the
4072 		 * resource if it fails to activate.
4073 		 */
4074 		rle = resource_list_find(rl, type, *rid);
4075 		if (rle != NULL && rle->res != NULL) {
4076 			if (bootverbose)
4077 				device_printf(child,
4078 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
4079 				    rman_get_size(rle->res), *rid, type,
4080 				    rman_get_start(rle->res));
4081 			if ((flags & RF_ACTIVE) &&
4082 			    bus_generic_activate_resource(dev, child, type,
4083 			    *rid, rle->res) != 0)
4084 				return (NULL);
4085 			return (rle->res);
4086 		}
4087 	}
4088 	return (resource_list_alloc(rl, dev, child, type, rid,
4089 	    start, end, count, flags, cpuid));
4090 }
4091 
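/*
 * Illustrative driver-side view of the lazy path above (a sketch, not
 * from the original source): a typical attach routine allocates its
 * register BAR with
 *
 *	int rid = PCIR_BAR(0);
 *	struct resource *res;
 *
 *	res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
 *	    RF_ACTIVE);
 *
 * and the first such request for a rid with no resource list entry
 * falls through to pci_alloc_map(), which sizes and programs the BAR.
 */
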
4092 void
4093 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4094 {
4095 	struct pci_devinfo *dinfo;
4096 	struct resource_list *rl;
4097 	struct resource_list_entry *rle;
4098 
4099 	if (device_get_parent(child) != dev)
4100 		return;
4101 
4102 	dinfo = device_get_ivars(child);
4103 	rl = &dinfo->resources;
4104 	rle = resource_list_find(rl, type, rid);
4105 	if (rle) {
4106 		if (rle->res) {
4107 			if (rman_get_device(rle->res) != dev ||
4108 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4109 				device_printf(dev, "delete_resource: "
4110 				    "Resource still owned by child, oops. "
4111 				    "(type=%d, rid=%d, addr=%lx)\n",
4112 				    rle->type, rle->rid,
4113 				    rman_get_start(rle->res));
4114 				return;
4115 			}
4116 			bus_release_resource(dev, type, rid, rle->res);
4117 		}
4118 		resource_list_delete(rl, type, rid);
4119 	}
4120 	/*
4121 	 * Why do we turn off the PCI configuration BAR when we delete a
4122 	 * resource? -- imp
4123 	 */
4124 	pci_write_config(child, rid, 0, 4);
4125 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4126 }
4127 
4128 struct resource_list *
4129 pci_get_resource_list(device_t dev, device_t child)
4130 {
4131 	struct pci_devinfo *dinfo = device_get_ivars(child);
4132 
4133 	if (dinfo == NULL)
4134 		return (NULL);
4135 
4136 	return (&dinfo->resources);
4137 }
4138 
4139 uint32_t
4140 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4141 {
4142 	struct pci_devinfo *dinfo = device_get_ivars(child);
4143 	pcicfgregs *cfg = &dinfo->cfg;
4144 
4145 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4146 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4147 }
4148 
4149 void
4150 pci_write_config_method(device_t dev, device_t child, int reg,
4151     uint32_t val, int width)
4152 {
4153 	struct pci_devinfo *dinfo = device_get_ivars(child);
4154 	pcicfgregs *cfg = &dinfo->cfg;
4155 
4156 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4157 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4158 }
4159 
4160 int
4161 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4162     size_t buflen)
4163 {
4164 
4165 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4166 	    pci_get_function(child));
4167 	return (0);
4168 }
4169 
4170 int
4171 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4172     size_t buflen)
4173 {
4174 	struct pci_devinfo *dinfo;
4175 	pcicfgregs *cfg;
4176 
4177 	dinfo = device_get_ivars(child);
4178 	cfg = &dinfo->cfg;
4179 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4180 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4181 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4182 	    cfg->progif);
4183 	return (0);
4184 }
4185 
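/*
 * For illustration (hypothetical IDs): an ethernet controller might
 * yield a pnpinfo string such as
 *
 *	vendor=0x8086 device=0x100e subvendor=0x8086 subdevice=0x001e class=0x020000
 *
 * where class 0x020000 encodes base class network (0x02), subclass
 * ethernet (0x00) and prog-if 0x00.
 */
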
4186 int
4187 pci_assign_interrupt_method(device_t dev, device_t child)
4188 {
4189 	struct pci_devinfo *dinfo = device_get_ivars(child);
4190 	pcicfgregs *cfg = &dinfo->cfg;
4191 
4192 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4193 	    cfg->intpin));
4194 }
4195 
4196 static int
4197 pci_modevent(module_t mod, int what, void *arg)
4198 {
4199 	static struct cdev *pci_cdev;
4200 
4201 	switch (what) {
4202 	case MOD_LOAD:
4203 		STAILQ_INIT(&pci_devq);
4204 		pci_generation = 0;
4205 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4206 				    "pci");
4207 		pci_load_vendor_data();
4208 		break;
4209 
4210 	case MOD_UNLOAD:
4211 		destroy_dev(pci_cdev);
4212 		break;
4213 	}
4214 
4215 	return (0);
4216 }
4217 
4218 void
4219 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4220 {
4221 	int i;
4222 
4223 	/*
4224 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4225 	 * which we know need special treatment.  Type 2 devices are
4226 	 * cardbus bridges which also require special treatment.
4227 	 * Other types are unknown, and we err on the side of safety
4228 	 * by ignoring them.
4229 	 */
4230 	if (dinfo->cfg.hdrtype != 0)
4231 		return;
4232 
4233 	/*
4234 	 * Restore the device to full power mode.  We must do this
4235 	 * before we restore the registers because moving from D3 to
4236 	 * D0 will cause the chip's BARs and some other registers to
4237 	 * be reset to some unknown power on reset values.  Cut down
4238 	 * be reset to some unknown power-on reset values.  Cut down
4239 	 * state D0.
4240 	 */
4241 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
4242 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4243 	}
4244 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4245 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4246 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4247 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4248 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4249 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4250 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4251 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4252 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4253 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4254 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4255 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4256 
4257 	/* Restore MSI and MSI-X configurations if they are present. */
4258 	if (dinfo->cfg.msi.msi_location != 0)
4259 		pci_resume_msi(dev);
4260 	if (dinfo->cfg.msix.msix_location != 0)
4261 		pci_resume_msix(dev);
4262 }
4263 
4264 void
4265 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4266 {
4267 	int i;
4268 	uint32_t cls;
4269 	int ps;
4270 
4271 	/*
4272 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4273 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4274 	 * which also require special treatment.  Other types are unknown, and
4275 	 * we err on the side of safety by ignoring them.  Powering down
4276 	 * bridges should not be undertaken lightly.
4277 	 */
4278 	if (dinfo->cfg.hdrtype != 0)
4279 		return;
4280 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4281 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4282 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4283 
4284 	/*
4285 	 * Some drivers apparently write to these registers w/o updating our
4286 	 * cached copy.  No harm happens if we update the copy, so do so here
4287 	 * so we can restore them.  The COMMAND register is modified by the
4288 	 * bus w/o updating the cache.  This should represent the normally
4289 	 * writable portion of the 'defined' part of type 0 headers.  In
4290 	 * theory we also need to save/restore the PCI capability structures
4291 	 * we know about, but apart from power we don't know any that are
4292 	 * writable.
4293 	 */
4294 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4295 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4296 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4297 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4298 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4299 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4300 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4301 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4302 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4303 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4304 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4305 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4306 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4307 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4308 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4309 
4310 	/*
4311 	 * Don't set the state for display devices, base peripherals and
4312 	 * memory devices since bad things happen when they are powered down.
4313 	 * We should (a) have drivers that can easily detach and (b) use
4314 	 * generic drivers for these devices so that some device actually
4315 	 * attaches.  We need to make sure that when we implement (a) we don't
4316 	 * power the device down on a reattach.
4317 	 */
4318 	cls = pci_get_class(dev);
4319 	if (!setstate)
4320 		return;
4321 	switch (pci_do_power_nodriver)
4322 	{
4323 		case 0:		/* NO powerdown at all */
4324 			return;
4325 		case 1:		/* Conservative about what to power down */
4326 			if (cls == PCIC_STORAGE)
4327 				return;
4328 			/*FALLTHROUGH*/
4329 		case 2:		/* Aggressive about what to power down */
4330 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4331 			    cls == PCIC_BASEPERIPH)
4332 				return;
4333 			/*FALLTHROUGH*/
4334 		case 3:		/* Power down everything */
4335 			break;
4336 	}
4337 	/*
4338 	 * PCI spec says we can only go into D3 state from D0 state.
4339 	 * Transition from D[12] into D0 before going to D3 state.
4340 	 */
4341 	ps = pci_get_powerstate(dev);
4342 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4343 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4344 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4345 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4346 }
4347 
4348 int
4349 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4350 {
4351 	int rid, type;
4352 	u_int flags;
4353 
4354 	rid = 0;
4355 	type = PCI_INTR_TYPE_LEGACY;
4356 	flags = RF_SHAREABLE | RF_ACTIVE;
4357 
4358 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4359 	if (msi_enable) {
4360 		int cpu;
4361 
4362 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4363 		if (cpu >= ncpus)
4364 			cpu = ncpus - 1;
4365 
4366 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4367 			flags &= ~RF_SHAREABLE;
4368 			type = PCI_INTR_TYPE_MSI;
4369 		}
4370 	}
4371 
4372 	*rid0 = rid;
4373 	*flags0 = flags;
4374 
4375 	return type;
4376 }
4377 
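/*
 * Typical (illustrative) caller of pci_alloc_1intr(), following the
 * usual driver pattern:
 *
 *	irq_type = pci_alloc_1intr(dev, msi_enable, &irq_rid, &irq_flags);
 *	irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq_rid,
 *	    irq_flags);
 *
 * On detach the driver releases the resource and, if irq_type is
 * PCI_INTR_TYPE_MSI, also calls pci_release_msi(dev).
 */
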
4378 /* Wrapper APIs suitable for device driver use. */
4379 void
4380 pci_save_state(device_t dev)
4381 {
4382 	struct pci_devinfo *dinfo;
4383 
4384 	dinfo = device_get_ivars(dev);
4385 	pci_cfg_save(dev, dinfo, 0);
4386 }
4387 
4388 void
4389 pci_restore_state(device_t dev)
4390 {
4391 	struct pci_devinfo *dinfo;
4392 
4393 	dinfo = device_get_ivars(dev);
4394 	pci_cfg_restore(dev, dinfo);
4395 }
4396
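/*
 * Editor's illustrative example (hypothetical "foo" driver): the
 * save/restore pair above is meant to bracket power transitions in a
 * driver's suspend/resume methods:
 *
 *	static int
 *	foo_suspend(device_t dev)
 *	{
 *		pci_save_state(dev);
 *		return (bus_generic_suspend(dev));
 *	}
 *
 *	static int
 *	foo_resume(device_t dev)
 *	{
 *		pci_restore_state(dev);
 *		return (bus_generic_resume(dev));
 *	}
 */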