xref: /dragonfly/sys/bus/pci/pci.c (revision 20c2db9a)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 #include "opt_compat_oldpci.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/linker.h>
39 #include <sys/fcntl.h>
40 #include <sys/conf.h>
41 #include <sys/kernel.h>
42 #include <sys/queue.h>
43 #include <sys/sysctl.h>
44 #include <sys/endian.h>
45 #include <sys/machintr.h>
46 
47 #include <machine/msi_machdep.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_extern.h>
52 
53 #include <sys/bus.h>
54 #include <sys/rman.h>
55 #include <sys/device.h>
56 
57 #include <sys/pciio.h>
58 #include <bus/pci/pcireg.h>
59 #include <bus/pci/pcivar.h>
60 #include <bus/pci/pci_private.h>
61 
62 #include "pcib_if.h"
63 #include "pci_if.h"
64 
65 #ifdef __HAVE_ACPI
66 #include <contrib/dev/acpica/acpi.h>
67 #include "acpi_if.h"
68 #else
69 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
70 #endif
71 
72 extern struct dev_ops pcic_ops;	/* XXX */
73 
74 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
75 
76 static uint32_t		pci_mapbase(unsigned mapreg);
77 static const char	*pci_maptype(unsigned mapreg);
78 static int		pci_mapsize(unsigned testval);
79 static int		pci_maprange(unsigned mapreg);
80 static void		pci_fixancient(pcicfgregs *cfg);
81 
82 static int		pci_porten(device_t pcib, int b, int s, int f);
83 static int		pci_memen(device_t pcib, int b, int s, int f);
84 static void		pci_assign_interrupt(device_t bus, device_t dev,
85 			    int force_route);
86 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
87 			    int b, int s, int f, int reg,
88 			    struct resource_list *rl, int force, int prefetch);
89 static int		pci_probe(device_t dev);
90 static int		pci_attach(device_t dev);
91 static void		pci_child_detached(device_t, device_t);
92 static void		pci_load_vendor_data(void);
93 static int		pci_describe_parse_line(char **ptr, int *vendor,
94 			    int *device, char **desc);
95 static char		*pci_describe_device(device_t dev);
96 static int		pci_modevent(module_t mod, int what, void *arg);
97 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
98 			    pcicfgregs *cfg);
99 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
100 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
101 			    int reg, uint32_t *data);
102 #if 0
103 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
104 			    int reg, uint32_t data);
105 #endif
106 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
107 static void		pci_disable_msi(device_t dev);
108 static void		pci_enable_msi(device_t dev, uint64_t address,
109 			    uint16_t data);
110 static void		pci_setup_msix_vector(device_t dev, u_int index,
111 			    uint64_t address, uint32_t data);
112 static void		pci_mask_msix_vector(device_t dev, u_int index);
113 static void		pci_unmask_msix_vector(device_t dev, u_int index);
114 static void		pci_mask_msix_allvectors(device_t dev);
115 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
116 static int		pci_msi_blacklisted(void);
117 static void		pci_resume_msi(device_t dev);
118 static void		pci_resume_msix(device_t dev);
119 static int		pcie_slotimpl(const pcicfgregs *);
120 static void		pci_print_verbose_expr(const pcicfgregs *);
121 
122 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_subvendor(device_t, int, int,
128 			    pcicfgregs *);
129 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
130 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
131 
/*
 * Method table for the pci bus driver.  It is grouped into the generic
 * device interface, the bus interface offered to child devices, and the
 * PCI-specific interface.  The table is terminated by DEVMETHOD_END.
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	/* Bus interface: resource management and child identification */
	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	DEVMETHOD_END
};
184 
/* Declare the "pci" driver class (pci_driver) on top of pci_methods. */
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/*
 * Register pci_driver under the pcib parent, with pci_modevent as the
 * module event handler.
 */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/*
 * Vendor description data and its size.
 * NOTE(review): presumably populated by pci_load_vendor_data(), which is
 * not visible in this part of the file -- confirm.
 */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
193 
194 
/*
 * Dispatch table consulted by pci_read_capabilities(): for each
 * capability found in a device's list, the entry whose 'cap' equals the
 * capability ID has its 'read_cap' routine invoked.  The scan stops at
 * the entry whose read_cap is NULL, so that entry must stay last.
 */
static const struct pci_read_cap {
	int		cap;		/* capability ID (PCIY_*) */
	pci_read_cap_t	read_cap;	/* parser for that capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
209 
/*
 * One entry of the pci_quirks[] table: a device identified by 'devid'
 * needs the special handling named by 'type'.
 */
struct pci_quirk {
	/*
	 * Vendor/device of the card.  The pci_quirks[] entries put the
	 * vendor ID in the low 16 bits (e.g. 0x8086 for the Intel parts)
	 * and the device ID in the high 16 bits.
	 */
	uint32_t devid;	/* Vendor/device of the card */
	int	type;	/* one of the PCI_QUIRK_* values below */
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	/*
	 * Quirk-specific arguments.  In pci_quirks[] the MAP_REG entries
	 * carry the register offset in arg1; arg2 is 0 in every entry.
	 */
	int	arg1;
	int	arg2;
};
218 
/*
 * Table of known device quirks.  The final all-zero entry terminates
 * the table.
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* terminator */
};
253 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

/*
 * MSI-X resource IDs are offset by one from the vector number:
 * vector 0 corresponds to rid 1, and so on.
 */
#define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
#define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */

/* List to which pci_read_device() appends every pci_devinfo it creates. */
struct devlist pci_devq;
/* Both counters are incremented by pci_read_device() for each device added. */
uint32_t pci_generation;
uint32_t pci_numdevs = 0;
/*
 * pcie_chipset is set to 1 once any PCI-express capability has been
 * parsed; pcix_chipset is set to 1 once a header-type-1 device with a
 * PCI-X capability has been parsed.
 */
static int pcie_chipset, pcix_chipset;
266 
/*
 * sysctl vars
 *
 * Each knob below is a static int with a loader tunable and a sysctl
 * under hw.pci.
 */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

/* hw.pci.enable_io_modes (default 1, read-write) */
static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

/* hw.pci.do_power_nodriver (default 0, read-write) */
static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

/* hw.pci.do_power_resume (default 1, read-write) */
static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

/* hw.pci.enable_msi (default 1, read-write) */
static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

/* hw.pci.enable_msix (default 1, read-write) */
static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

/*
 * hw.pci.honor_msi_blacklist (default 1).  The sysctl is read-only
 * (CTLFLAG_RD); the value can still be set through the tunable.
 */
static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");

/*
 * NOTE(review): presumably the CPU id used when distributing MSI
 * targets; its users are not visible in this part of the file -- confirm.
 */
static int pci_msi_cpuid;
309 
310 /* Find a device_t by bus/slot/function in domain 0 */
311 
312 device_t
313 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
314 {
315 
316 	return (pci_find_dbsf(0, bus, slot, func));
317 }
318 
319 /* Find a device_t by domain/bus/slot/function */
320 
321 device_t
322 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
323 {
324 	struct pci_devinfo *dinfo;
325 
326 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
327 		if ((dinfo->cfg.domain == domain) &&
328 		    (dinfo->cfg.bus == bus) &&
329 		    (dinfo->cfg.slot == slot) &&
330 		    (dinfo->cfg.func == func)) {
331 			return (dinfo->cfg.dev);
332 		}
333 	}
334 
335 	return (NULL);
336 }
337 
338 /* Find a device_t by vendor/device ID */
339 
340 device_t
341 pci_find_device(uint16_t vendor, uint16_t device)
342 {
343 	struct pci_devinfo *dinfo;
344 
345 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
346 		if ((dinfo->cfg.vendor == vendor) &&
347 		    (dinfo->cfg.device == device)) {
348 			return (dinfo->cfg.dev);
349 		}
350 	}
351 
352 	return (NULL);
353 }
354 
355 /* return base address of memory or port map */
356 
357 static uint32_t
358 pci_mapbase(uint32_t mapreg)
359 {
360 
361 	if (PCI_BAR_MEM(mapreg))
362 		return (mapreg & PCIM_BAR_MEM_BASE);
363 	else
364 		return (mapreg & PCIM_BAR_IO_BASE);
365 }
366 
367 /* return map type of memory or port map */
368 
369 static const char *
370 pci_maptype(unsigned mapreg)
371 {
372 
373 	if (PCI_BAR_IO(mapreg))
374 		return ("I/O Port");
375 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
376 		return ("Prefetchable Memory");
377 	return ("Memory");
378 }
379 
/*
 * Return log2 of the size decoded by a memory or port map.
 *
 * 'testval' is reduced to its base-address bits; the result is the
 * index of the lowest set bit of those bits, or 0 when none is set.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t bits;
	int shift;

	bits = pci_mapbase(testval);
	if (bits == 0)
		return (0);

	/* Count the trailing zero bits. */
	for (shift = 0; (bits & 1) == 0; shift++)
		bits >>= 1;
	return (shift);
}
398 
399 /* return log2 of address range supported by map register */
400 
401 static int
402 pci_maprange(unsigned mapreg)
403 {
404 	int ln2range = 0;
405 
406 	if (PCI_BAR_IO(mapreg))
407 		ln2range = 32;
408 	else
409 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
410 		case PCIM_BAR_MEM_32:
411 			ln2range = 32;
412 			break;
413 		case PCIM_BAR_MEM_1MB:
414 			ln2range = 20;
415 			break;
416 		case PCIM_BAR_MEM_64:
417 			ln2range = 64;
418 			break;
419 		}
420 	return (ln2range);
421 }
422 
423 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
424 
425 static void
426 pci_fixancient(pcicfgregs *cfg)
427 {
428 	if (cfg->hdrtype != 0)
429 		return;
430 
431 	/* PCI to PCI bridges use header type 1 */
432 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
433 		cfg->hdrtype = 1;
434 }
435 
436 /* extract header type specific config data */
437 
438 static void
439 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
440 {
441 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
442 	switch (cfg->hdrtype) {
443 	case 0:
444 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
445 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
446 		cfg->nummaps	    = PCI_MAXMAPS_0;
447 		break;
448 	case 1:
449 		cfg->nummaps	    = PCI_MAXMAPS_1;
450 #ifdef COMPAT_OLDPCI
451 		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
452 #endif
453 		break;
454 	case 2:
455 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
456 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
457 		cfg->nummaps	    = PCI_MAXMAPS_2;
458 #ifdef COMPAT_OLDPCI
459 		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
460 #endif
461 		break;
462 	}
463 #undef REG
464 }
465 
466 /* read configuration header into pcicfgregs structure */
467 struct pci_devinfo *
468 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
469 {
470 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
471 	pcicfgregs *cfg = NULL;
472 	struct pci_devinfo *devlist_entry;
473 	struct devlist *devlist_head;
474 
475 	devlist_head = &pci_devq;
476 
477 	devlist_entry = NULL;
478 
479 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
480 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
481 
482 		cfg = &devlist_entry->cfg;
483 
484 		cfg->domain		= d;
485 		cfg->bus		= b;
486 		cfg->slot		= s;
487 		cfg->func		= f;
488 		cfg->vendor		= REG(PCIR_VENDOR, 2);
489 		cfg->device		= REG(PCIR_DEVICE, 2);
490 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
491 		cfg->statreg		= REG(PCIR_STATUS, 2);
492 		cfg->baseclass		= REG(PCIR_CLASS, 1);
493 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
494 		cfg->progif		= REG(PCIR_PROGIF, 1);
495 		cfg->revid		= REG(PCIR_REVID, 1);
496 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
497 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
498 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
499 		cfg->intpin		= REG(PCIR_INTPIN, 1);
500 		cfg->intline		= REG(PCIR_INTLINE, 1);
501 
502 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
503 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
504 
505 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
506 		cfg->hdrtype		&= ~PCIM_MFDEV;
507 
508 		pci_fixancient(cfg);
509 		pci_hdrtypedata(pcib, b, s, f, cfg);
510 
511 		pci_read_capabilities(pcib, cfg);
512 
513 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
514 
515 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
516 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
517 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
518 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
519 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
520 
521 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
522 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
523 		devlist_entry->conf.pc_vendor = cfg->vendor;
524 		devlist_entry->conf.pc_device = cfg->device;
525 
526 		devlist_entry->conf.pc_class = cfg->baseclass;
527 		devlist_entry->conf.pc_subclass = cfg->subclass;
528 		devlist_entry->conf.pc_progif = cfg->progif;
529 		devlist_entry->conf.pc_revid = cfg->revid;
530 
531 		pci_numdevs++;
532 		pci_generation++;
533 	}
534 	return (devlist_entry);
535 #undef REG
536 }
537 
538 static int
539 pci_fixup_nextptr(int *nextptr0)
540 {
541 	int nextptr = *nextptr0;
542 
543 	/* "Next pointer" is only one byte */
544 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
545 
546 	if (nextptr & 0x3) {
547 		/*
548 		 * PCI local bus spec 3.0:
549 		 *
550 		 * "... The bottom two bits of all pointers are reserved
551 		 *  and must be implemented as 00b although software must
552 		 *  mask them to allow for future uses of these bits ..."
553 		 */
554 		if (bootverbose) {
555 			kprintf("Illegal PCI extended capability "
556 				"offset, fixup 0x%02x -> 0x%02x\n",
557 				nextptr, nextptr & ~0x3);
558 		}
559 		nextptr &= ~0x3;
560 	}
561 	*nextptr0 = nextptr;
562 
563 	if (nextptr < 0x40) {
564 		if (nextptr != 0) {
565 			kprintf("Illegal PCI extended capability "
566 				"offset 0x%02x", nextptr);
567 		}
568 		return 0;
569 	}
570 	return 1;
571 }
572 
573 static void
574 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
575 {
576 #define REG(n, w)	\
577 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
578 
579 	struct pcicfg_pp *pp = &cfg->pp;
580 
581 	if (pp->pp_cap)
582 		return;
583 
584 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
585 	pp->pp_status = ptr + PCIR_POWER_STATUS;
586 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
587 
588 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
589 		/*
590 		 * XXX
591 		 * We should write to data_select and read back from
592 		 * data_scale to determine whether data register is
593 		 * implemented.
594 		 */
595 #ifdef foo
596 		pp->pp_data = ptr + PCIR_POWER_DATA;
597 #else
598 		pp->pp_data = 0;
599 #endif
600 	}
601 
602 #undef REG
603 }
604 
605 static void
606 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
607 {
608 #if defined(__i386__) || defined(__x86_64__)
609 
610 #define REG(n, w)	\
611 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
612 
613 	struct pcicfg_ht *ht = &cfg->ht;
614 	uint64_t addr;
615 	uint32_t val;
616 
617 	/* Determine HT-specific capability type. */
618 	val = REG(ptr + PCIR_HT_COMMAND, 2);
619 
620 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
621 		cfg->ht.ht_slave = ptr;
622 
623 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
624 		return;
625 
626 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
627 		/* Sanity check the mapping window. */
628 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
629 		addr <<= 32;
630 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
631 		if (addr != MSI_X86_ADDR_BASE) {
632 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
633 				"has non-default MSI window 0x%llx\n",
634 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
635 				(long long)addr);
636 		}
637 	} else {
638 		addr = MSI_X86_ADDR_BASE;
639 	}
640 
641 	ht->ht_msimap = ptr;
642 	ht->ht_msictrl = val;
643 	ht->ht_msiaddr = addr;
644 
645 #undef REG
646 
647 #endif	/* __i386__ || __x86_64__ */
648 }
649 
650 static void
651 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
652 {
653 #define REG(n, w)	\
654 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
655 
656 	struct pcicfg_msi *msi = &cfg->msi;
657 
658 	msi->msi_location = ptr;
659 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
660 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
661 
662 #undef REG
663 }
664 
665 static void
666 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
667 {
668 #define REG(n, w)	\
669 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
670 
671 	struct pcicfg_msix *msix = &cfg->msix;
672 	uint32_t val;
673 
674 	msix->msix_location = ptr;
675 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
676 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
677 
678 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
679 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
680 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
681 
682 	val = REG(ptr + PCIR_MSIX_PBA, 4);
683 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
684 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
685 
686 	TAILQ_INIT(&msix->msix_vectors);
687 
688 #undef REG
689 }
690 
/*
 * Parse a VPD capability: only the capability's offset is recorded
 * (in cfg->vpd.vpd_reg).  'pcib' and 'nextptr' are unused here.
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
696 
697 static void
698 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
699 {
700 #define REG(n, w)	\
701 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
702 
703 	/* Should always be true. */
704 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
705 		uint32_t val;
706 
707 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
708 		cfg->subvendor = val & 0xffff;
709 		cfg->subdevice = val >> 16;
710 	}
711 
712 #undef REG
713 }
714 
715 static void
716 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
717 {
718 	/*
719 	 * Assume we have a PCI-X chipset if we have
720 	 * at least one PCI-PCI bridge with a PCI-X
721 	 * capability.  Note that some systems with
722 	 * PCI-express or HT chipsets might match on
723 	 * this check as well.
724 	 */
725 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
726 		pcix_chipset = 1;
727 
728 	cfg->pcix.pcix_ptr = ptr;
729 }
730 
731 static int
732 pcie_slotimpl(const pcicfgregs *cfg)
733 {
734 	const struct pcicfg_expr *expr = &cfg->expr;
735 	uint16_t port_type;
736 
737 	/*
738 	 * - Slot implemented bit is meaningful iff current port is
739 	 *   root port or down stream port.
740 	 * - Testing for root port or down stream port is meanningful
741 	 *   iff PCI configure has type 1 header.
742 	 */
743 
744 	if (cfg->hdrtype != 1)
745 		return 0;
746 
747 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
748 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
749 		return 0;
750 
751 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
752 		return 0;
753 
754 	return 1;
755 }
756 
757 static void
758 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
759 {
760 #define REG(n, w)	\
761 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
762 
763 	struct pcicfg_expr *expr = &cfg->expr;
764 
765 	/*
766 	 * Assume we have a PCI-express chipset if we have
767 	 * at least one PCI-express device.
768 	 */
769 	pcie_chipset = 1;
770 
771 	expr->expr_ptr = ptr;
772 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
773 
774 	/*
775 	 * Read slot capabilities.  Slot capabilities exists iff
776 	 * current port's slot is implemented
777 	 */
778 	if (pcie_slotimpl(cfg))
779 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
780 
781 #undef REG
782 }
783 
784 static void
785 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
786 {
787 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
788 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
789 
790 	uint32_t val;
791 	int nextptr, ptrptr;
792 
793 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
794 		/* No capabilities */
795 		return;
796 	}
797 
798 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
799 	case 0:
800 	case 1:
801 		ptrptr = PCIR_CAP_PTR;
802 		break;
803 	case 2:
804 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
805 		break;
806 	default:
807 		return;				/* no capabilities support */
808 	}
809 	nextptr = REG(ptrptr, 1);	/* sanity check? */
810 
811 	/*
812 	 * Read capability entries.
813 	 */
814 	while (pci_fixup_nextptr(&nextptr)) {
815 		const struct pci_read_cap *rc;
816 		int ptr = nextptr;
817 
818 		/* Find the next entry */
819 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
820 
821 		/* Process this entry */
822 		val = REG(ptr + PCICAP_ID, 1);
823 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
824 			if (rc->cap == val) {
825 				rc->read_cap(pcib, ptr, nextptr, cfg);
826 				break;
827 			}
828 		}
829 	}
830 
831 #if defined(__i386__) || defined(__x86_64__)
832 	/*
833 	 * Enable the MSI mapping window for all HyperTransport
834 	 * slaves.  PCI-PCI bridges have their windows enabled via
835 	 * PCIB_MAP_MSI().
836 	 */
837 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
838 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
839 		device_printf(pcib,
840 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
841 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
842 		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
843 		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
844 		     2);
845 	}
846 #endif
847 
848 /* REG and WREG use carry through to next functions */
849 }
850 
851 /*
852  * PCI Vital Product Data
853  */
854 
855 #define	PCI_VPD_TIMEOUT		1000000
856 
857 static int
858 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
859 {
860 	int count = PCI_VPD_TIMEOUT;
861 
862 	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));
863 
864 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
865 
866 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
867 		if (--count < 0)
868 			return (ENXIO);
869 		DELAY(1);	/* limit looping */
870 	}
871 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
872 
873 	return (0);
874 }
875 
#if 0
/*
 * Write one 32-bit word of Vital Product Data (currently compiled out).
 *
 * The data register is loaded first, then 'reg' is written to the
 * address register with bit 0x8000 set; the address register is polled
 * until that bit reads back clear.  Returns 0 on success or ENXIO on
 * timeout.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int tries_left;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);

	tries_left = PCI_VPD_TIMEOUT;
	for (;;) {
		if ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) !=
		    0x8000)
			break;
		if (--tries_left < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif
895 
896 #undef PCI_VPD_TIMEOUT
897 
/*
 * Cursor used by vpd_nextbyte() to hand out VPD one byte at a time
 * while the hardware is read one 32-bit word at a time.
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* current word; next byte is in the low 8 bits after shifting */
	int		bytesinval;	/* bytes of 'val' not yet handed out */
	int		off;		/* VPD offset of the next word to fetch */
	uint8_t		cksum;		/* running 8-bit sum of every byte handed out */
};
906 
907 static int
908 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
909 {
910 	uint32_t reg;
911 	uint8_t byte;
912 
913 	if (vrs->bytesinval == 0) {
914 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
915 			return (ENXIO);
916 		vrs->val = le32toh(reg);
917 		vrs->off += 4;
918 		byte = vrs->val & 0xff;
919 		vrs->bytesinval = 3;
920 	} else {
921 		vrs->val = vrs->val >> 8;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval--;
924 	}
925 
926 	vrs->cksum += byte;
927 	*data = byte;
928 	return (0);
929 }
930 
931 int
932 pcie_slot_implemented(device_t dev)
933 {
934 	struct pci_devinfo *dinfo = device_get_ivars(dev);
935 
936 	return pcie_slotimpl(&dinfo->cfg);
937 }
938 
939 void
940 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
941 {
942 	uint8_t expr_ptr;
943 	uint16_t val;
944 
945 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
946 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
947 		panic("%s: invalid max read request size 0x%02x",
948 		      device_get_nameunit(dev), rqsize);
949 	}
950 
951 	expr_ptr = pci_get_pciecap_ptr(dev);
952 	if (!expr_ptr)
953 		panic("%s: not PCIe device", device_get_nameunit(dev));
954 
955 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
956 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
957 		if (bootverbose)
958 			device_printf(dev, "adjust device control 0x%04x", val);
959 
960 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
961 		val |= rqsize;
962 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
963 
964 		if (bootverbose)
965 			kprintf(" -> 0x%04x\n", val);
966 	}
967 }
968 
969 uint16_t
970 pcie_get_max_readrq(device_t dev)
971 {
972 	uint8_t expr_ptr;
973 	uint16_t val;
974 
975 	expr_ptr = pci_get_pciecap_ptr(dev);
976 	if (!expr_ptr)
977 		panic("%s: not PCIe device", device_get_nameunit(dev));
978 
979 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
980 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
981 }
982 
983 static void
984 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
985 {
986 	struct vpd_readstate vrs;
987 	int state;
988 	int name;
989 	int remain;
990 	int i;
991 	int alloc, off;		/* alloc/off for RO/W arrays */
992 	int cksumvalid;
993 	int dflen;
994 	uint8_t byte;
995 	uint8_t byte2;
996 
997 	/* init vpd reader */
998 	vrs.bytesinval = 0;
999 	vrs.off = 0;
1000 	vrs.pcib = pcib;
1001 	vrs.cfg = cfg;
1002 	vrs.cksum = 0;
1003 
1004 	state = 0;
1005 	name = remain = i = 0;	/* shut up stupid gcc */
1006 	alloc = off = 0;	/* shut up stupid gcc */
1007 	dflen = 0;		/* shut up stupid gcc */
1008 	cksumvalid = -1;
1009 	while (state >= 0) {
1010 		if (vpd_nextbyte(&vrs, &byte)) {
1011 			state = -2;
1012 			break;
1013 		}
1014 #if 0
1015 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1016 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1017 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1018 #endif
1019 		switch (state) {
1020 		case 0:		/* item name */
1021 			if (byte & 0x80) {
1022 				if (vpd_nextbyte(&vrs, &byte2)) {
1023 					state = -2;
1024 					break;
1025 				}
1026 				remain = byte2;
1027 				if (vpd_nextbyte(&vrs, &byte2)) {
1028 					state = -2;
1029 					break;
1030 				}
1031 				remain |= byte2 << 8;
1032 				if (remain > (0x7f*4 - vrs.off)) {
1033 					state = -1;
1034 					kprintf(
1035 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1036 					    cfg->domain, cfg->bus, cfg->slot,
1037 					    cfg->func, remain);
1038 				}
1039 				name = byte & 0x7f;
1040 			} else {
1041 				remain = byte & 0x7;
1042 				name = (byte >> 3) & 0xf;
1043 			}
1044 			switch (name) {
1045 			case 0x2:	/* String */
1046 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1047 				    M_DEVBUF, M_WAITOK);
1048 				i = 0;
1049 				state = 1;
1050 				break;
1051 			case 0xf:	/* End */
1052 				state = -1;
1053 				break;
1054 			case 0x10:	/* VPD-R */
1055 				alloc = 8;
1056 				off = 0;
1057 				cfg->vpd.vpd_ros = kmalloc(alloc *
1058 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1059 				    M_WAITOK | M_ZERO);
1060 				state = 2;
1061 				break;
1062 			case 0x11:	/* VPD-W */
1063 				alloc = 8;
1064 				off = 0;
1065 				cfg->vpd.vpd_w = kmalloc(alloc *
1066 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1067 				    M_WAITOK | M_ZERO);
1068 				state = 5;
1069 				break;
1070 			default:	/* Invalid data, abort */
1071 				state = -1;
1072 				break;
1073 			}
1074 			break;
1075 
1076 		case 1:	/* Identifier String */
1077 			cfg->vpd.vpd_ident[i++] = byte;
1078 			remain--;
1079 			if (remain == 0)  {
1080 				cfg->vpd.vpd_ident[i] = '\0';
1081 				state = 0;
1082 			}
1083 			break;
1084 
1085 		case 2:	/* VPD-R Keyword Header */
1086 			if (off == alloc) {
1087 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1088 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1089 				    M_DEVBUF, M_WAITOK | M_ZERO);
1090 			}
1091 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1092 			if (vpd_nextbyte(&vrs, &byte2)) {
1093 				state = -2;
1094 				break;
1095 			}
1096 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1097 			if (vpd_nextbyte(&vrs, &byte2)) {
1098 				state = -2;
1099 				break;
1100 			}
1101 			dflen = byte2;
1102 			if (dflen == 0 &&
1103 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1104 			    2) == 0) {
1105 				/*
1106 				 * if this happens, we can't trust the rest
1107 				 * of the VPD.
1108 				 */
1109 				kprintf(
1110 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1111 				    cfg->domain, cfg->bus, cfg->slot,
1112 				    cfg->func, dflen);
1113 				cksumvalid = 0;
1114 				state = -1;
1115 				break;
1116 			} else if (dflen == 0) {
1117 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1118 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1119 				    M_DEVBUF, M_WAITOK);
1120 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1121 			} else
1122 				cfg->vpd.vpd_ros[off].value = kmalloc(
1123 				    (dflen + 1) *
1124 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1125 				    M_DEVBUF, M_WAITOK);
1126 			remain -= 3;
1127 			i = 0;
1128 			/* keep in sync w/ state 3's transistions */
1129 			if (dflen == 0 && remain == 0)
1130 				state = 0;
1131 			else if (dflen == 0)
1132 				state = 2;
1133 			else
1134 				state = 3;
1135 			break;
1136 
1137 		case 3:	/* VPD-R Keyword Value */
1138 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1139 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1140 			    "RV", 2) == 0 && cksumvalid == -1) {
1141 				if (vrs.cksum == 0)
1142 					cksumvalid = 1;
1143 				else {
1144 					if (bootverbose)
1145 						kprintf(
1146 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1147 						    cfg->domain, cfg->bus,
1148 						    cfg->slot, cfg->func,
1149 						    vrs.cksum);
1150 					cksumvalid = 0;
1151 					state = -1;
1152 					break;
1153 				}
1154 			}
1155 			dflen--;
1156 			remain--;
1157 			/* keep in sync w/ state 2's transistions */
1158 			if (dflen == 0)
1159 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1160 			if (dflen == 0 && remain == 0) {
1161 				cfg->vpd.vpd_rocnt = off;
1162 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1163 				    off * sizeof(*cfg->vpd.vpd_ros),
1164 				    M_DEVBUF, M_WAITOK | M_ZERO);
1165 				state = 0;
1166 			} else if (dflen == 0)
1167 				state = 2;
1168 			break;
1169 
1170 		case 4:
1171 			remain--;
1172 			if (remain == 0)
1173 				state = 0;
1174 			break;
1175 
1176 		case 5:	/* VPD-W Keyword Header */
1177 			if (off == alloc) {
1178 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1179 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1180 				    M_DEVBUF, M_WAITOK | M_ZERO);
1181 			}
1182 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1183 			if (vpd_nextbyte(&vrs, &byte2)) {
1184 				state = -2;
1185 				break;
1186 			}
1187 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1188 			if (vpd_nextbyte(&vrs, &byte2)) {
1189 				state = -2;
1190 				break;
1191 			}
1192 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1193 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1194 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1195 			    sizeof(*cfg->vpd.vpd_w[off].value),
1196 			    M_DEVBUF, M_WAITOK);
1197 			remain -= 3;
1198 			i = 0;
1199 			/* keep in sync w/ state 6's transistions */
1200 			if (dflen == 0 && remain == 0)
1201 				state = 0;
1202 			else if (dflen == 0)
1203 				state = 5;
1204 			else
1205 				state = 6;
1206 			break;
1207 
1208 		case 6:	/* VPD-W Keyword Value */
1209 			cfg->vpd.vpd_w[off].value[i++] = byte;
1210 			dflen--;
1211 			remain--;
1212 			/* keep in sync w/ state 5's transistions */
1213 			if (dflen == 0)
1214 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1215 			if (dflen == 0 && remain == 0) {
1216 				cfg->vpd.vpd_wcnt = off;
1217 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1218 				    off * sizeof(*cfg->vpd.vpd_w),
1219 				    M_DEVBUF, M_WAITOK | M_ZERO);
1220 				state = 0;
1221 			} else if (dflen == 0)
1222 				state = 5;
1223 			break;
1224 
1225 		default:
1226 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1227 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1228 			    state);
1229 			state = -1;
1230 			break;
1231 		}
1232 	}
1233 
1234 	if (cksumvalid == 0 || state < -1) {
1235 		/* read-only data bad, clean up */
1236 		if (cfg->vpd.vpd_ros != NULL) {
1237 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1238 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1239 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1240 			cfg->vpd.vpd_ros = NULL;
1241 		}
1242 	}
1243 	if (state < -1) {
1244 		/* I/O error, clean up */
1245 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1246 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1247 		if (cfg->vpd.vpd_ident != NULL) {
1248 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1249 			cfg->vpd.vpd_ident = NULL;
1250 		}
1251 		if (cfg->vpd.vpd_w != NULL) {
1252 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1253 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1254 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1255 			cfg->vpd.vpd_w = NULL;
1256 		}
1257 	}
1258 	cfg->vpd.vpd_cached = 1;
1259 #undef REG
1260 #undef WREG
1261 }
1262 
1263 int
1264 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1265 {
1266 	struct pci_devinfo *dinfo = device_get_ivars(child);
1267 	pcicfgregs *cfg = &dinfo->cfg;
1268 
1269 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1270 		pci_read_vpd(device_get_parent(dev), cfg);
1271 
1272 	*identptr = cfg->vpd.vpd_ident;
1273 
1274 	if (*identptr == NULL)
1275 		return (ENXIO);
1276 
1277 	return (0);
1278 }
1279 
1280 int
1281 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1282 	const char **vptr)
1283 {
1284 	struct pci_devinfo *dinfo = device_get_ivars(child);
1285 	pcicfgregs *cfg = &dinfo->cfg;
1286 	int i;
1287 
1288 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1289 		pci_read_vpd(device_get_parent(dev), cfg);
1290 
1291 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1292 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1293 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1294 			*vptr = cfg->vpd.vpd_ros[i].value;
1295 		}
1296 
1297 	if (i != cfg->vpd.vpd_rocnt)
1298 		return (0);
1299 
1300 	*vptr = NULL;
1301 	return (ENXIO);
1302 }
1303 
1304 /*
1305  * Return the offset in configuration space of the requested extended
1306  * capability entry or 0 if the specified capability was not found.
1307  */
1308 int
1309 pci_find_extcap_method(device_t dev, device_t child, int capability,
1310     int *capreg)
1311 {
1312 	struct pci_devinfo *dinfo = device_get_ivars(child);
1313 	pcicfgregs *cfg = &dinfo->cfg;
1314 	u_int32_t status;
1315 	u_int8_t ptr;
1316 
1317 	/*
1318 	 * Check the CAP_LIST bit of the PCI status register first.
1319 	 */
1320 	status = pci_read_config(child, PCIR_STATUS, 2);
1321 	if (!(status & PCIM_STATUS_CAPPRESENT))
1322 		return (ENXIO);
1323 
1324 	/*
1325 	 * Determine the start pointer of the capabilities list.
1326 	 */
1327 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1328 	case 0:
1329 	case 1:
1330 		ptr = PCIR_CAP_PTR;
1331 		break;
1332 	case 2:
1333 		ptr = PCIR_CAP_PTR_2;
1334 		break;
1335 	default:
1336 		/* XXX: panic? */
1337 		return (ENXIO);		/* no extended capabilities support */
1338 	}
1339 	ptr = pci_read_config(child, ptr, 1);
1340 
1341 	/*
1342 	 * Traverse the capabilities list.
1343 	 */
1344 	while (ptr != 0) {
1345 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1346 			if (capreg != NULL)
1347 				*capreg = ptr;
1348 			return (0);
1349 		}
1350 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1351 	}
1352 
1353 	return (ENOENT);
1354 }
1355 
1356 /*
1357  * Support for MSI-X message interrupts.
1358  */
1359 static void
1360 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1361     uint32_t data)
1362 {
1363 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1364 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1365 	uint32_t offset;
1366 
1367 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1368 	offset = msix->msix_table_offset + index * 16;
1369 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1370 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1371 	bus_write_4(msix->msix_table_res, offset + 8, data);
1372 
1373 	/* Enable MSI -> HT mapping. */
1374 	pci_ht_map_msi(dev, address);
1375 }
1376 
1377 static void
1378 pci_mask_msix_vector(device_t dev, u_int index)
1379 {
1380 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1381 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1382 	uint32_t offset, val;
1383 
1384 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1385 	offset = msix->msix_table_offset + index * 16 + 12;
1386 	val = bus_read_4(msix->msix_table_res, offset);
1387 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1388 		val |= PCIM_MSIX_VCTRL_MASK;
1389 		bus_write_4(msix->msix_table_res, offset, val);
1390 	}
1391 }
1392 
1393 static void
1394 pci_unmask_msix_vector(device_t dev, u_int index)
1395 {
1396 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1397 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1398 	uint32_t offset, val;
1399 
1400 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1401 	offset = msix->msix_table_offset + index * 16 + 12;
1402 	val = bus_read_4(msix->msix_table_res, offset);
1403 	if (val & PCIM_MSIX_VCTRL_MASK) {
1404 		val &= ~PCIM_MSIX_VCTRL_MASK;
1405 		bus_write_4(msix->msix_table_res, offset, val);
1406 	}
1407 }
1408 
1409 int
1410 pci_pending_msix_vector(device_t dev, u_int index)
1411 {
1412 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1413 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1414 	uint32_t offset, bit;
1415 
1416 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1417 	    ("MSI-X is not setup yet"));
1418 
1419 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1420 	offset = msix->msix_pba_offset + (index / 32) * 4;
1421 	bit = 1 << index % 32;
1422 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1423 }
1424 
1425 /*
1426  * Restore MSI-X registers and table during resume.  If MSI-X is
1427  * enabled then walk the virtual table to restore the actual MSI-X
1428  * table.
1429  */
1430 static void
1431 pci_resume_msix(device_t dev)
1432 {
1433 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1434 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1435 
1436 	if (msix->msix_table_res != NULL) {
1437 		const struct msix_vector *mv;
1438 
1439 		pci_mask_msix_allvectors(dev);
1440 
1441 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1442 			u_int vector;
1443 
1444 			if (mv->mv_address == 0)
1445 				continue;
1446 
1447 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1448 			pci_setup_msix_vector(dev, vector,
1449 			    mv->mv_address, mv->mv_data);
1450 			pci_unmask_msix_vector(dev, vector);
1451 		}
1452 	}
1453 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1454 	    msix->msix_ctrl, 2);
1455 }
1456 
1457 /*
1458  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1459  *
1460  * After this function returns, the MSI-X's rid will be saved in rid0.
1461  */
1462 int
1463 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1464     int *rid0, int cpuid)
1465 {
1466 	struct pci_devinfo *dinfo = device_get_ivars(child);
1467 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1468 	struct msix_vector *mv;
1469 	struct resource_list_entry *rle;
1470 	int error, irq, rid;
1471 
1472 	KASSERT(msix->msix_table_res != NULL &&
1473 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1474 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1475 	KASSERT(vector < msix->msix_msgnum,
1476 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1477 
1478 	if (bootverbose) {
1479 		device_printf(child,
1480 		    "attempting to allocate MSI-X #%u vector (%d supported)\n",
1481 		    vector, msix->msix_msgnum);
1482 	}
1483 
1484 	/* Set rid according to vector number */
1485 	rid = PCI_MSIX_VEC2RID(vector);
1486 
1487 	/* Vector has already been allocated */
1488 	mv = pci_find_msix_vector(child, rid);
1489 	if (mv != NULL)
1490 		return EBUSY;
1491 
1492 	/* Allocate a message. */
1493 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1494 	if (error)
1495 		return error;
1496 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1497 	    irq, irq, 1, cpuid);
1498 
1499 	if (bootverbose) {
1500 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1501 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1502 		    rle->start, cpuid);
1503 	}
1504 
1505 	/* Update counts of alloc'd messages. */
1506 	msix->msix_alloc++;
1507 
1508 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1509 	mv->mv_rid = rid;
1510 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1511 
1512 	*rid0 = rid;
1513 	return 0;
1514 }
1515 
1516 int
1517 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1518 {
1519 	struct pci_devinfo *dinfo = device_get_ivars(child);
1520 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1521 	struct resource_list_entry *rle;
1522 	struct msix_vector *mv;
1523 	int irq, cpuid;
1524 
1525 	KASSERT(msix->msix_table_res != NULL &&
1526 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1527 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1528 	KASSERT(rid > 0, ("invalid rid %d", rid));
1529 
1530 	mv = pci_find_msix_vector(child, rid);
1531 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1532 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not teardown", rid));
1533 
1534 	/* Make sure resource is no longer allocated. */
1535 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1536 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1537 	KASSERT(rle->res == NULL,
1538 	    ("MSI-X resource is still allocated, rid %d", rid));
1539 
1540 	irq = rle->start;
1541 	cpuid = rle->cpuid;
1542 
1543 	/* Free the resource list entries. */
1544 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1545 
1546 	/* Release the IRQ. */
1547 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1548 
1549 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1550 	kfree(mv, M_DEVBUF);
1551 
1552 	msix->msix_alloc--;
1553 	return (0);
1554 }
1555 
1556 /*
1557  * Return the max supported MSI-X messages this device supports.
1558  * Basically, assuming the MD code can alloc messages, this function
1559  * should return the maximum value that pci_alloc_msix() can return.
1560  * Thus, it is subject to the tunables, etc.
1561  */
1562 int
1563 pci_msix_count_method(device_t dev, device_t child)
1564 {
1565 	struct pci_devinfo *dinfo = device_get_ivars(child);
1566 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1567 
1568 	if (pci_do_msix && msix->msix_location != 0)
1569 		return (msix->msix_msgnum);
1570 	return (0);
1571 }
1572 
1573 int
1574 pci_setup_msix(device_t dev)
1575 {
1576 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1577 	pcicfgregs *cfg = &dinfo->cfg;
1578 	struct resource_list_entry *rle;
1579 	struct resource *table_res, *pba_res;
1580 
1581 	KASSERT(cfg->msix.msix_table_res == NULL &&
1582 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has been setup yet"));
1583 
1584 	/* If rid 0 is allocated, then fail. */
1585 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1586 	if (rle != NULL && rle->res != NULL)
1587 		return (ENXIO);
1588 
1589 	/* Already have allocated MSIs? */
1590 	if (cfg->msi.msi_alloc != 0)
1591 		return (ENXIO);
1592 
1593 	/* If MSI is blacklisted for this system, fail. */
1594 	if (pci_msi_blacklisted())
1595 		return (ENXIO);
1596 
1597 	/* MSI-X capability present? */
1598 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1599 	    !pci_do_msix)
1600 		return (ENODEV);
1601 
1602 	KASSERT(cfg->msix.msix_alloc == 0 &&
1603 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1604 	    ("MSI-X vector has been allocated"));
1605 
1606 	/* Make sure the appropriate BARs are mapped. */
1607 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1608 	    cfg->msix.msix_table_bar);
1609 	if (rle == NULL || rle->res == NULL ||
1610 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1611 		return (ENXIO);
1612 	table_res = rle->res;
1613 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1614 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1615 		    cfg->msix.msix_pba_bar);
1616 		if (rle == NULL || rle->res == NULL ||
1617 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1618 			return (ENXIO);
1619 	}
1620 	pba_res = rle->res;
1621 
1622 	cfg->msix.msix_table_res = table_res;
1623 	cfg->msix.msix_pba_res = pba_res;
1624 
1625 	pci_mask_msix_allvectors(dev);
1626 
1627 	return 0;
1628 }
1629 
1630 void
1631 pci_teardown_msix(device_t dev)
1632 {
1633 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1634 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1635 
1636 	KASSERT(msix->msix_table_res != NULL &&
1637 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1638 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1639 	    ("MSI-X vector is still allocated"));
1640 
1641 	pci_mask_msix_allvectors(dev);
1642 
1643 	msix->msix_table_res = NULL;
1644 	msix->msix_pba_res = NULL;
1645 }
1646 
1647 void
1648 pci_enable_msix(device_t dev)
1649 {
1650 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1651 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1652 
1653 	KASSERT(msix->msix_table_res != NULL &&
1654 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1655 
1656 	/* Update control register to enable MSI-X. */
1657 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1658 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1659 	    msix->msix_ctrl, 2);
1660 }
1661 
1662 void
1663 pci_disable_msix(device_t dev)
1664 {
1665 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1666 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1667 
1668 	KASSERT(msix->msix_table_res != NULL &&
1669 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1670 
1671 	/* Disable MSI -> HT mapping. */
1672 	pci_ht_map_msi(dev, 0);
1673 
1674 	/* Update control register to disable MSI-X. */
1675 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1676 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1677 	    msix->msix_ctrl, 2);
1678 }
1679 
1680 static void
1681 pci_mask_msix_allvectors(device_t dev)
1682 {
1683 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1684 	u_int i;
1685 
1686 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1687 		pci_mask_msix_vector(dev, i);
1688 }
1689 
1690 static struct msix_vector *
1691 pci_find_msix_vector(device_t dev, int rid)
1692 {
1693 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1694 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1695 	struct msix_vector *mv;
1696 
1697 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1698 		if (mv->mv_rid == rid)
1699 			return mv;
1700 	}
1701 	return NULL;
1702 }
1703 
1704 /*
1705  * HyperTransport MSI mapping control
1706  */
1707 void
1708 pci_ht_map_msi(device_t dev, uint64_t addr)
1709 {
1710 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1711 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1712 
1713 	if (!ht->ht_msimap)
1714 		return;
1715 
1716 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1717 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1718 		/* Enable MSI -> HT mapping. */
1719 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1720 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1721 		    ht->ht_msictrl, 2);
1722 	}
1723 
1724 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1725 		/* Disable MSI -> HT mapping. */
1726 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1727 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1728 		    ht->ht_msictrl, 2);
1729 	}
1730 }
1731 
1732 /*
1733  * Support for MSI message signalled interrupts.
1734  */
1735 void
1736 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1737 {
1738 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1739 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1740 
1741 	/* Write data and address values. */
1742 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1743 	    address & 0xffffffff, 4);
1744 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1745 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1746 		    address >> 32, 4);
1747 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1748 		    data, 2);
1749 	} else
1750 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1751 		    2);
1752 
1753 	/* Enable MSI in the control register. */
1754 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1755 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1756 	    2);
1757 
1758 	/* Enable MSI -> HT mapping. */
1759 	pci_ht_map_msi(dev, address);
1760 }
1761 
1762 void
1763 pci_disable_msi(device_t dev)
1764 {
1765 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1766 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1767 
1768 	/* Disable MSI -> HT mapping. */
1769 	pci_ht_map_msi(dev, 0);
1770 
1771 	/* Disable MSI in the control register. */
1772 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1773 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1774 	    2);
1775 }
1776 
1777 /*
1778  * Restore MSI registers during resume.  If MSI is enabled then
1779  * restore the data and address registers in addition to the control
1780  * register.
1781  */
1782 static void
1783 pci_resume_msi(device_t dev)
1784 {
1785 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1786 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1787 	uint64_t address;
1788 	uint16_t data;
1789 
1790 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1791 		address = msi->msi_addr;
1792 		data = msi->msi_data;
1793 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1794 		    address & 0xffffffff, 4);
1795 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1796 			pci_write_config(dev, msi->msi_location +
1797 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1798 			pci_write_config(dev, msi->msi_location +
1799 			    PCIR_MSI_DATA_64BIT, data, 2);
1800 		} else
1801 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1802 			    data, 2);
1803 	}
1804 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1805 	    2);
1806 }
1807 
1808 /*
1809  * Returns true if the specified device is blacklisted because MSI
1810  * doesn't work.
1811  */
1812 int
1813 pci_msi_device_blacklisted(device_t dev)
1814 {
1815 	struct pci_quirk *q;
1816 
1817 	if (!pci_honor_msi_blacklist)
1818 		return (0);
1819 
1820 	for (q = &pci_quirks[0]; q->devid; q++) {
1821 		if (q->devid == pci_get_devid(dev) &&
1822 		    q->type == PCI_QUIRK_DISABLE_MSI)
1823 			return (1);
1824 	}
1825 	return (0);
1826 }
1827 
1828 /*
1829  * Determine if MSI is blacklisted globally on this sytem.  Currently,
1830  * we just check for blacklisted chipsets as represented by the
1831  * host-PCI bridge at device 0:0:0.  In the future, it may become
1832  * necessary to check other system attributes, such as the kenv values
1833  * that give the motherboard manufacturer and model number.
1834  */
1835 static int
1836 pci_msi_blacklisted(void)
1837 {
1838 	device_t dev;
1839 
1840 	if (!pci_honor_msi_blacklist)
1841 		return (0);
1842 
1843 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1844 	if (!(pcie_chipset || pcix_chipset))
1845 		return (1);
1846 
1847 	dev = pci_find_bsf(0, 0, 0);
1848 	if (dev != NULL)
1849 		return (pci_msi_device_blacklisted(dev));
1850 	return (0);
1851 }
1852 
1853 /*
1854  * Attempt to allocate count MSI messages on start_cpuid.
1855  *
1856  * If start_cpuid < 0, then the MSI messages' target CPU will be
1857  * selected automaticly.
1858  *
1859  * If the caller explicitly specified the MSI messages' target CPU,
1860  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1861  * messages on the specified CPU, if the allocation fails due to MD
1862  * does not have enough vectors (EMSGSIZE), then we will try next
1863  * available CPU, until the allocation fails on all CPUs.
1864  *
1865  * EMSGSIZE will be returned, if all available CPUs does not have
1866  * enough vectors for the requested amount of MSI messages.  Caller
1867  * should either reduce the amount of MSI messages to be requested,
1868  * or simply giving up using MSI.
1869  *
1870  * The available SYS_RES_IRQ resources' rids, which are >= 1, are
1871  * returned in 'rid' array, if the allocation succeeds.
1872  */
1873 int
1874 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1875     int start_cpuid)
1876 {
1877 	struct pci_devinfo *dinfo = device_get_ivars(child);
1878 	pcicfgregs *cfg = &dinfo->cfg;
1879 	struct resource_list_entry *rle;
1880 	int error, i, irqs[32], cpuid = 0;
1881 	uint16_t ctrl;
1882 
1883 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1884 	    ("invalid MSI count %d", count));
1885 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1886 
1887 	/* If rid 0 is allocated, then fail. */
1888 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1889 	if (rle != NULL && rle->res != NULL)
1890 		return (ENXIO);
1891 
1892 	/* Already have allocated messages? */
1893 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1894 		return (ENXIO);
1895 
1896 	/* If MSI is blacklisted for this system, fail. */
1897 	if (pci_msi_blacklisted())
1898 		return (ENXIO);
1899 
1900 	/* MSI capability present? */
1901 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1902 	    !pci_do_msi)
1903 		return (ENODEV);
1904 
1905 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1906 	    count, cfg->msi.msi_msgnum));
1907 
1908 	if (bootverbose) {
1909 		device_printf(child,
1910 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1911 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1912 	}
1913 
1914 	if (start_cpuid < 0)
1915 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1916 
1917 	error = EINVAL;
1918 	for (i = 0; i < ncpus; ++i) {
1919 		cpuid = (start_cpuid + i) % ncpus;
1920 
1921 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1922 		    cfg->msi.msi_msgnum, irqs, cpuid);
1923 		if (error == 0)
1924 			break;
1925 		else if (error != EMSGSIZE)
1926 			return error;
1927 	}
1928 	if (error)
1929 		return error;
1930 
1931 	/*
1932 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1933 	 * the irqs[] array, so add new resources starting at rid 1.
1934 	 */
1935 	for (i = 0; i < count; i++) {
1936 		rid[i] = i + 1;
1937 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
1938 		    irqs[i], irqs[i], 1, cpuid);
1939 	}
1940 
1941 	if (bootverbose) {
1942 		if (count == 1) {
1943 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
1944 			    irqs[0], cpuid);
1945 		} else {
1946 			int run;
1947 
1948 			/*
1949 			 * Be fancy and try to print contiguous runs
1950 			 * of IRQ values as ranges.  'run' is true if
1951 			 * we are in a range.
1952 			 */
1953 			device_printf(child, "using IRQs %d", irqs[0]);
1954 			run = 0;
1955 			for (i = 1; i < count; i++) {
1956 
1957 				/* Still in a run? */
1958 				if (irqs[i] == irqs[i - 1] + 1) {
1959 					run = 1;
1960 					continue;
1961 				}
1962 
1963 				/* Finish previous range. */
1964 				if (run) {
1965 					kprintf("-%d", irqs[i - 1]);
1966 					run = 0;
1967 				}
1968 
1969 				/* Start new range. */
1970 				kprintf(",%d", irqs[i]);
1971 			}
1972 
1973 			/* Unfinished range? */
1974 			if (run)
1975 				kprintf("-%d", irqs[count - 1]);
1976 			kprintf(" for MSI on cpu%d\n", cpuid);
1977 		}
1978 	}
1979 
1980 	/* Update control register with count. */
1981 	ctrl = cfg->msi.msi_ctrl;
1982 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
1983 	ctrl |= (ffs(count) - 1) << 4;
1984 	cfg->msi.msi_ctrl = ctrl;
1985 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
1986 
1987 	/* Update counts of alloc'd messages. */
1988 	cfg->msi.msi_alloc = count;
1989 	cfg->msi.msi_handlers = 0;
1990 	return (0);
1991 }
1992 
1993 /* Release the MSI messages associated with this device. */
1994 int
1995 pci_release_msi_method(device_t dev, device_t child)
1996 {
1997 	struct pci_devinfo *dinfo = device_get_ivars(child);
1998 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1999 	struct resource_list_entry *rle;
2000 	int i, irqs[32], cpuid = -1;
2001 
2002 	/* Do we have any messages to release? */
2003 	if (msi->msi_alloc == 0)
2004 		return (ENODEV);
2005 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2006 
2007 	/* Make sure none of the resources are allocated. */
2008 	if (msi->msi_handlers > 0)
2009 		return (EBUSY);
2010 	for (i = 0; i < msi->msi_alloc; i++) {
2011 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2012 		KASSERT(rle != NULL, ("missing MSI resource"));
2013 		if (rle->res != NULL)
2014 			return (EBUSY);
2015 		if (i == 0) {
2016 			cpuid = rle->cpuid;
2017 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2018 			    ("invalid MSI target cpuid %d", cpuid));
2019 		} else {
2020 			KASSERT(rle->cpuid == cpuid,
2021 			    ("MSI targets different cpus, "
2022 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2023 		}
2024 		irqs[i] = rle->start;
2025 	}
2026 
2027 	/* Update control register with 0 count. */
2028 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2029 	    ("%s: MSI still enabled", __func__));
2030 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2031 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2032 	    msi->msi_ctrl, 2);
2033 
2034 	/* Release the messages. */
2035 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2036 	    cpuid);
2037 	for (i = 0; i < msi->msi_alloc; i++)
2038 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2039 
2040 	/* Update alloc count. */
2041 	msi->msi_alloc = 0;
2042 	msi->msi_addr = 0;
2043 	msi->msi_data = 0;
2044 	return (0);
2045 }
2046 
2047 /*
2048  * Return the max supported MSI messages this device supports.
2049  * Basically, assuming the MD code can alloc messages, this function
2050  * should return the maximum value that pci_alloc_msi() can return.
2051  * Thus, it is subject to the tunables, etc.
2052  */
2053 int
2054 pci_msi_count_method(device_t dev, device_t child)
2055 {
2056 	struct pci_devinfo *dinfo = device_get_ivars(child);
2057 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2058 
2059 	if (pci_do_msi && msi->msi_location != 0)
2060 		return (msi->msi_msgnum);
2061 	return (0);
2062 }
2063 
2064 /* kfree pcicfgregs structure and all depending data structures */
2065 
2066 int
2067 pci_freecfg(struct pci_devinfo *dinfo)
2068 {
2069 	struct devlist *devlist_head;
2070 	int i;
2071 
2072 	devlist_head = &pci_devq;
2073 
2074 	if (dinfo->cfg.vpd.vpd_reg) {
2075 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2076 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2077 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2078 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2079 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2080 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2081 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2082 	}
2083 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2084 	kfree(dinfo, M_DEVBUF);
2085 
2086 	/* increment the generation count */
2087 	pci_generation++;
2088 
2089 	/* we're losing one device */
2090 	pci_numdevs--;
2091 	return (0);
2092 }
2093 
2094 /*
2095  * PCI power manangement
2096  */
2097 int
2098 pci_set_powerstate_method(device_t dev, device_t child, int state)
2099 {
2100 	struct pci_devinfo *dinfo = device_get_ivars(child);
2101 	pcicfgregs *cfg = &dinfo->cfg;
2102 	uint16_t status;
2103 	int oldstate, highest, delay;
2104 
2105 	if (cfg->pp.pp_cap == 0)
2106 		return (EOPNOTSUPP);
2107 
2108 	/*
2109 	 * Optimize a no state change request away.  While it would be OK to
2110 	 * write to the hardware in theory, some devices have shown odd
2111 	 * behavior when going from D3 -> D3.
2112 	 */
2113 	oldstate = pci_get_powerstate(child);
2114 	if (oldstate == state)
2115 		return (0);
2116 
2117 	/*
2118 	 * The PCI power management specification states that after a state
2119 	 * transition between PCI power states, system software must
2120 	 * guarantee a minimal delay before the function accesses the device.
2121 	 * Compute the worst case delay that we need to guarantee before we
2122 	 * access the device.  Many devices will be responsive much more
2123 	 * quickly than this delay, but there are some that don't respond
2124 	 * instantly to state changes.  Transitions to/from D3 state require
2125 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2126 	 * is done below with DELAY rather than a sleeper function because
2127 	 * this function can be called from contexts where we cannot sleep.
2128 	 */
2129 	highest = (oldstate > state) ? oldstate : state;
2130 	if (highest == PCI_POWERSTATE_D3)
2131 	    delay = 10000;
2132 	else if (highest == PCI_POWERSTATE_D2)
2133 	    delay = 200;
2134 	else
2135 	    delay = 0;
2136 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2137 	    & ~PCIM_PSTAT_DMASK;
2138 	switch (state) {
2139 	case PCI_POWERSTATE_D0:
2140 		status |= PCIM_PSTAT_D0;
2141 		break;
2142 	case PCI_POWERSTATE_D1:
2143 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2144 			return (EOPNOTSUPP);
2145 		status |= PCIM_PSTAT_D1;
2146 		break;
2147 	case PCI_POWERSTATE_D2:
2148 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2149 			return (EOPNOTSUPP);
2150 		status |= PCIM_PSTAT_D2;
2151 		break;
2152 	case PCI_POWERSTATE_D3:
2153 		status |= PCIM_PSTAT_D3;
2154 		break;
2155 	default:
2156 		return (EINVAL);
2157 	}
2158 
2159 	if (bootverbose)
2160 		kprintf(
2161 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2162 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2163 		    dinfo->cfg.func, oldstate, state);
2164 
2165 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2166 	if (delay)
2167 		DELAY(delay);
2168 	return (0);
2169 }
2170 
2171 int
2172 pci_get_powerstate_method(device_t dev, device_t child)
2173 {
2174 	struct pci_devinfo *dinfo = device_get_ivars(child);
2175 	pcicfgregs *cfg = &dinfo->cfg;
2176 	uint16_t status;
2177 	int result;
2178 
2179 	if (cfg->pp.pp_cap != 0) {
2180 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2181 		switch (status & PCIM_PSTAT_DMASK) {
2182 		case PCIM_PSTAT_D0:
2183 			result = PCI_POWERSTATE_D0;
2184 			break;
2185 		case PCIM_PSTAT_D1:
2186 			result = PCI_POWERSTATE_D1;
2187 			break;
2188 		case PCIM_PSTAT_D2:
2189 			result = PCI_POWERSTATE_D2;
2190 			break;
2191 		case PCIM_PSTAT_D3:
2192 			result = PCI_POWERSTATE_D3;
2193 			break;
2194 		default:
2195 			result = PCI_POWERSTATE_UNKNOWN;
2196 			break;
2197 		}
2198 	} else {
2199 		/* No support, device is always at D0 */
2200 		result = PCI_POWERSTATE_D0;
2201 	}
2202 	return (result);
2203 }
2204 
2205 /*
2206  * Some convenience functions for PCI device drivers.
2207  */
2208 
2209 static __inline void
2210 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2211 {
2212 	uint16_t	command;
2213 
2214 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2215 	command |= bit;
2216 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2217 }
2218 
2219 static __inline void
2220 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2221 {
2222 	uint16_t	command;
2223 
2224 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2225 	command &= ~bit;
2226 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2227 }
2228 
2229 int
2230 pci_enable_busmaster_method(device_t dev, device_t child)
2231 {
2232 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2233 	return (0);
2234 }
2235 
2236 int
2237 pci_disable_busmaster_method(device_t dev, device_t child)
2238 {
2239 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2240 	return (0);
2241 }
2242 
2243 int
2244 pci_enable_io_method(device_t dev, device_t child, int space)
2245 {
2246 	uint16_t command;
2247 	uint16_t bit;
2248 	char *error;
2249 
2250 	bit = 0;
2251 	error = NULL;
2252 
2253 	switch(space) {
2254 	case SYS_RES_IOPORT:
2255 		bit = PCIM_CMD_PORTEN;
2256 		error = "port";
2257 		break;
2258 	case SYS_RES_MEMORY:
2259 		bit = PCIM_CMD_MEMEN;
2260 		error = "memory";
2261 		break;
2262 	default:
2263 		return (EINVAL);
2264 	}
2265 	pci_set_command_bit(dev, child, bit);
2266 	/* Some devices seem to need a brief stall here, what do to? */
2267 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2268 	if (command & bit)
2269 		return (0);
2270 	device_printf(child, "failed to enable %s mapping!\n", error);
2271 	return (ENXIO);
2272 }
2273 
2274 int
2275 pci_disable_io_method(device_t dev, device_t child, int space)
2276 {
2277 	uint16_t command;
2278 	uint16_t bit;
2279 	char *error;
2280 
2281 	bit = 0;
2282 	error = NULL;
2283 
2284 	switch(space) {
2285 	case SYS_RES_IOPORT:
2286 		bit = PCIM_CMD_PORTEN;
2287 		error = "port";
2288 		break;
2289 	case SYS_RES_MEMORY:
2290 		bit = PCIM_CMD_MEMEN;
2291 		error = "memory";
2292 		break;
2293 	default:
2294 		return (EINVAL);
2295 	}
2296 	pci_clear_command_bit(dev, child, bit);
2297 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2298 	if (command & bit) {
2299 		device_printf(child, "failed to disable %s mapping!\n", error);
2300 		return (ENXIO);
2301 	}
2302 	return (0);
2303 }
2304 
2305 /*
2306  * New style pci driver.  Parent device is either a pci-host-bridge or a
2307  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2308  */
2309 
2310 void
2311 pci_print_verbose(struct pci_devinfo *dinfo)
2312 {
2313 
2314 	if (bootverbose) {
2315 		pcicfgregs *cfg = &dinfo->cfg;
2316 
2317 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2318 		    cfg->vendor, cfg->device, cfg->revid);
2319 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2320 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2321 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2322 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2323 		    cfg->mfdev);
2324 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2325 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2326 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2327 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2328 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2329 		if (cfg->intpin > 0)
2330 			kprintf("\tintpin=%c, irq=%d\n",
2331 			    cfg->intpin +'a' -1, cfg->intline);
2332 		if (cfg->pp.pp_cap) {
2333 			uint16_t status;
2334 
2335 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2336 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2337 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2338 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2339 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2340 			    status & PCIM_PSTAT_DMASK);
2341 		}
2342 		if (cfg->msi.msi_location) {
2343 			int ctrl;
2344 
2345 			ctrl = cfg->msi.msi_ctrl;
2346 			kprintf("\tMSI supports %d message%s%s%s\n",
2347 			    cfg->msi.msi_msgnum,
2348 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2349 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2350 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2351 		}
2352 		if (cfg->msix.msix_location) {
2353 			kprintf("\tMSI-X supports %d message%s ",
2354 			    cfg->msix.msix_msgnum,
2355 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2356 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2357 				kprintf("in map 0x%x\n",
2358 				    cfg->msix.msix_table_bar);
2359 			else
2360 				kprintf("in maps 0x%x and 0x%x\n",
2361 				    cfg->msix.msix_table_bar,
2362 				    cfg->msix.msix_pba_bar);
2363 		}
2364 		pci_print_verbose_expr(cfg);
2365 	}
2366 }
2367 
2368 static void
2369 pci_print_verbose_expr(const pcicfgregs *cfg)
2370 {
2371 	const struct pcicfg_expr *expr = &cfg->expr;
2372 	const char *port_name;
2373 	uint16_t port_type;
2374 
2375 	if (!bootverbose)
2376 		return;
2377 
2378 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2379 		return;
2380 
2381 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2382 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2383 
2384 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2385 
2386 	switch (port_type) {
2387 	case PCIE_END_POINT:
2388 		port_name = "DEVICE";
2389 		break;
2390 	case PCIE_LEG_END_POINT:
2391 		port_name = "LEGDEV";
2392 		break;
2393 	case PCIE_ROOT_PORT:
2394 		port_name = "ROOT";
2395 		break;
2396 	case PCIE_UP_STREAM_PORT:
2397 		port_name = "UPSTREAM";
2398 		break;
2399 	case PCIE_DOWN_STREAM_PORT:
2400 		port_name = "DOWNSTRM";
2401 		break;
2402 	case PCIE_PCIE2PCI_BRIDGE:
2403 		port_name = "PCIE2PCI";
2404 		break;
2405 	case PCIE_PCI2PCIE_BRIDGE:
2406 		port_name = "PCI2PCIE";
2407 		break;
2408 	case PCIE_ROOT_END_POINT:
2409 		port_name = "ROOTDEV";
2410 		break;
2411 	case PCIE_ROOT_EVT_COLL:
2412 		port_name = "ROOTEVTC";
2413 		break;
2414 	default:
2415 		port_name = NULL;
2416 		break;
2417 	}
2418 	if ((port_type == PCIE_ROOT_PORT ||
2419 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2420 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2421 		port_name = NULL;
2422 	if (port_name != NULL)
2423 		kprintf("[%s]", port_name);
2424 
2425 	if (pcie_slotimpl(cfg)) {
2426 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2427 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2428 			kprintf("[HOTPLUG]");
2429 	}
2430 	kprintf("\n");
2431 }
2432 
2433 static int
2434 pci_porten(device_t pcib, int b, int s, int f)
2435 {
2436 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2437 		& PCIM_CMD_PORTEN) != 0;
2438 }
2439 
2440 static int
2441 pci_memen(device_t pcib, int b, int s, int f)
2442 {
2443 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2444 		& PCIM_CMD_MEMEN) != 0;
2445 }
2446 
2447 /*
2448  * Add a resource based on a pci map register. Return 1 if the map
2449  * register is a 32bit map register or 2 if it is a 64bit register.
2450  */
2451 static int
2452 pci_add_map(device_t pcib, device_t bus, device_t dev,
2453     int b, int s, int f, int reg, struct resource_list *rl, int force,
2454     int prefetch)
2455 {
2456 	uint32_t map;
2457 	uint16_t old_cmd;
2458 	pci_addr_t base;
2459 	pci_addr_t start, end, count;
2460 	uint8_t ln2size;
2461 	uint8_t ln2range;
2462 	uint32_t testval;
2463 	uint16_t cmd;
2464 	int type;
2465 	int barlen;
2466 	struct resource *res;
2467 
2468 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2469 
2470         /* Disable access to device memory */
2471 	old_cmd = 0;
2472 	if (PCI_BAR_MEM(map)) {
2473 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2474 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2475 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2476 	}
2477 
2478 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2479 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2480 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2481 
2482         /* Restore memory access mode */
2483 	if (PCI_BAR_MEM(map)) {
2484 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2485 	}
2486 
2487 	if (PCI_BAR_MEM(map)) {
2488 		type = SYS_RES_MEMORY;
2489 		if (map & PCIM_BAR_MEM_PREFETCH)
2490 			prefetch = 1;
2491 	} else
2492 		type = SYS_RES_IOPORT;
2493 	ln2size = pci_mapsize(testval);
2494 	ln2range = pci_maprange(testval);
2495 	base = pci_mapbase(map);
2496 	barlen = ln2range == 64 ? 2 : 1;
2497 
2498 	/*
2499 	 * For I/O registers, if bottom bit is set, and the next bit up
2500 	 * isn't clear, we know we have a BAR that doesn't conform to the
2501 	 * spec, so ignore it.  Also, sanity check the size of the data
2502 	 * areas to the type of memory involved.  Memory must be at least
2503 	 * 16 bytes in size, while I/O ranges must be at least 4.
2504 	 */
2505 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2506 		return (barlen);
2507 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2508 	    (type == SYS_RES_IOPORT && ln2size < 2))
2509 		return (barlen);
2510 
2511 	if (ln2range == 64)
2512 		/* Read the other half of a 64bit map register */
2513 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2514 	if (bootverbose) {
2515 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2516 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2517 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2518 			kprintf(", port disabled\n");
2519 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2520 			kprintf(", memory disabled\n");
2521 		else
2522 			kprintf(", enabled\n");
2523 	}
2524 
2525 	/*
2526 	 * If base is 0, then we have problems.  It is best to ignore
2527 	 * such entries for the moment.  These will be allocated later if
2528 	 * the driver specifically requests them.  However, some
2529 	 * removable busses look better when all resources are allocated,
2530 	 * so allow '0' to be overriden.
2531 	 *
2532 	 * Similarly treat maps whose values is the same as the test value
2533 	 * read back.  These maps have had all f's written to them by the
2534 	 * BIOS in an attempt to disable the resources.
2535 	 */
2536 	if (!force && (base == 0 || map == testval))
2537 		return (barlen);
2538 	if ((u_long)base != base) {
2539 		device_printf(bus,
2540 		    "pci%d:%d:%d:%d bar %#x too many address bits",
2541 		    pci_get_domain(dev), b, s, f, reg);
2542 		return (barlen);
2543 	}
2544 
2545 	/*
2546 	 * This code theoretically does the right thing, but has
2547 	 * undesirable side effects in some cases where peripherals
2548 	 * respond oddly to having these bits enabled.  Let the user
2549 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2550 	 * default).
2551 	 */
2552 	if (pci_enable_io_modes) {
2553 		/* Turn on resources that have been left off by a lazy BIOS */
2554 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2555 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2556 			cmd |= PCIM_CMD_PORTEN;
2557 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2558 		}
2559 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2560 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2561 			cmd |= PCIM_CMD_MEMEN;
2562 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2563 		}
2564 	} else {
2565 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2566 			return (barlen);
2567 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2568 			return (barlen);
2569 	}
2570 
2571 	count = 1 << ln2size;
2572 	if (base == 0 || base == pci_mapbase(testval)) {
2573 		start = 0;	/* Let the parent decide. */
2574 		end = ~0ULL;
2575 	} else {
2576 		start = base;
2577 		end = base + (1 << ln2size) - 1;
2578 	}
2579 	resource_list_add(rl, type, reg, start, end, count, -1);
2580 
2581 	/*
2582 	 * Try to allocate the resource for this BAR from our parent
2583 	 * so that this resource range is already reserved.  The
2584 	 * driver for this device will later inherit this resource in
2585 	 * pci_alloc_resource().
2586 	 */
2587 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2588 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2589 	if (res == NULL) {
2590 		/*
2591 		 * If the allocation fails, delete the resource list
2592 		 * entry to force pci_alloc_resource() to allocate
2593 		 * resources from the parent.
2594 		 */
2595 		resource_list_delete(rl, type, reg);
2596 #ifdef PCI_BAR_CLEAR
2597 		/* Clear the BAR */
2598 		start = 0;
2599 #else	/* !PCI_BAR_CLEAR */
2600 		/*
2601 		 * Don't clear BAR here.  Some BIOS lists HPET as a
2602 		 * PCI function, clearing the BAR causes HPET timer
2603 		 * stop ticking.
2604 		 */
2605 		if (bootverbose) {
2606 			kprintf("pci:%d:%d:%d: resource reservation failed "
2607 				"%#jx - %#jx\n", b, s, f,
2608 				(intmax_t)start, (intmax_t)end);
2609 		}
2610 		return (barlen);
2611 #endif	/* PCI_BAR_CLEAR */
2612 	} else {
2613 		start = rman_get_start(res);
2614 	}
2615 	pci_write_config(dev, reg, start, 4);
2616 	if (ln2range == 64)
2617 		pci_write_config(dev, reg + 4, start >> 32, 4);
2618 	return (barlen);
2619 }
2620 
2621 /*
2622  * For ATA devices we need to decide early what addressing mode to use.
2623  * Legacy demands that the primary and secondary ATA ports sits on the
2624  * same addresses that old ISA hardware did. This dictates that we use
2625  * those addresses and ignore the BAR's if we cannot set PCI native
2626  * addressing mode.
2627  */
2628 static void
2629 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2630     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2631 {
2632 	int rid, type, progif;
2633 #if 0
2634 	/* if this device supports PCI native addressing use it */
2635 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2636 	if ((progif & 0x8a) == 0x8a) {
2637 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2638 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2639 			kprintf("Trying ATA native PCI addressing mode\n");
2640 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2641 		}
2642 	}
2643 #endif
2644 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2645 	type = SYS_RES_IOPORT;
2646 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2647 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2648 		    prefetchmask & (1 << 0));
2649 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2650 		    prefetchmask & (1 << 1));
2651 	} else {
2652 		rid = PCIR_BAR(0);
2653 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2654 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2655 		    0, -1);
2656 		rid = PCIR_BAR(1);
2657 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2658 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2659 		    0, -1);
2660 	}
2661 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2662 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2663 		    prefetchmask & (1 << 2));
2664 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2665 		    prefetchmask & (1 << 3));
2666 	} else {
2667 		rid = PCIR_BAR(2);
2668 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2669 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2670 		    0, -1);
2671 		rid = PCIR_BAR(3);
2672 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2673 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2674 		    0, -1);
2675 	}
2676 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2677 	    prefetchmask & (1 << 4));
2678 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2679 	    prefetchmask & (1 << 5));
2680 }
2681 
2682 static void
2683 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2684 {
2685 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2686 	pcicfgregs *cfg = &dinfo->cfg;
2687 	char tunable_name[64];
2688 	int irq;
2689 
2690 	/* Has to have an intpin to have an interrupt. */
2691 	if (cfg->intpin == 0)
2692 		return;
2693 
2694 	/* Let the user override the IRQ with a tunable. */
2695 	irq = PCI_INVALID_IRQ;
2696 	ksnprintf(tunable_name, sizeof(tunable_name),
2697 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2698 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2699 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2700 		if (irq >= 255 || irq <= 0) {
2701 			irq = PCI_INVALID_IRQ;
2702 		} else {
2703 			if (machintr_legacy_intr_find(irq,
2704 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2705 				device_printf(dev,
2706 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2707 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2708 				    cfg->intpin + 'A' - 1, irq);
2709 				irq = PCI_INVALID_IRQ;
2710 			} else {
2711 				BUS_CONFIG_INTR(bus, dev, irq,
2712 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2713 			}
2714 		}
2715 	}
2716 
2717 	/*
2718 	 * If we didn't get an IRQ via the tunable, then we either use the
2719 	 * IRQ value in the intline register or we ask the bus to route an
2720 	 * interrupt for us.  If force_route is true, then we only use the
2721 	 * value in the intline register if the bus was unable to assign an
2722 	 * IRQ.
2723 	 */
2724 	if (!PCI_INTERRUPT_VALID(irq)) {
2725 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2726 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2727 		if (!PCI_INTERRUPT_VALID(irq))
2728 			irq = cfg->intline;
2729 	}
2730 
2731 	/* If after all that we don't have an IRQ, just bail. */
2732 	if (!PCI_INTERRUPT_VALID(irq))
2733 		return;
2734 
2735 	/* Update the config register if it changed. */
2736 	if (irq != cfg->intline) {
2737 		cfg->intline = irq;
2738 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2739 	}
2740 
2741 	/* Add this IRQ as rid 0 interrupt resource. */
2742 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2743 	    machintr_legacy_intr_cpuid(irq));
2744 }
2745 
2746 void
2747 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
2748 {
2749 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2750 	pcicfgregs *cfg = &dinfo->cfg;
2751 	struct resource_list *rl = &dinfo->resources;
2752 	struct pci_quirk *q;
2753 	int b, i, f, s;
2754 
2755 	b = cfg->bus;
2756 	s = cfg->slot;
2757 	f = cfg->func;
2758 
2759 	/* ATA devices needs special map treatment */
2760 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
2761 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
2762 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
2763 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
2764 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
2765 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
2766 	else
2767 		for (i = 0; i < cfg->nummaps;)
2768 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
2769 			    rl, force, prefetchmask & (1 << i));
2770 
2771 	/*
2772 	 * Add additional, quirked resources.
2773 	 */
2774 	for (q = &pci_quirks[0]; q->devid; q++) {
2775 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
2776 		    && q->type == PCI_QUIRK_MAP_REG)
2777 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
2778 			  force, 0);
2779 	}
2780 
2781 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
2782 		/*
2783 		 * Try to re-route interrupts. Sometimes the BIOS or
2784 		 * firmware may leave bogus values in these registers.
2785 		 * If the re-route fails, then just stick with what we
2786 		 * have.
2787 		 */
2788 		pci_assign_interrupt(bus, dev, 1);
2789 	}
2790 }
2791 
2792 void
2793 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
2794 {
2795 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
2796 	device_t pcib = device_get_parent(dev);
2797 	struct pci_devinfo *dinfo;
2798 	int maxslots;
2799 	int s, f, pcifunchigh;
2800 	uint8_t hdrtype;
2801 
2802 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
2803 	    ("dinfo_size too small"));
2804 	maxslots = PCIB_MAXSLOTS(pcib);
2805 	for (s = 0; s <= maxslots; s++) {
2806 		pcifunchigh = 0;
2807 		f = 0;
2808 		DELAY(1);
2809 		hdrtype = REG(PCIR_HDRTYPE, 1);
2810 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
2811 			continue;
2812 		if (hdrtype & PCIM_MFDEV)
2813 			pcifunchigh = PCI_FUNCMAX;
2814 		for (f = 0; f <= pcifunchigh; f++) {
2815 			dinfo = pci_read_device(pcib, domain, busno, s, f,
2816 			    dinfo_size);
2817 			if (dinfo != NULL) {
2818 				pci_add_child(dev, dinfo);
2819 			}
2820 		}
2821 	}
2822 #undef REG
2823 }
2824 
2825 void
2826 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
2827 {
2828 	device_t pcib;
2829 
2830 	pcib = device_get_parent(bus);
2831 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
2832 	device_set_ivars(dinfo->cfg.dev, dinfo);
2833 	resource_list_init(&dinfo->resources);
2834 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
2835 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
2836 	pci_print_verbose(dinfo);
2837 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
2838 }
2839 
2840 static int
2841 pci_probe(device_t dev)
2842 {
2843 	device_set_desc(dev, "PCI bus");
2844 
2845 	/* Allow other subclasses to override this driver. */
2846 	return (-1000);
2847 }
2848 
2849 static int
2850 pci_attach(device_t dev)
2851 {
2852 	int busno, domain;
2853 
2854 	/*
2855 	 * Since there can be multiple independantly numbered PCI
2856 	 * busses on systems with multiple PCI domains, we can't use
2857 	 * the unit number to decide which bus we are probing. We ask
2858 	 * the parent pcib what our domain and bus numbers are.
2859 	 */
2860 	domain = pcib_get_domain(dev);
2861 	busno = pcib_get_bus(dev);
2862 	if (bootverbose)
2863 		device_printf(dev, "domain=%d, physical bus=%d\n",
2864 		    domain, busno);
2865 
2866 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
2867 
2868 	return (bus_generic_attach(dev));
2869 }
2870 
2871 int
2872 pci_suspend(device_t dev)
2873 {
2874 	int dstate, error, i, numdevs;
2875 	device_t acpi_dev, child, *devlist;
2876 	struct pci_devinfo *dinfo;
2877 
2878 	/*
2879 	 * Save the PCI configuration space for each child and set the
2880 	 * device in the appropriate power state for this sleep state.
2881 	 */
2882 	acpi_dev = NULL;
2883 	if (pci_do_power_resume)
2884 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2885 	device_get_children(dev, &devlist, &numdevs);
2886 	for (i = 0; i < numdevs; i++) {
2887 		child = devlist[i];
2888 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2889 		pci_cfg_save(child, dinfo, 0);
2890 	}
2891 
2892 	/* Suspend devices before potentially powering them down. */
2893 	error = bus_generic_suspend(dev);
2894 	if (error) {
2895 		kfree(devlist, M_TEMP);
2896 		return (error);
2897 	}
2898 
2899 	/*
2900 	 * Always set the device to D3.  If ACPI suggests a different
2901 	 * power state, use it instead.  If ACPI is not present, the
2902 	 * firmware is responsible for managing device power.  Skip
2903 	 * children who aren't attached since they are powered down
2904 	 * separately.  Only manage type 0 devices for now.
2905 	 */
2906 	for (i = 0; acpi_dev && i < numdevs; i++) {
2907 		child = devlist[i];
2908 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2909 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
2910 			dstate = PCI_POWERSTATE_D3;
2911 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
2912 			pci_set_powerstate(child, dstate);
2913 		}
2914 	}
2915 	kfree(devlist, M_TEMP);
2916 	return (0);
2917 }
2918 
2919 int
2920 pci_resume(device_t dev)
2921 {
2922 	int i, numdevs;
2923 	device_t acpi_dev, child, *devlist;
2924 	struct pci_devinfo *dinfo;
2925 
2926 	/*
2927 	 * Set each child to D0 and restore its PCI configuration space.
2928 	 */
2929 	acpi_dev = NULL;
2930 	if (pci_do_power_resume)
2931 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
2932 	device_get_children(dev, &devlist, &numdevs);
2933 	for (i = 0; i < numdevs; i++) {
2934 		/*
2935 		 * Notify ACPI we're going to D0 but ignore the result.  If
2936 		 * ACPI is not present, the firmware is responsible for
2937 		 * managing device power.  Only manage type 0 devices for now.
2938 		 */
2939 		child = devlist[i];
2940 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
2941 		if (acpi_dev && device_is_attached(child) &&
2942 		    dinfo->cfg.hdrtype == 0) {
2943 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
2944 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
2945 		}
2946 
2947 		/* Now the device is powered up, restore its config space. */
2948 		pci_cfg_restore(child, dinfo);
2949 	}
2950 	kfree(devlist, M_TEMP);
2951 	return (bus_generic_resume(dev));
2952 }
2953 
2954 static void
2955 pci_load_vendor_data(void)
2956 {
2957 	caddr_t vendordata, info;
2958 
2959 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
2960 		info = preload_search_info(vendordata, MODINFO_ADDR);
2961 		pci_vendordata = *(char **)info;
2962 		info = preload_search_info(vendordata, MODINFO_SIZE);
2963 		pci_vendordata_size = *(size_t *)info;
2964 		/* terminate the database */
2965 		pci_vendordata[pci_vendordata_size] = '\n';
2966 	}
2967 }
2968 
2969 void
2970 pci_driver_added(device_t dev, driver_t *driver)
2971 {
2972 	int numdevs;
2973 	device_t *devlist;
2974 	device_t child;
2975 	struct pci_devinfo *dinfo;
2976 	int i;
2977 
2978 	if (bootverbose)
2979 		device_printf(dev, "driver added\n");
2980 	DEVICE_IDENTIFY(driver, dev);
2981 	device_get_children(dev, &devlist, &numdevs);
2982 	for (i = 0; i < numdevs; i++) {
2983 		child = devlist[i];
2984 		if (device_get_state(child) != DS_NOTPRESENT)
2985 			continue;
2986 		dinfo = device_get_ivars(child);
2987 		pci_print_verbose(dinfo);
2988 		if (bootverbose)
2989 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
2990 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2991 			    dinfo->cfg.func);
2992 		pci_cfg_restore(child, dinfo);
2993 		if (device_probe_and_attach(child) != 0)
2994 			pci_cfg_save(child, dinfo, 1);
2995 	}
2996 	kfree(devlist, M_TEMP);
2997 }
2998 
2999 static void
3000 pci_child_detached(device_t parent __unused, device_t child)
3001 {
3002 	/* Turn child's power off */
3003 	pci_cfg_save(child, device_get_ivars(child), 1);
3004 }
3005 
3006 int
3007 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3008     driver_intr_t *intr, void *arg, void **cookiep,
3009     lwkt_serialize_t serializer, const char *desc)
3010 {
3011 	int rid, error;
3012 	void *cookie;
3013 
3014 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3015 	    arg, &cookie, serializer, desc);
3016 	if (error)
3017 		return (error);
3018 
3019 	/* If this is not a direct child, just bail out. */
3020 	if (device_get_parent(child) != dev) {
3021 		*cookiep = cookie;
3022 		return(0);
3023 	}
3024 
3025 	rid = rman_get_rid(irq);
3026 	if (rid == 0) {
3027 		/* Make sure that INTx is enabled */
3028 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3029 	} else {
3030 		struct pci_devinfo *dinfo = device_get_ivars(child);
3031 		uint64_t addr;
3032 		uint32_t data;
3033 
3034 		/*
3035 		 * Check to see if the interrupt is MSI or MSI-X.
3036 		 * Ask our parent to map the MSI and give
3037 		 * us the address and data register values.
3038 		 * If we fail for some reason, teardown the
3039 		 * interrupt handler.
3040 		 */
3041 		if (dinfo->cfg.msi.msi_alloc > 0) {
3042 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3043 
3044 			if (msi->msi_addr == 0) {
3045 				KASSERT(msi->msi_handlers == 0,
3046 			    ("MSI has handlers, but vectors not mapped"));
3047 				error = PCIB_MAP_MSI(device_get_parent(dev),
3048 				    child, rman_get_start(irq), &addr, &data,
3049 				    rman_get_cpuid(irq));
3050 				if (error)
3051 					goto bad;
3052 				msi->msi_addr = addr;
3053 				msi->msi_data = data;
3054 				pci_enable_msi(child, addr, data);
3055 			}
3056 			msi->msi_handlers++;
3057 		} else {
3058 			struct msix_vector *mv;
3059 			u_int vector;
3060 
3061 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3062 			    ("No MSI-X or MSI rid %d allocated", rid));
3063 
3064 			mv = pci_find_msix_vector(child, rid);
3065 			KASSERT(mv != NULL,
3066 			    ("MSI-X rid %d is not allocated", rid));
3067 			KASSERT(mv->mv_address == 0,
3068 			    ("MSI-X rid %d has been setup", rid));
3069 
3070 			error = PCIB_MAP_MSI(device_get_parent(dev),
3071 			    child, rman_get_start(irq), &addr, &data,
3072 			    rman_get_cpuid(irq));
3073 			if (error)
3074 				goto bad;
3075 			mv->mv_address = addr;
3076 			mv->mv_data = data;
3077 
3078 			vector = PCI_MSIX_RID2VEC(rid);
3079 			pci_setup_msix_vector(child, vector,
3080 			    mv->mv_address, mv->mv_data);
3081 			pci_unmask_msix_vector(child, vector);
3082 		}
3083 
3084 		/* Make sure that INTx is disabled if we are using MSI/MSIX */
3085 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3086 	bad:
3087 		if (error) {
3088 			(void)bus_generic_teardown_intr(dev, child, irq,
3089 			    cookie);
3090 			return (error);
3091 		}
3092 	}
3093 	*cookiep = cookie;
3094 	return (0);
3095 }
3096 
3097 int
3098 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3099     void *cookie)
3100 {
3101 	int rid, error;
3102 
3103 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3104 		return (EINVAL);
3105 
3106 	/* If this isn't a direct child, just bail out */
3107 	if (device_get_parent(child) != dev)
3108 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3109 
3110 	rid = rman_get_rid(irq);
3111 	if (rid == 0) {
3112 		/* Mask INTx */
3113 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3114 	} else {
3115 		struct pci_devinfo *dinfo = device_get_ivars(child);
3116 
3117 		/*
3118 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3119 		 * decrement the appropriate handlers count and mask the
3120 		 * MSI-X message, or disable MSI messages if the count
3121 		 * drops to 0.
3122 		 */
3123 		if (dinfo->cfg.msi.msi_alloc > 0) {
3124 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3125 
3126 			KASSERT(rid <= msi->msi_alloc,
3127 			    ("MSI-X index too high"));
3128 			KASSERT(msi->msi_handlers > 0,
3129 			    ("MSI rid %d is not setup", rid));
3130 
3131 			msi->msi_handlers--;
3132 			if (msi->msi_handlers == 0)
3133 				pci_disable_msi(child);
3134 		} else {
3135 			struct msix_vector *mv;
3136 
3137 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3138 			    ("No MSI or MSI-X rid %d allocated", rid));
3139 
3140 			mv = pci_find_msix_vector(child, rid);
3141 			KASSERT(mv != NULL,
3142 			    ("MSI-X rid %d is not allocated", rid));
3143 			KASSERT(mv->mv_address != 0,
3144 			    ("MSI-X rid %d has not been setup", rid));
3145 
3146 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3147 			mv->mv_address = 0;
3148 			mv->mv_data = 0;
3149 		}
3150 	}
3151 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3152 	if (rid > 0)
3153 		KASSERT(error == 0,
3154 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3155 	return (error);
3156 }
3157 
3158 int
3159 pci_print_child(device_t dev, device_t child)
3160 {
3161 	struct pci_devinfo *dinfo;
3162 	struct resource_list *rl;
3163 	int retval = 0;
3164 
3165 	dinfo = device_get_ivars(child);
3166 	rl = &dinfo->resources;
3167 
3168 	retval += bus_print_child_header(dev, child);
3169 
3170 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3171 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3172 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3173 	if (device_get_flags(dev))
3174 		retval += kprintf(" flags %#x", device_get_flags(dev));
3175 
3176 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3177 	    pci_get_function(child));
3178 
3179 	retval += bus_print_child_footer(dev, child);
3180 
3181 	return (retval);
3182 }
3183 
3184 static struct
3185 {
3186 	int	class;
3187 	int	subclass;
3188 	char	*desc;
3189 } pci_nomatch_tab[] = {
3190 	{PCIC_OLD,		-1,			"old"},
3191 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3192 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3193 	{PCIC_STORAGE,		-1,			"mass storage"},
3194 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3195 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3196 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3197 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3198 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3199 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3200 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3201 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3202 	{PCIC_NETWORK,		-1,			"network"},
3203 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3204 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3205 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3206 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3207 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3208 	{PCIC_DISPLAY,		-1,			"display"},
3209 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3210 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3211 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3212 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3213 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3214 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3215 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3216 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3217 	{PCIC_MEMORY,		-1,			"memory"},
3218 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3219 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3220 	{PCIC_BRIDGE,		-1,			"bridge"},
3221 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3222 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3223 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3224 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3225 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3226 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3227 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3228 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3229 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3230 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3231 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3232 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3233 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3234 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3235 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3236 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3237 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3238 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3239 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3240 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3241 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3242 	{PCIC_INPUTDEV,		-1,			"input device"},
3243 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3244 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3245 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3246 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3247 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3248 	{PCIC_DOCKING,		-1,			"docking station"},
3249 	{PCIC_PROCESSOR,	-1,			"processor"},
3250 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3251 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3252 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3253 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3254 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3255 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3256 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3257 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3258 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3259 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3260 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3261 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3262 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3263 	{PCIC_SATCOM,		-1,			"satellite communication"},
3264 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3265 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3266 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3267 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3268 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3269 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3270 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3271 	{PCIC_DASP,		-1,			"dasp"},
3272 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3273 	{0, 0,		NULL}
3274 };
3275 
3276 void
3277 pci_probe_nomatch(device_t dev, device_t child)
3278 {
3279 	int	i;
3280 	char	*cp, *scp, *device;
3281 
3282 	/*
3283 	 * Look for a listing for this device in a loaded device database.
3284 	 */
3285 	if ((device = pci_describe_device(child)) != NULL) {
3286 		device_printf(dev, "<%s>", device);
3287 		kfree(device, M_DEVBUF);
3288 	} else {
3289 		/*
3290 		 * Scan the class/subclass descriptions for a general
3291 		 * description.
3292 		 */
3293 		cp = "unknown";
3294 		scp = NULL;
3295 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3296 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3297 				if (pci_nomatch_tab[i].subclass == -1) {
3298 					cp = pci_nomatch_tab[i].desc;
3299 				} else if (pci_nomatch_tab[i].subclass ==
3300 				    pci_get_subclass(child)) {
3301 					scp = pci_nomatch_tab[i].desc;
3302 				}
3303 			}
3304 		}
3305 		device_printf(dev, "<%s%s%s>",
3306 		    cp ? cp : "",
3307 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3308 		    scp ? scp : "");
3309 	}
3310 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3311 		pci_get_vendor(child), pci_get_device(child),
3312 		pci_get_slot(child), pci_get_function(child));
3313 	if (pci_get_intpin(child) > 0) {
3314 		int irq;
3315 
3316 		irq = pci_get_irq(child);
3317 		if (PCI_INTERRUPT_VALID(irq))
3318 			kprintf(" irq %d", irq);
3319 	}
3320 	kprintf("\n");
3321 
3322 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3323 }
3324 
3325 /*
3326  * Parse the PCI device database, if loaded, and return a pointer to a
3327  * description of the device.
3328  *
3329  * The database is flat text formatted as follows:
3330  *
3331  * Any line not in a valid format is ignored.
3332  * Lines are terminated with newline '\n' characters.
3333  *
3334  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3335  * the vendor name.
3336  *
3337  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3338  * - devices cannot be listed without a corresponding VENDOR line.
3339  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3340  * another TAB, then the device name.
3341  */
3342 
3343 /*
3344  * Assuming (ptr) points to the beginning of a line in the database,
3345  * return the vendor or device and description of the next entry.
3346  * The value of (vendor) or (device) inappropriate for the entry type
3347  * is set to -1.  Returns nonzero at the end of the database.
3348  *
3349  * Note that this is slightly unrobust in the face of corrupt data;
3350  * we attempt to safeguard against this by spamming the end of the
3351  * database with a newline when we initialise.
3352  */
3353 static int
3354 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3355 {
3356 	char	*cp = *ptr;
3357 	int	left;
3358 
3359 	*device = -1;
3360 	*vendor = -1;
3361 	**desc = '\0';
3362 	for (;;) {
3363 		left = pci_vendordata_size - (cp - pci_vendordata);
3364 		if (left <= 0) {
3365 			*ptr = cp;
3366 			return(1);
3367 		}
3368 
3369 		/* vendor entry? */
3370 		if (*cp != '\t' &&
3371 		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
3372 			break;
3373 		/* device entry? */
3374 		if (*cp == '\t' &&
3375 		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
3376 			break;
3377 
3378 		/* skip to next line */
3379 		while (*cp != '\n' && left > 0) {
3380 			cp++;
3381 			left--;
3382 		}
3383 		if (*cp == '\n') {
3384 			cp++;
3385 			left--;
3386 		}
3387 	}
3388 	/* skip to next line */
3389 	while (*cp != '\n' && left > 0) {
3390 		cp++;
3391 		left--;
3392 	}
3393 	if (*cp == '\n' && left > 0)
3394 		cp++;
3395 	*ptr = cp;
3396 	return(0);
3397 }
3398 
3399 static char *
3400 pci_describe_device(device_t dev)
3401 {
3402 	int	vendor, device;
3403 	char	*desc, *vp, *dp, *line;
3404 
3405 	desc = vp = dp = NULL;
3406 
3407 	/*
3408 	 * If we have no vendor data, we can't do anything.
3409 	 */
3410 	if (pci_vendordata == NULL)
3411 		goto out;
3412 
3413 	/*
3414 	 * Scan the vendor data looking for this device
3415 	 */
3416 	line = pci_vendordata;
3417 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3418 		goto out;
3419 	for (;;) {
3420 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3421 			goto out;
3422 		if (vendor == pci_get_vendor(dev))
3423 			break;
3424 	}
3425 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3426 		goto out;
3427 	for (;;) {
3428 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3429 			*dp = 0;
3430 			break;
3431 		}
3432 		if (vendor != -1) {
3433 			*dp = 0;
3434 			break;
3435 		}
3436 		if (device == pci_get_device(dev))
3437 			break;
3438 	}
3439 	if (dp[0] == '\0')
3440 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3441 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3442 	    NULL)
3443 		ksprintf(desc, "%s, %s", vp, dp);
3444  out:
3445 	if (vp != NULL)
3446 		kfree(vp, M_DEVBUF);
3447 	if (dp != NULL)
3448 		kfree(dp, M_DEVBUF);
3449 	return(desc);
3450 }
3451 
3452 int
3453 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3454 {
3455 	struct pci_devinfo *dinfo;
3456 	pcicfgregs *cfg;
3457 
3458 	dinfo = device_get_ivars(child);
3459 	cfg = &dinfo->cfg;
3460 
3461 	switch (which) {
3462 	case PCI_IVAR_ETHADDR:
3463 		/*
3464 		 * The generic accessor doesn't deal with failure, so
3465 		 * we set the return value, then return an error.
3466 		 */
3467 		*((uint8_t **) result) = NULL;
3468 		return (EINVAL);
3469 	case PCI_IVAR_SUBVENDOR:
3470 		*result = cfg->subvendor;
3471 		break;
3472 	case PCI_IVAR_SUBDEVICE:
3473 		*result = cfg->subdevice;
3474 		break;
3475 	case PCI_IVAR_VENDOR:
3476 		*result = cfg->vendor;
3477 		break;
3478 	case PCI_IVAR_DEVICE:
3479 		*result = cfg->device;
3480 		break;
3481 	case PCI_IVAR_DEVID:
3482 		*result = (cfg->device << 16) | cfg->vendor;
3483 		break;
3484 	case PCI_IVAR_CLASS:
3485 		*result = cfg->baseclass;
3486 		break;
3487 	case PCI_IVAR_SUBCLASS:
3488 		*result = cfg->subclass;
3489 		break;
3490 	case PCI_IVAR_PROGIF:
3491 		*result = cfg->progif;
3492 		break;
3493 	case PCI_IVAR_REVID:
3494 		*result = cfg->revid;
3495 		break;
3496 	case PCI_IVAR_INTPIN:
3497 		*result = cfg->intpin;
3498 		break;
3499 	case PCI_IVAR_IRQ:
3500 		*result = cfg->intline;
3501 		break;
3502 	case PCI_IVAR_DOMAIN:
3503 		*result = cfg->domain;
3504 		break;
3505 	case PCI_IVAR_BUS:
3506 		*result = cfg->bus;
3507 		break;
3508 	case PCI_IVAR_SLOT:
3509 		*result = cfg->slot;
3510 		break;
3511 	case PCI_IVAR_FUNCTION:
3512 		*result = cfg->func;
3513 		break;
3514 	case PCI_IVAR_CMDREG:
3515 		*result = cfg->cmdreg;
3516 		break;
3517 	case PCI_IVAR_CACHELNSZ:
3518 		*result = cfg->cachelnsz;
3519 		break;
3520 	case PCI_IVAR_MINGNT:
3521 		*result = cfg->mingnt;
3522 		break;
3523 	case PCI_IVAR_MAXLAT:
3524 		*result = cfg->maxlat;
3525 		break;
3526 	case PCI_IVAR_LATTIMER:
3527 		*result = cfg->lattimer;
3528 		break;
3529 	case PCI_IVAR_PCIXCAP_PTR:
3530 		*result = cfg->pcix.pcix_ptr;
3531 		break;
3532 	case PCI_IVAR_PCIECAP_PTR:
3533 		*result = cfg->expr.expr_ptr;
3534 		break;
3535 	case PCI_IVAR_VPDCAP_PTR:
3536 		*result = cfg->vpd.vpd_reg;
3537 		break;
3538 	default:
3539 		return (ENOENT);
3540 	}
3541 	return (0);
3542 }
3543 
3544 int
3545 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3546 {
3547 	struct pci_devinfo *dinfo;
3548 
3549 	dinfo = device_get_ivars(child);
3550 
3551 	switch (which) {
3552 	case PCI_IVAR_INTPIN:
3553 		dinfo->cfg.intpin = value;
3554 		return (0);
3555 	case PCI_IVAR_ETHADDR:
3556 	case PCI_IVAR_SUBVENDOR:
3557 	case PCI_IVAR_SUBDEVICE:
3558 	case PCI_IVAR_VENDOR:
3559 	case PCI_IVAR_DEVICE:
3560 	case PCI_IVAR_DEVID:
3561 	case PCI_IVAR_CLASS:
3562 	case PCI_IVAR_SUBCLASS:
3563 	case PCI_IVAR_PROGIF:
3564 	case PCI_IVAR_REVID:
3565 	case PCI_IVAR_IRQ:
3566 	case PCI_IVAR_DOMAIN:
3567 	case PCI_IVAR_BUS:
3568 	case PCI_IVAR_SLOT:
3569 	case PCI_IVAR_FUNCTION:
3570 		return (EINVAL);	/* disallow for now */
3571 
3572 	default:
3573 		return (ENOENT);
3574 	}
3575 }
3576 #ifdef notyet
3577 #include "opt_ddb.h"
3578 #ifdef DDB
3579 #include <ddb/ddb.h>
3580 #include <sys/cons.h>
3581 
3582 /*
3583  * List resources based on pci map registers, used for within ddb
3584  */
3585 
3586 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3587 {
3588 	struct pci_devinfo *dinfo;
3589 	struct devlist *devlist_head;
3590 	struct pci_conf *p;
3591 	const char *name;
3592 	int i, error, none_count;
3593 
3594 	none_count = 0;
3595 	/* get the head of the device queue */
3596 	devlist_head = &pci_devq;
3597 
3598 	/*
3599 	 * Go through the list of devices and print out devices
3600 	 */
3601 	for (error = 0, i = 0,
3602 	     dinfo = STAILQ_FIRST(devlist_head);
3603 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3604 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3605 
3606 		/* Populate pd_name and pd_unit */
3607 		name = NULL;
3608 		if (dinfo->cfg.dev)
3609 			name = device_get_name(dinfo->cfg.dev);
3610 
3611 		p = &dinfo->conf;
3612 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3613 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3614 			(name && *name) ? name : "none",
3615 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3616 			none_count++,
3617 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3618 			p->pc_sel.pc_func, (p->pc_class << 16) |
3619 			(p->pc_subclass << 8) | p->pc_progif,
3620 			(p->pc_subdevice << 16) | p->pc_subvendor,
3621 			(p->pc_device << 16) | p->pc_vendor,
3622 			p->pc_revid, p->pc_hdr);
3623 	}
3624 }
3625 #endif /* DDB */
3626 #endif
3627 
3628 static struct resource *
3629 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3630     u_long start, u_long end, u_long count, u_int flags)
3631 {
3632 	struct pci_devinfo *dinfo = device_get_ivars(child);
3633 	struct resource_list *rl = &dinfo->resources;
3634 	struct resource_list_entry *rle;
3635 	struct resource *res;
3636 	pci_addr_t map, testval;
3637 	int mapsize;
3638 
3639 	/*
3640 	 * Weed out the bogons, and figure out how large the BAR/map
3641 	 * is.  Bars that read back 0 here are bogus and unimplemented.
3642 	 * Note: atapci in legacy mode are special and handled elsewhere
3643 	 * in the code.  If you have a atapci device in legacy mode and
3644 	 * it fails here, that other code is broken.
3645 	 */
3646 	res = NULL;
3647 	map = pci_read_config(child, *rid, 4);
3648 	pci_write_config(child, *rid, 0xffffffff, 4);
3649 	testval = pci_read_config(child, *rid, 4);
3650 	if (pci_maprange(testval) == 64)
3651 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3652 	if (pci_mapbase(testval) == 0)
3653 		goto out;
3654 
3655 	/*
3656 	 * Restore the original value of the BAR.  We may have reprogrammed
3657 	 * the BAR of the low-level console device and when booting verbose,
3658 	 * we need the console device addressable.
3659 	 */
3660 	pci_write_config(child, *rid, map, 4);
3661 
3662 	if (PCI_BAR_MEM(testval)) {
3663 		if (type != SYS_RES_MEMORY) {
3664 			if (bootverbose)
3665 				device_printf(dev,
3666 				    "child %s requested type %d for rid %#x,"
3667 				    " but the BAR says it is an memio\n",
3668 				    device_get_nameunit(child), type, *rid);
3669 			goto out;
3670 		}
3671 	} else {
3672 		if (type != SYS_RES_IOPORT) {
3673 			if (bootverbose)
3674 				device_printf(dev,
3675 				    "child %s requested type %d for rid %#x,"
3676 				    " but the BAR says it is an ioport\n",
3677 				    device_get_nameunit(child), type, *rid);
3678 			goto out;
3679 		}
3680 	}
3681 	/*
3682 	 * For real BARs, we need to override the size that
3683 	 * the driver requests, because that's what the BAR
3684 	 * actually uses and we would otherwise have a
3685 	 * situation where we might allocate the excess to
3686 	 * another driver, which won't work.
3687 	 */
3688 	mapsize = pci_mapsize(testval);
3689 	count = 1UL << mapsize;
3690 	if (RF_ALIGNMENT(flags) < mapsize)
3691 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3692 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3693 		flags |= RF_PREFETCHABLE;
3694 
3695 	/*
3696 	 * Allocate enough resource, and then write back the
3697 	 * appropriate bar for that resource.
3698 	 */
3699 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3700 	    start, end, count, flags, -1);
3701 	if (res == NULL) {
3702 		device_printf(child,
3703 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3704 		    count, *rid, type, start, end);
3705 		goto out;
3706 	}
3707 	resource_list_add(rl, type, *rid, start, end, count, -1);
3708 	rle = resource_list_find(rl, type, *rid);
3709 	if (rle == NULL)
3710 		panic("pci_alloc_map: unexpectedly can't find resource.");
3711 	rle->res = res;
3712 	rle->start = rman_get_start(res);
3713 	rle->end = rman_get_end(res);
3714 	rle->count = count;
3715 	if (bootverbose)
3716 		device_printf(child,
3717 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3718 		    count, *rid, type, rman_get_start(res));
3719 	map = rman_get_start(res);
3720 out:;
3721 	pci_write_config(child, *rid, map, 4);
3722 	if (pci_maprange(testval) == 64)
3723 		pci_write_config(child, *rid + 4, map >> 32, 4);
3724 	return (res);
3725 }
3726 
3727 
3728 struct resource *
3729 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
3730     u_long start, u_long end, u_long count, u_int flags, int cpuid)
3731 {
3732 	struct pci_devinfo *dinfo = device_get_ivars(child);
3733 	struct resource_list *rl = &dinfo->resources;
3734 	struct resource_list_entry *rle;
3735 	pcicfgregs *cfg = &dinfo->cfg;
3736 
3737 	/*
3738 	 * Perform lazy resource allocation
3739 	 */
3740 	if (device_get_parent(child) == dev) {
3741 		switch (type) {
3742 		case SYS_RES_IRQ:
3743 			/*
3744 			 * Can't alloc legacy interrupt once MSI messages
3745 			 * have been allocated.
3746 			 */
3747 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
3748 			    cfg->msix.msix_alloc > 0))
3749 				return (NULL);
3750 			/*
3751 			 * If the child device doesn't have an
3752 			 * interrupt routed and is deserving of an
3753 			 * interrupt, try to assign it one.
3754 			 */
3755 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
3756 			    (cfg->intpin != 0))
3757 				pci_assign_interrupt(dev, child, 0);
3758 			break;
3759 		case SYS_RES_IOPORT:
3760 		case SYS_RES_MEMORY:
3761 			if (*rid < PCIR_BAR(cfg->nummaps)) {
3762 				/*
3763 				 * Enable the I/O mode.  We should
3764 				 * also be assigning resources too
3765 				 * when none are present.  The
3766 				 * resource_list_alloc kind of sorta does
3767 				 * this...
3768 				 */
3769 				if (PCI_ENABLE_IO(dev, child, type))
3770 					return (NULL);
3771 			}
3772 			rle = resource_list_find(rl, type, *rid);
3773 			if (rle == NULL)
3774 				return (pci_alloc_map(dev, child, type, rid,
3775 				    start, end, count, flags));
3776 			break;
3777 		}
3778 		/*
3779 		 * If we've already allocated the resource, then
3780 		 * return it now.  But first we may need to activate
3781 		 * it, since we don't allocate the resource as active
3782 		 * above.  Normally this would be done down in the
3783 		 * nexus, but since we short-circuit that path we have
3784 		 * to do its job here.  Not sure if we should kfree the
3785 		 * resource if it fails to activate.
3786 		 */
3787 		rle = resource_list_find(rl, type, *rid);
3788 		if (rle != NULL && rle->res != NULL) {
3789 			if (bootverbose)
3790 				device_printf(child,
3791 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
3792 				    rman_get_size(rle->res), *rid, type,
3793 				    rman_get_start(rle->res));
3794 			if ((flags & RF_ACTIVE) &&
3795 			    bus_generic_activate_resource(dev, child, type,
3796 			    *rid, rle->res) != 0)
3797 				return (NULL);
3798 			return (rle->res);
3799 		}
3800 	}
3801 	return (resource_list_alloc(rl, dev, child, type, rid,
3802 	    start, end, count, flags, cpuid));
3803 }
3804 
3805 void
3806 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3807 {
3808 	struct pci_devinfo *dinfo;
3809 	struct resource_list *rl;
3810 	struct resource_list_entry *rle;
3811 
3812 	if (device_get_parent(child) != dev)
3813 		return;
3814 
3815 	dinfo = device_get_ivars(child);
3816 	rl = &dinfo->resources;
3817 	rle = resource_list_find(rl, type, rid);
3818 	if (rle) {
3819 		if (rle->res) {
3820 			if (rman_get_device(rle->res) != dev ||
3821 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3822 				device_printf(dev, "delete_resource: "
3823 				    "Resource still owned by child, oops. "
3824 				    "(type=%d, rid=%d, addr=%lx)\n",
3825 				    rle->type, rle->rid,
3826 				    rman_get_start(rle->res));
3827 				return;
3828 			}
3829 			bus_release_resource(dev, type, rid, rle->res);
3830 		}
3831 		resource_list_delete(rl, type, rid);
3832 	}
3833 	/*
3834 	 * Why do we turn off the PCI configuration BAR when we delete a
3835 	 * resource? -- imp
3836 	 */
3837 	pci_write_config(child, rid, 0, 4);
3838 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3839 }
3840 
3841 struct resource_list *
3842 pci_get_resource_list (device_t dev, device_t child)
3843 {
3844 	struct pci_devinfo *dinfo = device_get_ivars(child);
3845 
3846 	if (dinfo == NULL)
3847 		return (NULL);
3848 
3849 	return (&dinfo->resources);
3850 }
3851 
3852 uint32_t
3853 pci_read_config_method(device_t dev, device_t child, int reg, int width)
3854 {
3855 	struct pci_devinfo *dinfo = device_get_ivars(child);
3856 	pcicfgregs *cfg = &dinfo->cfg;
3857 
3858 	return (PCIB_READ_CONFIG(device_get_parent(dev),
3859 	    cfg->bus, cfg->slot, cfg->func, reg, width));
3860 }
3861 
3862 void
3863 pci_write_config_method(device_t dev, device_t child, int reg,
3864     uint32_t val, int width)
3865 {
3866 	struct pci_devinfo *dinfo = device_get_ivars(child);
3867 	pcicfgregs *cfg = &dinfo->cfg;
3868 
3869 	PCIB_WRITE_CONFIG(device_get_parent(dev),
3870 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
3871 }
3872 
3873 int
3874 pci_child_location_str_method(device_t dev, device_t child, char *buf,
3875     size_t buflen)
3876 {
3877 
3878 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
3879 	    pci_get_function(child));
3880 	return (0);
3881 }
3882 
3883 int
3884 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
3885     size_t buflen)
3886 {
3887 	struct pci_devinfo *dinfo;
3888 	pcicfgregs *cfg;
3889 
3890 	dinfo = device_get_ivars(child);
3891 	cfg = &dinfo->cfg;
3892 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
3893 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
3894 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
3895 	    cfg->progif);
3896 	return (0);
3897 }
3898 
3899 int
3900 pci_assign_interrupt_method(device_t dev, device_t child)
3901 {
3902 	struct pci_devinfo *dinfo = device_get_ivars(child);
3903 	pcicfgregs *cfg = &dinfo->cfg;
3904 
3905 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
3906 	    cfg->intpin));
3907 }
3908 
3909 static int
3910 pci_modevent(module_t mod, int what, void *arg)
3911 {
3912 	static struct cdev *pci_cdev;
3913 
3914 	switch (what) {
3915 	case MOD_LOAD:
3916 		STAILQ_INIT(&pci_devq);
3917 		pci_generation = 0;
3918 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
3919 				    "pci");
3920 		pci_load_vendor_data();
3921 		break;
3922 
3923 	case MOD_UNLOAD:
3924 		destroy_dev(pci_cdev);
3925 		break;
3926 	}
3927 
3928 	return (0);
3929 }
3930 
3931 void
3932 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
3933 {
3934 	int i;
3935 
3936 	/*
3937 	 * Only do header type 0 devices.  Type 1 devices are bridges,
3938 	 * which we know need special treatment.  Type 2 devices are
3939 	 * cardbus bridges which also require special treatment.
3940 	 * Other types are unknown, and we err on the side of safety
3941 	 * by ignoring them.
3942 	 */
3943 	if (dinfo->cfg.hdrtype != 0)
3944 		return;
3945 
3946 	/*
3947 	 * Restore the device to full power mode.  We must do this
3948 	 * before we restore the registers because moving from D3 to
3949 	 * D0 will cause the chip's BARs and some other registers to
3950 	 * be reset to some unknown power on reset values.  Cut down
3951 	 * the noise on boot by doing nothing if we are already in
3952 	 * state D0.
3953 	 */
3954 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
3955 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
3956 	}
3957 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3958 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
3959 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
3960 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
3961 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
3962 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
3963 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
3964 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
3965 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
3966 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
3967 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
3968 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
3969 
3970 	/* Restore MSI and MSI-X configurations if they are present. */
3971 	if (dinfo->cfg.msi.msi_location != 0)
3972 		pci_resume_msi(dev);
3973 	if (dinfo->cfg.msix.msix_location != 0)
3974 		pci_resume_msix(dev);
3975 }
3976 
3977 void
3978 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
3979 {
3980 	int i;
3981 	uint32_t cls;
3982 	int ps;
3983 
3984 	/*
3985 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
3986 	 * we know need special treatment.  Type 2 devices are cardbus bridges
3987 	 * which also require special treatment.  Other types are unknown, and
3988 	 * we err on the side of safety by ignoring them.  Powering down
3989 	 * bridges should not be undertaken lightly.
3990 	 */
3991 	if (dinfo->cfg.hdrtype != 0)
3992 		return;
3993 	for (i = 0; i < dinfo->cfg.nummaps; i++)
3994 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
3995 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
3996 
3997 	/*
3998 	 * Some drivers apparently write to these registers w/o updating our
3999 	 * cached copy.  No harm happens if we update the copy, so do so here
4000 	 * so we can restore them.  The COMMAND register is modified by the
4001 	 * bus w/o updating the cache.  This should represent the normally
4002 	 * writable portion of the 'defined' part of type 0 headers.  In
4003 	 * theory we also need to save/restore the PCI capability structures
4004 	 * we know about, but apart from power we don't know any that are
4005 	 * writable.
4006 	 */
4007 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4008 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4009 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4010 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4011 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4012 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4013 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4014 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4015 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4016 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4017 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4018 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4019 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4020 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4021 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4022 
4023 	/*
4024 	 * don't set the state for display devices, base peripherals and
4025 	 * memory devices since bad things happen when they are powered down.
4026 	 * We should (a) have drivers that can easily detach and (b) use
4027 	 * generic drivers for these devices so that some device actually
4028 	 * attaches.  We need to make sure that when we implement (a) we don't
4029 	 * power the device down on a reattach.
4030 	 */
4031 	cls = pci_get_class(dev);
4032 	if (!setstate)
4033 		return;
4034 	switch (pci_do_power_nodriver)
4035 	{
4036 		case 0:		/* NO powerdown at all */
4037 			return;
4038 		case 1:		/* Conservative about what to power down */
4039 			if (cls == PCIC_STORAGE)
4040 				return;
4041 			/*FALLTHROUGH*/
4042 		case 2:		/* Agressive about what to power down */
4043 			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
4044 			    cls == PCIC_BASEPERIPH)
4045 				return;
4046 			/*FALLTHROUGH*/
4047 		case 3:		/* Power down everything */
4048 			break;
4049 	}
4050 	/*
4051 	 * PCI spec says we can only go into D3 state from D0 state.
4052 	 * Transition from D[12] into D0 before going to D3 state.
4053 	 */
4054 	ps = pci_get_powerstate(dev);
4055 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4056 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4057 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4058 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4059 }
4060 
4061 #ifdef COMPAT_OLDPCI
4062 
4063 /*
4064  * Locate the parent of a PCI device by scanning the PCI devlist
4065  * and return the entry for the parent.
4066  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4067  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4068  */
4069 pcicfgregs *
4070 pci_devlist_get_parent(pcicfgregs *cfg)
4071 {
4072 	struct devlist *devlist_head;
4073 	struct pci_devinfo *dinfo;
4074 	pcicfgregs *bridge_cfg;
4075 	int i;
4076 
4077 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4078 
4079 	/* If the device is on PCI bus 0, look for the host */
4080 	if (cfg->bus == 0) {
4081 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4082 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4083 			bridge_cfg = &dinfo->cfg;
4084 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4085 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4086 		    		&& bridge_cfg->bus == cfg->bus) {
4087 				return bridge_cfg;
4088 			}
4089 		}
4090 	}
4091 
4092 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4093 	if (cfg->bus > 0) {
4094 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4095 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4096 			bridge_cfg = &dinfo->cfg;
4097 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4098 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4099 				&& bridge_cfg->secondarybus == cfg->bus) {
4100 				return bridge_cfg;
4101 			}
4102 		}
4103 	}
4104 
4105 	return NULL;
4106 }
4107 
4108 #endif	/* COMPAT_OLDPCI */
4109 
4110 int
4111 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4112 {
4113 	int rid, type;
4114 	u_int flags;
4115 
4116 	rid = 0;
4117 	type = PCI_INTR_TYPE_LEGACY;
4118 	flags = RF_SHAREABLE | RF_ACTIVE;
4119 
4120 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4121 	if (msi_enable) {
4122 		int cpu;
4123 
4124 		cpu = device_getenv_int(dev, "msi.cpu", -1);
4125 		if (cpu >= ncpus)
4126 			cpu = ncpus - 1;
4127 
4128 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4129 			flags &= ~RF_SHAREABLE;
4130 			type = PCI_INTR_TYPE_MSI;
4131 		}
4132 	}
4133 
4134 	*rid0 = rid;
4135 	*flags0 = flags;
4136 
4137 	return type;
4138 }
4139