xref: /illumos-gate/usr/src/uts/common/io/pciex/pcie.c (revision b8052df9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2019 Joyent, Inc.
25  * Copyright 2022 Oxide Computer Company
26  */
27 
28 /*
29  * PCIe Initialization
30  * -------------------
31  *
32  * The PCIe subsystem is split up and initializes itself in a couple of
33  * different places. This is due to the platform-specific nature of initializing
34  * resources and the nature of the SPARC PROM and how that influenced the
35  * subsystem. Note that traditional PCI (mostly seen these days in Virtual
36  * Machines) follows most of the same basic path outlined here, but skips a
37  * large chunk of PCIe-specific initialization.
38  *
39  * First, there is an initial device discovery phase that is taken care of by
40  * the platform. This is where we discover the set of devices that are present
41  * at system power on. These devices may or may not be hot-pluggable. In
42  * particular, this happens in a platform-specific way right now. In general, we
43  * expect most discovery to be driven by scanning each bus, device, and
44  * function, and seeing what actually exists and responds to configuration space
45  * reads. This is driven via pci_boot.c on x86. This may be seeded by something
46  * like device tree, a PROM, supplemented with ACPI, or by knowledge that the
47  * underlying platform has.
48  *
49  * As a part of this discovery process, the full set of resources that exist in
50  * the system for PCIe are:
51  *
52  *   o PCI buses
53  *   o Prefetchable Memory
54  *   o Non-prefetchable memory
55  *   o I/O ports
56  *
57  * This process is driven by a platform's PCI platform Resource Discovery (PRD)
58  * module. The PRD definitions can be found in <sys/plat/pci_prd.h> and are used
59  * to discover these resources, which will be converted into the initial set of
60  * the standard properties in the system: 'regs', 'available', 'ranges', etc.
61  * Currently it is up to platform-specific code (which should ideally be
62  * consolidated at some point) to set up all these properties.
63  *
64  * As a part of the discovery process, the platform code will create a device
65  * node (dev_info_t) for each discovered function and will create a PCIe nexus
66  * for each overall root complex that exists in the system. Most root complexes
67  * will have multiple root ports, each of which is the foundation of an
68  * independent PCIe bus due to the point-to-point nature of PCIe. When a root
69  * complex is found, a nexus driver such as npe (Nexus for PCIe Express) is
70  * attached. In the case of a non-PCIe-capable system this is where the older
71  * pci nexus driver would be used instead.
72  *
73  * To track data about a given device on a bus, a 'pcie_bus_t' structure is
74  * created for and assigned to every PCIe-based dev_info_t. This can be used to
75  * find the root port and get basic information about the device, its faults,
76  * and related information. This contains pointers to the corresponding root
77  * port as well.
78  *
79  * A root complex has its pcie_bus_t initialized as part of the device discovery
80  * process. That is, because we're trying to bootstrap the actual tree and most
81  * platforms don't have a representation for this that's explicitly
82  * discoverable, this is created manually. See callers of pcie_rc_init_bus().
83  *
84  * For other devices, bridges, and switches, the process is split into two.
85  * There is an initial pcie_bus_t that is created which will exist before we go
86  * through the actual driver attachment process. For example, on x86 this is
87  * done as part of the device and function discovery. The second pass of
88  * initialization is done only after the nexus driver actually is attached and
89  * it goes through and finishes processing all of its children.
90  *
91  * Child Initialization
92  * --------------------
93  *
94  * Generally speaking, the platform will first enumerate all PCIe devices that
94  * are in the system before it actually creates a device tree. This is part of
96  * the bus/device/function scanning that is performed and from that dev_info_t
97  * nodes are created for each discovered device and are inserted into the
98  * broader device tree. Later in boot, the actual device tree is walked and the
99  * nodes go through the standard dev_info_t initialization process (DS_PROTO,
100  * DS_LINKED, DS_BOUND, etc.).
101  *
102  * PCIe-specific initialization can roughly be broken into the following pieces:
103  *
104  *   1. Platform initial discovery and resource assignment
105  *   2. The pcie_bus_t initialization
106  *   3. Nexus driver child initialization
107  *   4. Fabric initialization
108  *   5. Device driver-specific initialization
109  *
110  * The first part of this (1) and (2) are discussed in the previous section.
111  * Part (1) in particular is a combination of the PRD (platform resource
112  * discovery) and general device initialization. After this, because we have a
113  * device tree, most of the standard nexus initialization happens.
114  *
115  * (5) is somewhat simple, so let's get into it before we discuss (3) and (4).
116  * This is the last thing that is called and that happens after all of the
117  * others are done. This is the logic that occurs in a driver's attach(9E) entry
118  * point. This is always device-specific and generally speaking should not be
119  * manipulating standard PCIe registers directly on their own. For example, the
120  * MSI/MSI-X, AER, Serial Number, etc. capabilities will be automatically dealt
121  * with by the framework in (3) and (4) below. In many cases, particularly
122  * things that are part of (4), adjusting them in the individual driver is not
123  * safe.
124  *
125  * Finally, let's talk about (3) and (4) as these are related. The NDI provides
126  * for a standard hook for a nexus to initialize its children. In our platforms,
127  * there are basically two possible PCIe nexus drivers: there is the generic
128  * pcieb -- PCIe bridge -- driver which is used for standard root ports,
129  * switches, etc. Then there is the platform-specific primary nexus driver,
130  * which is being slowly consolidated into a single one where it makes sense. An
131  * example of this is npe.
132  *
133  * Each of these has a child initialization function which is called from their
134  * DDI_CTLOPS_INITCHILD operation on the bus_ctl function pointer. This goes
135  * through and initializes a large number of different pieces of PCIe-based
136  * settings through the common pcie_initchild() function. This takes care of
137  * things like:
138  *
139  *   o Advanced Error Reporting
140  *   o Alternative Routing
141  *   o Capturing information around link speed, width, serial numbers, etc.
142  *   o Setting common properties around aborts
143  *
144  * There are a few caveats with this that need to be kept in mind:
145  *
146  *   o A dev_info_t indicates a specific function. This means that a
147  *     multi-function device will not all be initialized at the same time and
148  *     there is no guarantee that all children will be initialized before one of
149  *     them is attached.
150  *   o A child is only initialized if we have found a driver that matches an
151  *     alias in the dev_info_t's compatible array property.  While a lot of
152  *     multi-function devices are often multiple instances of the same thing
153  *     (e.g. a multi-port NIC with a function / NIC), this is not always the
154  *     case and one cannot make any assumptions here.
155  *
156  * This in turn leads to the next form of initialization that takes place in the
157  * case of (4). This is where we take care of things that need to be consistent
158  * across either entire devices or more generally across an entire root port and
159  * all of its children. There are a few different examples of this:
160  *
161  *   o Setting the maximum packet size
162  *   o Determining the tag width
163  *
164  * Note that features which are only based on function 0, such as ASPM (Active
165  * State Power Management), hardware autonomous width disable, etc. ultimately
166  * do not go through this path today. There are some implications here in that
167  * today several of these things are captured on functions which may not have
168  * any control here. This is an area of needed improvement.
169  *
170  * The settings in (4) are initialized in a common way, via
171  * pcie_fabric_setup(). This is called into from two different parts of
172  * the stack:
173  *
174  *   1. When we attach a root port, which is driven by pcieb.
175  *   2. When we have a hotplug event that adds a device.
176  *
177  * In general here we are going to use the term 'fabric' to refer to everything
178  * that is downstream of a root port. This corresponds to what the PCIe
179  * specification calls a 'hierarchy domain'. Strictly speaking, this is fine
180  * until peer-to-peer requests begin to happen that cause you to need to forward
181  * things across root ports. At that point the scope of the fabric increases and
182  * these settings become more complicated. We currently optimize for the much
183  * more common case, which is that each root port is effectively independent
184  * from a PCIe transaction routing perspective.
185  *
186  * Put differently, we use the term 'fabric' to refer to a set of PCIe devices
187  * that can route transactions to one another, which is generally constrained to
188  * everything under a root port and that root ports are independent. If this
189  * constraint changes, then all one needs to do is replace the discussion of the
190  * root port below with the broader root complex and system.
191  *
192  * A challenge with these settings is that once they're set and devices are
193  * actively making requests, we cannot really change them without resetting the
194  * links and cancelling all outstanding transactions via device resets. Because
195  * this is not something that we want to do, we instead look at how and when we
196  * set this to constrain what's going on.
197  *
198  * Because of this we basically say that if a given fabric has more than one
199  * hot-plug capable device that's encountered, then we have to use safe defaults
200  * (which we can allow an operator to tune eventually via pcieadm). If we have a
201  * mix of non-hotpluggable slots with downstream endpoints present and
202  * hot-pluggable slots, then we're in this case. If we don't have hot-pluggable
203  * slots, then we can have an arbitrarily complex setup. Let's look at a few of
204  * these visually:
205  *
206  * In the following diagrams, RP stands for Root Port, EP stands for Endpoint.
207  * If something is hot-pluggable, then we label it with (HP).
208  *
209  *   (1) RP --> EP
210  *   (2) RP --> Switch --> EP
211  *                    +--> EP
212  *                    +--> EP
213  *
214  *   (3) RP --> Switch --> EP
215  *                    +--> EP
216  *                    +--> Switch --> EP
217  *                               +--> EP
218  *                    +--> EP
219  *
220  *
221  *   (4) RP (HP) --> EP
222  *   (5) RP (HP) --> Switch --> EP
223  *                         +--> EP
224  *                         +--> EP
225  *
226  *   (6) RP --> Switch (HP) --> EP
227  *   (7) RP (HP) --> Switch (HP) --> EP
228  *
229  * If we look at all of these, these are all cases where it's safe for us to set
230  * things based on all devices. (1), (2), and (3) are straightforward because
231  * they have no hot-pluggable elements. This means that nothing should come/go
232  * on the system and we can set up fabric-wide properties as part of the root
233  * port.
234  *
235  * Case (4) is the most standard one that we encounter for hot-plug. Here you
236  * have a root port directly connected to an endpoint. The most common example
237  * would be an NVMe device plugged into a root port. Case (5) is interesting to
238  * highlight. While there is a switch and multiple endpoints there, they are
239  * showing up as a unit. This ends up being a weirder variant of (4), but it is
240  * safe for us to set advanced properties because we can figure out what the
241  * total set should be.
242  *
243  * Now, the more interesting bits here are (6) and (7). The reason that (6)
244  * works is that ultimately there is only a single down-stream port here that is
245  * hot-pluggable and all non-hotpluggable ports do not have a device present,
246  * which suggests that they will never have a device present. (7) also could be
247  * made to work by making the observation that if there's truly only one
248  * endpoint in a fabric, it doesn't matter how many switches there are that are
249  * hot-pluggable. This would only hold if we can assume for some reason that no
250  * other endpoints could be added.
251  *
252  * In turn, let's look at several cases that we believe aren't safe:
253  *
254  *   (8) RP --> Switch --> EP
255  *                    +--> EP
256  *               (HP) +--> EP
257  *
258  *   (9) RP --> Switch (HP) +--> EP
259  *                     (HP) +--> EP
260  *
261  *   (10) RP (HP) --> Switch (HP) +--> EP
262  *                           (HP) +--> EP
263  *
264  * All of these are situations where it's much more explicitly unsafe. Let's
265  * take (8). The problem here is that the devices on the non-hotpluggable
266  * downstream switches are always there and we should assume all device drivers
267  * will be active and performing I/O when the hot-pluggable slot changes. If the
268  * hot-pluggable slot has a lower max payload size, then we're mostly out of
269  * luck. The case of (9) is very similar to (8), just that we have more hot-plug
270  * capable slots.
271  *
272  * Finally (10) is a case of multiple instances of hotplug. (9) and (10) are the
273  * more general case of (6) and (7). While we can try to detect (6) and (7) more
274  * generally or try to make it safe, we're going to start with a simpler form of
275  * detection for this, which roughly follows the following rules:
276  *
277  *   o If there are no hot-pluggable slots in an entire fabric, then we can set
278  *     all fabric properties based on device capabilities.
279  *   o If we encounter a hot-pluggable slot, we can only set fabric properties
280  *     based on device capabilities if:
281  *
282  *       1. The hotpluggable slot is a root port.
283  *       2. There are no other hotpluggable devices downstream of it.
284  *
285  * Otherwise, if neither of the above is true, then we must use the basic PCIe
286  * defaults for various fabric-wide properties (discussed below). Even in these
287  * more complicated cases, device-specific properties such as the configuration
288  * of AERs, ASPM, etc. are still handled in the general pcie_init_bus() and
289  * related discussed earlier here.
290  *
291  * Because the only fabrics that we'll change are those that correspond to root
292  * ports, we will only call into the actual fabric feature setup when one of
293  * those changes. This has the side effect of simplifying locking. When we make
294  * changes here we need to be able to hold the entire device tree under the root
295  * port (including the root port and its parent). This is much harder to do
296  * safely when starting in the middle of the tree.
297  *
298  * Handling of Specific Properties
299  * -------------------------------
300  *
301  * This section goes into the rationale behind how we initialize and program
302  * various parts of the PCIe stack.
303  *
304  * 5-, 8-, 10- AND 14-BIT TAGS
305  *
306  * Tags are part of PCIe transactions and when combined with a device identifier
307  * are used to uniquely identify a transaction. In PCIe parlance, a Requester
308  * (someone who initiates a PCIe request) sets a unique tag in the request and
309  * the Completer (someone who processes and responds to a PCIe request) echoes
310  * the tag back. This means that a requester generally is responsible for
311  * ensuring that they don't reuse a tag between transactions.
312  *
313  * Thus the number of tags that a device has relates to the number of
314  * outstanding transactions that it can have, which are usually tied to the
315  * number of outstanding DMA transfers. The size of these transactions is also
316  * then scoped by the handling of the Maximum Packet Payload.
317  *
318  * In PCIe 1.0, devices default to a 5-bit tag. There was also an option to
319  * support an 8-bit tag. The 8-bit extended tag did not distinguish between a
320  * Requester or Completer. There was a bit to indicate device support of 8-bit
321  * tags in the Device Capabilities Register of the PCIe Capability and a
322  * separate bit to enable it in the Device Control Register of the PCIe
323  * Capability.
324  *
325  * In PCIe 4.0, support for a 10-bit tag was added. The specification broke
326  * apart the support bit into multiple pieces. In particular, in the Device
327  * Capabilities 2 register of the PCIe Capability there is a separate bit to
328  * indicate whether the device supports 10-bit completions and 10-bit requests.
329  * All PCIe 4.0 compliant devices are required to support 10-bit tags if they
330  * operate at 16.0 GT/s speed (a PCIe Gen 4 compliant device does not have to
331  * operate at Gen 4 speeds).
332  *
333  * This allows a device to support 10-bit completions but not 10-bit requests.
334  * A device that supports 10-bit requests is required to support 10-bit
335  * completions. There is no ability to enable or disable 10-bit completion
336  * support in the Device Capabilities 2 register. There is only a bit to enable
337  * 10-bit requests. This distinction makes our life easier as this means that as
338  * long as the entire fabric supports 10-bit completions, it doesn't matter if
339  * not all devices support 10-bit requests and we can enable them as required.
340  * More on this in a bit.
341  *
342  * In PCIe 6.0, another set of bits was added for 14-bit tags. These follow the
343  * same pattern as the 10-bit tags. The biggest difference is that the
344  * capabilities and control for these are found in the Device Capabilities 3
345  * and Device Control 3 register of the Device 3 Extended Capability. Similar to
346  * what we see with 10-bit tags, requesters are required to support the
347  * completer capability. The only control bit is for whether or not they enable
348  * a 14-bit requester.
349  *
350  * PCIe switches sit between root ports and endpoints and show up to
351  * software as a set of bridges. Bridges generally don't have to know about tags
352  * as they are usually neither requesters nor completers (unless directly talking
353  * to the bridge instance). That is they are generally required to forward
354  * packets without modifying them. This works until we deal with switch error
355  * handling. At that point, the switch may try to interpret the transaction and
356  * if it doesn't understand the tagging scheme in use, return the transaction
357  * with the wrong tag and also an incorrectly diagnosed error (usually a
358  * malformed TLP).
359  *
360  * With all this, we construct a somewhat simple policy of how and when we
361  * enable extended tags:
362  *
363  *    o If we have a complex hotplug-capable fabric (based on the discussion
364  *      earlier in fabric-specific settings), then we cannot enable any of the
365  *      8-bit, 10-bit, and 14-bit tagging features. This is due to the issues
366  *      with intermediate PCIe switches and related.
367  *
368  *    o If every device supports 8-bit capable tags, then we will go through and
369  *      enable those everywhere.
370  *
371  *    o If every device supports 10-bit capable completions, then we will enable
372  *      10-bit requester on every device that supports it.
373  *
374  *    o If every device supports 14-bit capable completions, then we will enable
375  *      14-bit requesters on every device that supports it.
376  *
377  * This is the simpler end of the policy and one that is relatively easy to
378  * implement. While we could attempt to relax the constraint that every device
379  * in the fabric implement these features by making assumptions about peer-to-
380  * peer requests (that is devices at the same layer in the tree won't talk to
381  * one another), that is a lot of complexity. For now, we leave such an
382  * implementation to those who need it in the future.
383  *
384  * MAX PAYLOAD SIZE
385  *
386  * When performing transactions on the PCIe bus, a given transaction has a
387  * maximum allowed size. This size is called the MPS or 'Maximum Payload Size'.
388  * A given device reports its maximum supported size in the Device Capabilities
389  * register of the PCIe Capability. It is then set in the Device Control
390  * register.
391  *
392  * One of the challenges with this value is that different functions of a device
393  * have independent values, but strictly speaking are required to actually have
394  * the same value programmed in all of them lest device behavior goes awry. When
395  * a device has the ARI (alternative routing ID) capability enabled, then only
396  * function 0 controls the actual payload size.
397  *
398  * The settings for this need to be consistent throughout the fabric. A
399  * Transmitter is not allowed to create a TLP that exceeds its maximum packet
400  * size and a Receiver is not allowed to receive a packet that exceeds its
401  * maximum packet size. In all of these cases, this would result in something
402  * like a malformed TLP error.
403  *
404  * Effectively, this means that everything on a given fabric must have the same
405  * value programmed in its Device Control register for this value. While in the
406  * case of tags, switches generally weren't completers or requesters, here every
407  * device along the path is subject to this. This makes the actual value that we
408  * set throughout the fabric even more important and the constraints of hotplug
409  * even worse to deal with.
410  *
411  * Because a hotplug device can be inserted with any packet size, if we hit
412  * anything other than the simple hotplug cases discussed in the fabric-specific
413  * settings section, then we must use the smallest size of 128 byte payloads.
414  * This is because a device could be plugged in that supports something smaller
415  * than we had otherwise set. If there are other active devices, those could not
416  * be changed without quiescing the entire fabric. As such our algorithm is as
417  * follows:
418  *
419  *     1. Scan the entire fabric, keeping track of the smallest seen MPS in the
420  *        Device Capabilities Register.
421  *     2. If we have a complex fabric, program each Device Control register with
422  *        a 128 byte maximum payload size, otherwise, program it with the
423  *        discovered value.
424  *
425  *
426  * MAX READ REQUEST SIZE
427  *
428  * The maximum read request size (mrrs) is a much more confusing thing when
429  * compared to the maximum payload size counterpart. The maximum payload size
430  * (MPS) above is what restricts the actual size of a TLP. The mrrs value
431  * is used to control part of the behavior of Memory Read Request, which is not
432  * strictly speaking subject to the MPS. A PCIe device is allowed to respond to
433  * a Memory Read Request with less bytes than were actually requested in a
434  * single completion. In general, the default size that a root complex and its
435  * root port will reply to are based around the length of a cache line.
436  *
437  * What this ultimately controls is the number of requests that the Requester
438  * has to make and trades off bandwidth, bus sharing, and related here. For
439  * example, if the maximum read request size is 4 KiB, then the requester would
440  * only issue a single read request asking for 4 KiB. It would still receive
441  * these as multiple packets in units of the MPS. If however, the maximum read
442  * request was only say 512 B, then it would need to make 8 separate requests,
443  * potentially increasing latency. On the other hand, if systems are relying on
444  * total requests for QoS, then it's important to set it to something that's
445  * closer to the actual MPS.
446  *
447  * Traditionally, the OS has not been the most straightforward about this. It's
448  * important to remember that setting this up is also somewhat in the realm of
449  * system firmware. Due to the PCI Firmware specification, the firmware may have
450  * set up a value for not just the MRRS but also the MPS. As such, our logic
451  * basically left the MRRS alone and used whatever the device had there as long
452  * as we weren't shrinking the device's MPS. If we were, then we'd set it to the
453  * MPS. If the device was a root port, then it was just left at a system wide
454  * and PCIe default of 512 bytes.
455  *
456  * If we survey firmware (which isn't easy due to its nature), we have seen most
457  * cases where the firmware just doesn't do anything and leaves it to the
458  * device's default, which is basically just the PCIe default, unless it has a
459  * specific knowledge of something like say wanting to do something for an NVMe
460  * device. The same is generally true of other systems, leaving it at its
461  * default unless otherwise set by a device driver.
462  *
463  * Because this value doesn't really have the same constraints as other fabric
464  * properties, this becomes much simpler and we instead opt to set it as part of
465  * the device node initialization. In addition, there are no real rules about
466  * different functions having different values here as it doesn't really impact
467  * the TLP processing the same way that the MPS does.
468  *
469  * While we should add a fuller way of setting this and allowing operator
470  * override of the MRRS based on things like device class, etc. that is driven
471  * by pcieadm, that is left to the future. For now we ensure that all devices
472  * are kept at their default (512 bytes or whatever firmware left behind) and we
473  * ensure that root ports always have the mrrs set to 512.
474  */
475 
476 #include <sys/sysmacros.h>
477 #include <sys/types.h>
478 #include <sys/kmem.h>
479 #include <sys/modctl.h>
480 #include <sys/ddi.h>
481 #include <sys/sunddi.h>
482 #include <sys/sunndi.h>
483 #include <sys/fm/protocol.h>
484 #include <sys/fm/util.h>
485 #include <sys/promif.h>
486 #include <sys/disp.h>
487 #include <sys/stat.h>
488 #include <sys/file.h>
489 #include <sys/pci_cap.h>
490 #include <sys/pci_impl.h>
491 #include <sys/pcie_impl.h>
492 #include <sys/hotplug/pci/pcie_hp.h>
493 #include <sys/hotplug/pci/pciehpc.h>
494 #include <sys/hotplug/pci/pcishpc.h>
495 #include <sys/hotplug/pci/pcicfg.h>
496 #include <sys/pci_cfgacc.h>
497 #include <sys/sysevent.h>
498 #include <sys/sysevent/eventdefs.h>
499 #include <sys/sysevent/pcie.h>
500 
501 /* Local functions prototypes */
502 static void pcie_init_pfd(dev_info_t *);
503 static void pcie_fini_pfd(dev_info_t *);
504 
505 #if defined(__x86)
506 static void pcie_check_io_mem_range(ddi_acc_handle_t, boolean_t *, boolean_t *);
507 #endif /* defined(__x86) */
508 
509 #ifdef DEBUG
510 uint_t pcie_debug_flags = 0;
511 static void pcie_print_bus(pcie_bus_t *bus_p);
512 void pcie_dbg(char *fmt, ...);
513 #endif /* DEBUG */
514 
515 /* Variable to control default PCI-Express config settings */
516 ushort_t pcie_command_default =
517     PCI_COMM_SERR_ENABLE |
518     PCI_COMM_WAIT_CYC_ENAB |
519     PCI_COMM_PARITY_DETECT |
520     PCI_COMM_ME |
521     PCI_COMM_MAE |
522     PCI_COMM_IO;
523 
524 /* xxx_fw are bits that are controlled by FW and should not be modified */
525 ushort_t pcie_command_default_fw =
526     PCI_COMM_SPEC_CYC |
527     PCI_COMM_MEMWR_INVAL |
528     PCI_COMM_PALETTE_SNOOP |
529     PCI_COMM_WAIT_CYC_ENAB |
530     0xF800; /* Reserved Bits */
531 
532 ushort_t pcie_bdg_command_default_fw =
533     PCI_BCNF_BCNTRL_ISA_ENABLE |
534     PCI_BCNF_BCNTRL_VGA_ENABLE |
535     0xF000; /* Reserved Bits */
536 
537 /* PCI-Express Base error defaults */
538 ushort_t pcie_base_err_default =
539     PCIE_DEVCTL_CE_REPORTING_EN |
540     PCIE_DEVCTL_NFE_REPORTING_EN |
541     PCIE_DEVCTL_FE_REPORTING_EN |
542     PCIE_DEVCTL_UR_REPORTING_EN;
543 
544 /* PCI-Express Device Control Register */
545 uint16_t pcie_devctl_default = PCIE_DEVCTL_RO_EN |
546     PCIE_DEVCTL_MAX_READ_REQ_512;
547 
548 /* PCI-Express AER Root Control Register */
549 #define	PCIE_ROOT_SYS_ERR	(PCIE_ROOTCTL_SYS_ERR_ON_CE_EN | \
550 				PCIE_ROOTCTL_SYS_ERR_ON_NFE_EN | \
551 				PCIE_ROOTCTL_SYS_ERR_ON_FE_EN)
552 
553 ushort_t pcie_root_ctrl_default =
554     PCIE_ROOTCTL_SYS_ERR_ON_CE_EN |
555     PCIE_ROOTCTL_SYS_ERR_ON_NFE_EN |
556     PCIE_ROOTCTL_SYS_ERR_ON_FE_EN;
557 
558 /* PCI-Express Root Error Command Register */
559 ushort_t pcie_root_error_cmd_default =
560     PCIE_AER_RE_CMD_CE_REP_EN |
561     PCIE_AER_RE_CMD_NFE_REP_EN |
562     PCIE_AER_RE_CMD_FE_REP_EN;
563 
/*
 * ECRC settings programmed into the PCIe AER Control Register: generate and
 * check end-to-end CRC on devices that advertise the capability.
 */
uint32_t pcie_ecrc_value =
    PCIE_AER_CTL_ECRC_GEN_ENA |
    PCIE_AER_CTL_ECRC_CHECK_ENA;

/*
 * If a particular platform wants to disable certain errors such as UR/MA,
 * instead of using #defines have the platform's PCIe Root Complex driver set
 * these masks using the pcie_get_XXX_mask and pcie_set_XXX_mask functions.
 * For x86 the closest thing to a PCIe root complex driver is NPE.  For SPARC
 * the closest PCIe root complex driver is PX.
 *
 * pcie_serr_disable_flag : disable SERR only (in RCR and command reg) x86
 * systems may want to disable SERR in general.  For root ports, enabling SERR
 * causes NMIs which are not handled and results in a watchdog timeout error.
 */
uint32_t pcie_aer_uce_mask = 0;		/* AER UE Mask */
uint32_t pcie_aer_ce_mask = 0;		/* AER CE Mask */
uint32_t pcie_aer_suce_mask = 0;	/* AER Secondary UE Mask */
uint32_t pcie_serr_disable_flag = 0;	/* Disable SERR */

/* Default severities needed for eversholt.  Error handling doesn't care */
uint32_t pcie_aer_uce_severity = PCIE_AER_UCE_MTLP | PCIE_AER_UCE_RO | \
    PCIE_AER_UCE_FCP | PCIE_AER_UCE_SD | PCIE_AER_UCE_DLP | \
    PCIE_AER_UCE_TRAINING;
uint32_t pcie_aer_suce_severity = PCIE_AER_SUCE_SERR_ASSERT | \
    PCIE_AER_SUCE_UC_ADDR_ERR | PCIE_AER_SUCE_UC_ATTR_ERR | \
    PCIE_AER_SUCE_USC_MSG_DATA_ERR;

/* Tunable: non-zero disables ARI (Alternative Routing-ID) support */
int pcie_disable_ari = 0;

/*
 * On some platforms, such as the AMD B450 chipset, we've seen an odd
 * relationship between enabling link bandwidth notifications and AERs about
 * ECRC errors. This provides a mechanism to disable it.
 */
int pcie_disable_lbw = 0;

/*
 * Amount of time to wait for an in-progress retraining. The default is to try
 * 500 times in 10ms chunks, thus a total of 5s.
 */
uint32_t pcie_link_retrain_count = 500;
uint32_t pcie_link_retrain_delay_ms = 10;

/* Link-management taskq; created lazily in pcie_init() under the mutex. */
taskq_t *pcie_link_tq;
kmutex_t pcie_link_tq_mutex;
611 
/* Forward declarations of file-local helpers defined later in this file. */
static void pcie_scan_mps(dev_info_t *rc_dip, dev_info_t *dip,
	int *max_supported);
static int pcie_get_max_supported(dev_info_t *dip, void *arg);
static int pcie_map_phys(dev_info_t *dip, pci_regspec_t *phys_spec,
    caddr_t *addrp, ddi_acc_handle_t *handlep);
static void pcie_unmap_phys(ddi_acc_handle_t *handlep,	pci_regspec_t *ph);
static int pcie_link_bw_intr(dev_info_t *);
static void pcie_capture_speeds(dev_info_t *);

/* Non-static; declared here because it is used before its definition. */
dev_info_t *pcie_get_rc_dip(dev_info_t *dip);
622 
623 /*
624  * modload support
625  */
626 
627 static struct modlmisc modlmisc	= {
628 	&mod_miscops,	/* Type	of module */
629 	"PCI Express Framework Module"
630 };
631 
632 static struct modlinkage modlinkage = {
633 	MODREV_1,
634 	(void	*)&modlmisc,
635 	NULL
636 };
637 
638 /*
639  * Global Variables needed for a non-atomic version of ddi_fm_ereport_post.
640  * Currently used to send the pci.fabric ereports whose payload depends on the
641  * type of PCI device it is being sent for.
642  */
643 char		*pcie_nv_buf;
644 nv_alloc_t	*pcie_nvap;
645 nvlist_t	*pcie_nvl;
646 
int
_init(void)
{
	int rval;

	/*
	 * Set up the nvlist scratch state (buffer, allocator, list) used by
	 * the pci.fabric ereport code, and the lock protecting the lazy
	 * creation of the link-management taskq, before the module becomes
	 * visible via mod_install().
	 */
	pcie_nv_buf = kmem_alloc(ERPT_DATA_SZ, KM_SLEEP);
	pcie_nvap = fm_nva_xcreate(pcie_nv_buf, ERPT_DATA_SZ);
	pcie_nvl = fm_nvlist_create(pcie_nvap);
	mutex_init(&pcie_link_tq_mutex, NULL, MUTEX_DRIVER, NULL);

	if ((rval = mod_install(&modlinkage)) != 0) {
		/* Installation failed: tear everything back down. */
		mutex_destroy(&pcie_link_tq_mutex);
		fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
		fm_nva_xdestroy(pcie_nvap);
		kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
	}
	return (rval);
}
665 
int
_fini()
{
	int		rval;

	if ((rval = mod_remove(&modlinkage)) == 0) {
		/*
		 * The link-management taskq is created lazily in pcie_init(),
		 * so it may never have been created at all.
		 */
		if (pcie_link_tq != NULL) {
			taskq_destroy(pcie_link_tq);
		}
		mutex_destroy(&pcie_link_tq_mutex);
		fm_nvlist_destroy(pcie_nvl, FM_NVA_RETAIN);
		fm_nva_xdestroy(pcie_nvap);
		kmem_free(pcie_nv_buf, ERPT_DATA_SZ);
	}
	return (rval);
}
682 
/* Standard modctl(2)-style module information entry point. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
688 
/*
 * Per-nexus PCIe initialization: ensure the shared link-management taskq
 * exists, create the "devctl" minor node, and initialize hotplug support.
 * `arg' is passed through to pcie_hp_init().  Returns DDI_SUCCESS or a
 * DDI failure code.
 */
/* ARGSUSED */
int
pcie_init(dev_info_t *dip, caddr_t arg)
{
	int	ret = DDI_SUCCESS;

	/*
	 * Our _init function is too early to create a taskq. Create the pcie
	 * link management taskq here now instead.
	 */
	mutex_enter(&pcie_link_tq_mutex);
	if (pcie_link_tq == NULL) {
		pcie_link_tq = taskq_create("pcie_link", 1, minclsyspri, 0, 0,
		    0);
	}
	mutex_exit(&pcie_link_tq_mutex);


	/*
	 * Create a "devctl" minor node to support DEVCTL_DEVICE_*
	 * and DEVCTL_BUS_* ioctls to this bus.
	 */
	if ((ret = ddi_create_minor_node(dip, "devctl", S_IFCHR,
	    PCI_MINOR_NUM(ddi_get_instance(dip), PCI_DEVCTL_MINOR),
	    DDI_NT_NEXUS, 0)) != DDI_SUCCESS) {
		PCIE_DBG("Failed to create devctl minor node for %s%d\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));

		return (ret);
	}

	if ((ret = pcie_hp_init(dip, arg)) != DDI_SUCCESS) {
		/*
		 * On some x86 platforms, we observed unexpected hotplug
		 * initialization failures in recent years. The known cause
		 * is a hardware issue: while the problem PCI bridges have
		 * the Hotplug Capable registers set, the machine actually
		 * does not implement the expected ACPI object.
		 *
		 * We don't want to stop PCI driver attach and system boot
		 * just because of this hotplug initialization failure.
		 * Continue with a debug message printed.
		 */
		PCIE_DBG("%s%d: Failed setting hotplug framework\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));

#if defined(__sparc)
		/* On SPARC a hotplug setup failure is treated as fatal. */
		ddi_remove_minor_node(dip, "devctl");

		return (ret);
#endif /* defined(__sparc) */
	}

	return (DDI_SUCCESS);
}
744 
/*
 * Undo pcie_init(): disable ARI forwarding and link bandwidth notifications
 * if they were enabled, tear down hotplug support, and remove the "devctl"
 * minor node.  Returns DDI_SUCCESS or the hotplug teardown failure code.
 */
/* ARGSUSED */
int
pcie_uninit(dev_info_t *dip)
{
	int	ret = DDI_SUCCESS;

	if (pcie_ari_is_enabled(dip) == PCIE_ARI_FORW_ENABLED)
		(void) pcie_ari_disable(dip);

	if ((ret = pcie_hp_uninit(dip)) != DDI_SUCCESS) {
		PCIE_DBG("Failed to uninitialize hotplug for %s%d\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));

		return (ret);
	}

	if (pcie_link_bw_supported(dip)) {
		(void) pcie_link_bw_disable(dip);
	}

	ddi_remove_minor_node(dip, "devctl");

	return (ret);
}
769 
770 /*
771  * PCIe module interface for enabling hotplug interrupt.
772  *
773  * It should be called after pcie_init() is done and bus driver's
774  * interrupt handlers have being attached.
775  */
int
pcie_hpintr_enable(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	pcie_hp_ctrl_t	*ctrl_p = PCIE_GET_HP_CTRL(dip);

	/* Native PCIe hotplug and PCI SHPC hotplug are enabled differently. */
	if (PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
		(void) (ctrl_p->hc_ops.enable_hpc_intr)(ctrl_p);
	} else if (PCIE_IS_PCI_HOTPLUG_ENABLED(bus_p)) {
		(void) pcishpc_enable_irqs(ctrl_p);
	}
	return (DDI_SUCCESS);
}
789 
790 /*
791  * PCIe module interface for disabling hotplug interrupt.
792  *
793  * It should be called before pcie_uninit() is called and bus driver's
794  * interrupt handlers is dettached.
795  */
int
pcie_hpintr_disable(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	pcie_hp_ctrl_t	*ctrl_p = PCIE_GET_HP_CTRL(dip);

	/* Mirror of pcie_hpintr_enable(): native PCIe vs. PCI SHPC. */
	if (PCIE_IS_PCIE_HOTPLUG_ENABLED(bus_p)) {
		(void) (ctrl_p->hc_ops.disable_hpc_intr)(ctrl_p);
	} else if (PCIE_IS_PCI_HOTPLUG_ENABLED(bus_p)) {
		(void) pcishpc_disable_irqs(ctrl_p);
	}
	return (DDI_SUCCESS);
}
809 
810 /* ARGSUSED */
811 int
812 pcie_intr(dev_info_t *dip)
813 {
814 	int hp, lbw;
815 
816 	hp = pcie_hp_intr(dip);
817 	lbw = pcie_link_bw_intr(dip);
818 
819 	if (hp == DDI_INTR_CLAIMED || lbw == DDI_INTR_CLAIMED) {
820 		return (DDI_INTR_CLAIMED);
821 	}
822 
823 	return (DDI_INTR_UNCLAIMED);
824 }
825 
826 /* ARGSUSED */
827 int
828 pcie_open(dev_info_t *dip, dev_t *devp, int flags, int otyp, cred_t *credp)
829 {
830 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
831 
832 	/*
833 	 * Make sure the open is for the right file type.
834 	 */
835 	if (otyp != OTYP_CHR)
836 		return (EINVAL);
837 
838 	/*
839 	 * Handle the open by tracking the device state.
840 	 */
841 	if ((bus_p->bus_soft_state == PCI_SOFT_STATE_OPEN_EXCL) ||
842 	    ((flags & FEXCL) &&
843 	    (bus_p->bus_soft_state != PCI_SOFT_STATE_CLOSED))) {
844 		return (EBUSY);
845 	}
846 
847 	if (flags & FEXCL)
848 		bus_p->bus_soft_state = PCI_SOFT_STATE_OPEN_EXCL;
849 	else
850 		bus_p->bus_soft_state = PCI_SOFT_STATE_OPEN;
851 
852 	return (0);
853 }
854 
855 /* ARGSUSED */
856 int
857 pcie_close(dev_info_t *dip, dev_t dev, int flags, int otyp, cred_t *credp)
858 {
859 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
860 
861 	if (otyp != OTYP_CHR)
862 		return (EINVAL);
863 
864 	bus_p->bus_soft_state = PCI_SOFT_STATE_CLOSED;
865 
866 	return (0);
867 }
868 
/*
 * ioctl(9E) for the "devctl" minor node.  Generic device/bus state ioctls
 * are handed to the common NDI implementation; bus quiesce/unquiesce are
 * handled here, and the reset ioctls are not supported.
 */
/* ARGSUSED */
int
pcie_ioctl(dev_info_t *dip, dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *credp, int *rvalp)
{
	struct devctl_iocdata	*dcp;
	uint_t			bus_state;
	int			rv = DDI_SUCCESS;

	/*
	 * We can use the generic implementation for devctl ioctl
	 */
	switch (cmd) {
	case DEVCTL_DEVICE_GETSTATE:
	case DEVCTL_DEVICE_ONLINE:
	case DEVCTL_DEVICE_OFFLINE:
	case DEVCTL_BUS_GETSTATE:
		return (ndi_devctl_ioctl(dip, cmd, arg, mode, 0));
	default:
		break;
	}

	/*
	 * read devctl ioctl data
	 */
	if (ndi_dc_allochdl((void *)arg, &dcp) != NDI_SUCCESS)
		return (EFAULT);

	switch (cmd) {
	case DEVCTL_BUS_QUIESCE:
		/* Already quiesced: nothing to do. */
		if (ndi_get_bus_state(dip, &bus_state) == NDI_SUCCESS)
			if (bus_state == BUS_QUIESCED)
				break;
		(void) ndi_set_bus_state(dip, BUS_QUIESCED);
		break;
	case DEVCTL_BUS_UNQUIESCE:
		/* Already active: nothing to do. */
		if (ndi_get_bus_state(dip, &bus_state) == NDI_SUCCESS)
			if (bus_state == BUS_ACTIVE)
				break;
		(void) ndi_set_bus_state(dip, BUS_ACTIVE);
		break;
	case DEVCTL_BUS_RESET:
	case DEVCTL_BUS_RESETALL:
	case DEVCTL_DEVICE_RESET:
		rv = ENOTSUP;
		break;
	default:
		rv = ENOTTY;
	}

	ndi_dc_freehdl(dcp);
	return (rv);
}
922 
923 /* ARGSUSED */
924 int
925 pcie_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
926     int flags, char *name, caddr_t valuep, int *lengthp)
927 {
928 	if (dev == DDI_DEV_T_ANY)
929 		goto skip;
930 
931 	if (PCIE_IS_HOTPLUG_CAPABLE(dip) &&
932 	    strcmp(name, "pci-occupant") == 0) {
933 		int	pci_dev = PCI_MINOR_NUM_TO_PCI_DEVNUM(getminor(dev));
934 
935 		pcie_hp_create_occupant_props(dip, dev, pci_dev);
936 	}
937 
938 skip:
939 	return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp));
940 }
941 
942 int
943 pcie_init_cfghdl(dev_info_t *cdip)
944 {
945 	pcie_bus_t		*bus_p;
946 	ddi_acc_handle_t	eh = NULL;
947 
948 	bus_p = PCIE_DIP2BUS(cdip);
949 	if (bus_p == NULL)
950 		return (DDI_FAILURE);
951 
952 	/* Create an config access special to error handling */
953 	if (pci_config_setup(cdip, &eh) != DDI_SUCCESS) {
954 		cmn_err(CE_WARN, "Cannot setup config access"
955 		    " for BDF 0x%x\n", bus_p->bus_bdf);
956 		return (DDI_FAILURE);
957 	}
958 
959 	bus_p->bus_cfg_hdl = eh;
960 	return (DDI_SUCCESS);
961 }
962 
/* Tear down the config space access handle created by pcie_init_cfghdl(). */
void
pcie_fini_cfghdl(dev_info_t *cdip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(cdip);

	pci_config_teardown(&bus_p->bus_cfg_hdl);
}
970 
971 void
972 pcie_determine_serial(dev_info_t *dip)
973 {
974 	pcie_bus_t		*bus_p = PCIE_DIP2BUS(dip);
975 	ddi_acc_handle_t	h;
976 	uint16_t		cap;
977 	uchar_t			serial[8];
978 	uint32_t		low, high;
979 
980 	if (!PCIE_IS_PCIE(bus_p))
981 		return;
982 
983 	h = bus_p->bus_cfg_hdl;
984 
985 	if ((PCI_CAP_LOCATE(h, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_SER), &cap)) ==
986 	    DDI_FAILURE)
987 		return;
988 
989 	high = PCI_XCAP_GET32(h, 0, cap, PCIE_SER_SID_UPPER_DW);
990 	low = PCI_XCAP_GET32(h, 0, cap, PCIE_SER_SID_LOWER_DW);
991 
992 	/*
993 	 * Here, we're trying to figure out if we had an invalid PCIe read. From
994 	 * looking at the contents of the value, it can be hard to tell the
995 	 * difference between a value that has all 1s correctly versus if we had
996 	 * an error. In this case, we only assume it's invalid if both register
997 	 * reads are invalid. We also only use 32-bit reads as we're not sure if
998 	 * all devices will support these as 64-bit reads, while we know that
999 	 * they'll support these as 32-bit reads.
1000 	 */
1001 	if (high == PCI_EINVAL32 && low == PCI_EINVAL32)
1002 		return;
1003 
1004 	serial[0] = low & 0xff;
1005 	serial[1] = (low >> 8) & 0xff;
1006 	serial[2] = (low >> 16) & 0xff;
1007 	serial[3] = (low >> 24) & 0xff;
1008 	serial[4] = high & 0xff;
1009 	serial[5] = (high >> 8) & 0xff;
1010 	serial[6] = (high >> 16) & 0xff;
1011 	serial[7] = (high >> 24) & 0xff;
1012 
1013 	(void) ndi_prop_update_byte_array(DDI_DEV_T_NONE, dip, "pcie-serial",
1014 	    serial, sizeof (serial));
1015 }
1016 
1017 static void
1018 pcie_determine_aspm(dev_info_t *dip)
1019 {
1020 	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
1021 	uint32_t	linkcap;
1022 	uint16_t	linkctl;
1023 
1024 	if (!PCIE_IS_PCIE(bus_p))
1025 		return;
1026 
1027 	linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
1028 	linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
1029 
1030 	switch (linkcap & PCIE_LINKCAP_ASPM_SUP_MASK) {
1031 	case PCIE_LINKCAP_ASPM_SUP_L0S:
1032 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1033 		    "pcie-aspm-support", "l0s");
1034 		break;
1035 	case PCIE_LINKCAP_ASPM_SUP_L1:
1036 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1037 		    "pcie-aspm-support", "l1");
1038 		break;
1039 	case PCIE_LINKCAP_ASPM_SUP_L0S_L1:
1040 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1041 		    "pcie-aspm-support", "l0s,l1");
1042 		break;
1043 	default:
1044 		return;
1045 	}
1046 
1047 	switch (linkctl & PCIE_LINKCTL_ASPM_CTL_MASK) {
1048 	case PCIE_LINKCTL_ASPM_CTL_DIS:
1049 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1050 		    "pcie-aspm-state", "disabled");
1051 		break;
1052 	case PCIE_LINKCTL_ASPM_CTL_L0S:
1053 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1054 		    "pcie-aspm-state", "l0s");
1055 		break;
1056 	case PCIE_LINKCTL_ASPM_CTL_L1:
1057 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1058 		    "pcie-aspm-state", "l1");
1059 		break;
1060 	case PCIE_LINKCTL_ASPM_CTL_L0S_L1:
1061 		(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip,
1062 		    "pcie-aspm-state", "l0s,l1");
1063 		break;
1064 	}
1065 }
1066 
1067 /*
1068  * PCI-Express child device initialization. Note, this only will be called on a
1069  * device or function if we actually attach a device driver to it.
1070  *
1071  * This function enables generic pci-express interrupts and error handling.
1072  * Note, tagging, the max packet size, and related are all set up before this
1073  * point and is performed in pcie_fabric_setup().
1074  *
1075  * @param pdip		root dip (root nexus's dip)
1076  * @param cdip		child's dip (device's dip)
1077  * @return		DDI_SUCCESS or DDI_FAILURE
1078  */
/* ARGSUSED */
int
pcie_initchild(dev_info_t *cdip)
{
	uint16_t		tmp16, reg16;
	pcie_bus_t		*bus_p;
	uint32_t		devid, venid;

	bus_p = PCIE_DIP2BUS(cdip);
	if (bus_p == NULL) {
		PCIE_DBG("%s: BUS not found.\n",
		    ddi_driver_name(cdip));

		return (DDI_FAILURE);
	}

	if (pcie_init_cfghdl(cdip) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/*
	 * Update pcie_bus_t with real Vendor Id Device Id.
	 *
	 * For assigned devices in IOV environment, the OBP will return
	 * faked device id/vendor id on configuration read and for both
	 * properties in root domain. translate_devid() function will
	 * update the properties with real device-id/vendor-id on such
	 * platforms, so that we can utilize the properties here to get
	 * real device-id/vendor-id and overwrite the faked ids.
	 *
	 * For unassigned devices or devices in non-IOV environment, the
	 * operation below won't make a difference.
	 *
	 * The IOV implementation only supports assignment of PCIE
	 * endpoint devices. Devices under pci-pci bridges don't need
	 * operation like this.
	 */
	devid = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
	    "device-id", -1);
	venid = ddi_prop_get_int(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
	    "vendor-id", -1);
	bus_p->bus_dev_ven_id = (devid << 16) | (venid & 0xffff);

	/* Clear the device's status register (write-1-to-clear bits) */
	reg16 = PCIE_GET(16, bus_p, PCI_CONF_STAT);
	PCIE_PUT(16, bus_p, PCI_CONF_STAT, reg16);

	/* Setup the device's command register */
	reg16 = PCIE_GET(16, bus_p, PCI_CONF_COMM);
	tmp16 = (reg16 & pcie_command_default_fw) | pcie_command_default;

#if defined(__x86)
	boolean_t empty_io_range = B_FALSE;
	boolean_t empty_mem_range = B_FALSE;
	/*
	 * Check for empty IO and Mem ranges on bridges. If so disable IO/Mem
	 * access as it can cause a hang if enabled.
	 */
	pcie_check_io_mem_range(bus_p->bus_cfg_hdl, &empty_io_range,
	    &empty_mem_range);
	if ((empty_io_range == B_TRUE) &&
	    (pcie_command_default & PCI_COMM_IO)) {
		tmp16 &= ~PCI_COMM_IO;
		PCIE_DBG("No I/O range found for %s, bdf 0x%x\n",
		    ddi_driver_name(cdip), bus_p->bus_bdf);
	}
	if ((empty_mem_range == B_TRUE) &&
	    (pcie_command_default & PCI_COMM_MAE)) {
		tmp16 &= ~PCI_COMM_MAE;
		PCIE_DBG("No Mem range found for %s, bdf 0x%x\n",
		    ddi_driver_name(cdip), bus_p->bus_bdf);
	}
#endif /* defined(__x86) */

	/* See pcie_serr_disable_flag above: SERR can cause unhandled NMIs. */
	if (pcie_serr_disable_flag && PCIE_IS_PCIE(bus_p))
		tmp16 &= ~PCI_COMM_SERR_ENABLE;

	PCIE_PUT(16, bus_p, PCI_CONF_COMM, tmp16);
	PCIE_DBG_CFG(cdip, bus_p, "COMMAND", 16, PCI_CONF_COMM, reg16);

	/*
	 * If the device has a bus control register then program it
	 * based on the settings in the command register.
	 */
	if (PCIE_IS_BDG(bus_p)) {
		/* Clear the device's secondary status register */
		reg16 = PCIE_GET(16, bus_p, PCI_BCNF_SEC_STATUS);
		PCIE_PUT(16, bus_p, PCI_BCNF_SEC_STATUS, reg16);

		/* Setup the device's secondary command register */
		reg16 = PCIE_GET(16, bus_p, PCI_BCNF_BCNTRL);
		tmp16 = (reg16 & pcie_bdg_command_default_fw);

		tmp16 |= PCI_BCNF_BCNTRL_SERR_ENABLE;
		/*
		 * Workaround for this Nvidia bridge. Don't enable the SERR
		 * enable bit in the bridge control register as it could lead to
		 * bogus NMIs.
		 */
		if (bus_p->bus_dev_ven_id == 0x037010DE)
			tmp16 &= ~PCI_BCNF_BCNTRL_SERR_ENABLE;

		if (pcie_command_default & PCI_COMM_PARITY_DETECT)
			tmp16 |= PCI_BCNF_BCNTRL_PARITY_ENABLE;

		/*
		 * Enable Master Abort Mode only if URs have not been masked.
		 * For PCI and PCIe-PCI bridges, enabling this bit causes a
		 * Master Aborts/UR to be forwarded as a UR/TA or SERR.  If this
		 * bit is masked, posted requests are dropped and non-posted
		 * requests are returned with -1.
		 */
		if (pcie_aer_uce_mask & PCIE_AER_UCE_UR)
			tmp16 &= ~PCI_BCNF_BCNTRL_MAST_AB_MODE;
		else
			tmp16 |= PCI_BCNF_BCNTRL_MAST_AB_MODE;
		PCIE_PUT(16, bus_p, PCI_BCNF_BCNTRL, tmp16);
		PCIE_DBG_CFG(cdip, bus_p, "SEC CMD", 16, PCI_BCNF_BCNTRL,
		    reg16);
	}

	if (PCIE_IS_PCIE(bus_p)) {
		/* Setup PCIe device control register */
		reg16 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
		/* note: MPS/MRRS are initialized in pcie_initchild_mps() */
		tmp16 = (reg16 & (PCIE_DEVCTL_MAX_READ_REQ_MASK |
		    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) |
		    (pcie_devctl_default & ~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
		    PCIE_DEVCTL_MAX_PAYLOAD_MASK));
		PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, tmp16);
		PCIE_DBG_CAP(cdip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, reg16);

		/* Enable PCIe errors */
		pcie_enable_errors(cdip);

		/* Export serial number, ASPM, and link speed properties. */
		pcie_determine_serial(cdip);

		pcie_determine_aspm(cdip);

		pcie_capture_speeds(cdip);
	}

	/* Record whether the device is an ARI device under an ARI parent. */
	bus_p->bus_ari = B_FALSE;
	if ((pcie_ari_is_enabled(ddi_get_parent(cdip))
	    == PCIE_ARI_FORW_ENABLED) && (pcie_ari_device(cdip)
	    == PCIE_ARI_DEVICE)) {
		bus_p->bus_ari = B_TRUE;
	}

	return (DDI_SUCCESS);
}
1229 
/*
 * Allocate and initialize the fault data (pf_data_t) for a device, including
 * the register-save areas appropriate to the device's type (PCI, PCI-X,
 * PCIe, bridge, root port).  The result is attached to the dip via
 * PCIE_DIP2PFD; pcie_fini_pfd() is its inverse.
 */
static void
pcie_init_pfd(dev_info_t *dip)
{
	pf_data_t	*pfd_p = PCIE_ZALLOC(pf_data_t);
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);

	PCIE_DIP2PFD(dip) = pfd_p;

	pfd_p->pe_bus_p = bus_p;
	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	pfd_p->pe_lock = B_FALSE;
	pfd_p->pe_valid = B_FALSE;

	/* Allocate the root fault struct for both RC and RP */
	if (PCIE_IS_ROOT(bus_p)) {
		PCIE_ROOT_FAULT(pfd_p) = PCIE_ZALLOC(pf_root_fault_t);
		PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
		PCIE_ROOT_EH_SRC(pfd_p) = PCIE_ZALLOC(pf_root_eh_src_t);
	}

	/* Every device gets the basic PCI error registers and affected-dev */
	PCI_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_err_regs_t);
	PFD_AFFECTED_DEV(pfd_p) = PCIE_ZALLOC(pf_affected_dev_t);
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;

	if (PCIE_IS_BDG(bus_p))
		PCI_BDG_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_bdg_err_regs_t);

	if (PCIE_IS_PCIE(bus_p)) {
		PCIE_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_err_regs_t);

		if (PCIE_IS_RP(bus_p))
			PCIE_RP_REG(pfd_p) =
			    PCIE_ZALLOC(pf_pcie_rp_err_regs_t);

		PCIE_ADV_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_err_regs_t);
		PCIE_ADV_REG(pfd_p)->pcie_ue_tgt_bdf = PCIE_INVALID_BDF;

		/* AER registers: root-port vs. PCIe-bridge variants */
		if (PCIE_IS_RP(bus_p)) {
			PCIE_ADV_RP_REG(pfd_p) =
			    PCIE_ZALLOC(pf_pcie_adv_rp_err_regs_t);
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id =
			    PCIE_INVALID_BDF;
			PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id =
			    PCIE_INVALID_BDF;
		} else if (PCIE_IS_PCIE_BDG(bus_p)) {
			PCIE_ADV_BDG_REG(pfd_p) =
			    PCIE_ZALLOC(pf_pcie_adv_bdg_err_regs_t);
			PCIE_ADV_BDG_REG(pfd_p)->pcie_sue_tgt_bdf =
			    PCIE_INVALID_BDF;
		}

		/* PCIe-to-PCI-X bridge: add PCI-X bridge (and ECC) state */
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
			PCIX_BDG_ERR_REG(pfd_p) =
			    PCIE_ZALLOC(pf_pcix_bdg_err_regs_t);

			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				PCIX_BDG_ECC_REG(pfd_p, 0) =
				    PCIE_ZALLOC(pf_pcix_ecc_regs_t);
				PCIX_BDG_ECC_REG(pfd_p, 1) =
				    PCIE_ZALLOC(pf_pcix_ecc_regs_t);
			}
		}

		PCIE_SLOT_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_slot_regs_t);
		PCIE_SLOT_REG(pfd_p)->pcie_slot_regs_valid = B_FALSE;
		PCIE_SLOT_REG(pfd_p)->pcie_slot_cap = 0;
		PCIE_SLOT_REG(pfd_p)->pcie_slot_control = 0;
		PCIE_SLOT_REG(pfd_p)->pcie_slot_status = 0;

	} else if (PCIE_IS_PCIX(bus_p)) {
		if (PCIE_IS_BDG(bus_p)) {
			PCIX_BDG_ERR_REG(pfd_p) =
			    PCIE_ZALLOC(pf_pcix_bdg_err_regs_t);

			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				PCIX_BDG_ECC_REG(pfd_p, 0) =
				    PCIE_ZALLOC(pf_pcix_ecc_regs_t);
				PCIX_BDG_ECC_REG(pfd_p, 1) =
				    PCIE_ZALLOC(pf_pcix_ecc_regs_t);
			}
		} else {
			PCIX_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcix_err_regs_t);

			if (PCIX_ECC_VERSION_CHECK(bus_p))
				PCIX_ECC_REG(pfd_p) =
				    PCIE_ZALLOC(pf_pcix_ecc_regs_t);
		}
	}
}
1321 
/*
 * Free everything allocated by pcie_init_pfd() for this dip.  The
 * type-dependent checks here must mirror the allocations made there
 * exactly, or memory is leaked / freed twice.
 */
static void
pcie_fini_pfd(dev_info_t *dip)
{
	pf_data_t	*pfd_p = PCIE_DIP2PFD(dip);
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);

	if (PCIE_IS_PCIE(bus_p)) {
		if (PCIE_IS_PCIE_BDG(bus_p) && PCIE_IS_PCIX(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				kmem_free(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				kmem_free(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}

			kmem_free(PCIX_BDG_ERR_REG(pfd_p),
			    sizeof (pf_pcix_bdg_err_regs_t));
		}

		if (PCIE_IS_RP(bus_p))
			kmem_free(PCIE_ADV_RP_REG(pfd_p),
			    sizeof (pf_pcie_adv_rp_err_regs_t));
		else if (PCIE_IS_PCIE_BDG(bus_p))
			kmem_free(PCIE_ADV_BDG_REG(pfd_p),
			    sizeof (pf_pcie_adv_bdg_err_regs_t));

		kmem_free(PCIE_ADV_REG(pfd_p),
		    sizeof (pf_pcie_adv_err_regs_t));

		if (PCIE_IS_RP(bus_p))
			kmem_free(PCIE_RP_REG(pfd_p),
			    sizeof (pf_pcie_rp_err_regs_t));

		kmem_free(PCIE_ERR_REG(pfd_p), sizeof (pf_pcie_err_regs_t));
	} else if (PCIE_IS_PCIX(bus_p)) {
		if (PCIE_IS_BDG(bus_p)) {
			if (PCIX_ECC_VERSION_CHECK(bus_p)) {
				kmem_free(PCIX_BDG_ECC_REG(pfd_p, 0),
				    sizeof (pf_pcix_ecc_regs_t));
				kmem_free(PCIX_BDG_ECC_REG(pfd_p, 1),
				    sizeof (pf_pcix_ecc_regs_t));
			}

			kmem_free(PCIX_BDG_ERR_REG(pfd_p),
			    sizeof (pf_pcix_bdg_err_regs_t));
		} else {
			if (PCIX_ECC_VERSION_CHECK(bus_p))
				kmem_free(PCIX_ECC_REG(pfd_p),
				    sizeof (pf_pcix_ecc_regs_t));

			kmem_free(PCIX_ERR_REG(pfd_p),
			    sizeof (pf_pcix_err_regs_t));
		}
	}

	if (PCIE_IS_BDG(bus_p))
		kmem_free(PCI_BDG_ERR_REG(pfd_p),
		    sizeof (pf_pci_bdg_err_regs_t));

	kmem_free(PFD_AFFECTED_DEV(pfd_p), sizeof (pf_affected_dev_t));
	kmem_free(PCI_ERR_REG(pfd_p), sizeof (pf_pci_err_regs_t));

	if (PCIE_IS_ROOT(bus_p)) {
		kmem_free(PCIE_ROOT_FAULT(pfd_p), sizeof (pf_root_fault_t));
		kmem_free(PCIE_ROOT_EH_SRC(pfd_p), sizeof (pf_root_eh_src_t));
	}

	kmem_free(PCIE_DIP2PFD(dip), sizeof (pf_data_t));

	PCIE_DIP2PFD(dip) = NULL;
}
1393 
1394 
1395 /*
1396  * Special functions to allocate pf_data_t's for PCIe root complexes.
1397  * Note: Root Complex not Root Port
1398  */
void
pcie_rc_init_pfd(dev_info_t *dip, pf_data_t *pfd_p)
{
	/* The caller supplies the pf_data_t; only its contents are set up. */
	pfd_p->pe_bus_p = PCIE_DIP2DOWNBUS(dip);
	pfd_p->pe_severity_flags = 0;
	pfd_p->pe_severity_mask = 0;
	pfd_p->pe_orig_severity_flags = 0;
	pfd_p->pe_lock = B_FALSE;
	pfd_p->pe_valid = B_FALSE;

	/* An RC gets the full root-port set of register-save areas. */
	PCIE_ROOT_FAULT(pfd_p) = PCIE_ZALLOC(pf_root_fault_t);
	PCIE_ROOT_FAULT(pfd_p)->scan_bdf = PCIE_INVALID_BDF;
	PCIE_ROOT_EH_SRC(pfd_p) = PCIE_ZALLOC(pf_root_eh_src_t);
	PCI_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_err_regs_t);
	PFD_AFFECTED_DEV(pfd_p) = PCIE_ZALLOC(pf_affected_dev_t);
	PFD_AFFECTED_DEV(pfd_p)->pe_affected_bdf = PCIE_INVALID_BDF;
	PCI_BDG_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pci_bdg_err_regs_t);
	PCIE_ERR_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_err_regs_t);
	PCIE_RP_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_rp_err_regs_t);
	PCIE_ADV_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_err_regs_t);
	PCIE_ADV_RP_REG(pfd_p) = PCIE_ZALLOC(pf_pcie_adv_rp_err_regs_t);
	PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ce_src_id = PCIE_INVALID_BDF;
	PCIE_ADV_RP_REG(pfd_p)->pcie_rp_ue_src_id = PCIE_INVALID_BDF;

	PCIE_ADV_REG(pfd_p)->pcie_ue_sev = pcie_aer_uce_severity;
}
1425 
/*
 * Release every register-save area allocated by pcie_rc_init_pfd().  The
 * pf_data_t itself is owned by the caller and is not freed here.
 */
void
pcie_rc_fini_pfd(pf_data_t *pfd_p)
{
	kmem_free(PCIE_ADV_RP_REG(pfd_p), sizeof (pf_pcie_adv_rp_err_regs_t));
	kmem_free(PCIE_ADV_REG(pfd_p), sizeof (pf_pcie_adv_err_regs_t));
	kmem_free(PCIE_RP_REG(pfd_p), sizeof (pf_pcie_rp_err_regs_t));
	kmem_free(PCIE_ERR_REG(pfd_p), sizeof (pf_pcie_err_regs_t));
	kmem_free(PCI_BDG_ERR_REG(pfd_p), sizeof (pf_pci_bdg_err_regs_t));
	kmem_free(PFD_AFFECTED_DEV(pfd_p), sizeof (pf_affected_dev_t));
	kmem_free(PCI_ERR_REG(pfd_p), sizeof (pf_pci_err_regs_t));
	kmem_free(PCIE_ROOT_FAULT(pfd_p), sizeof (pf_root_fault_t));
	kmem_free(PCIE_ROOT_EH_SRC(pfd_p), sizeof (pf_root_eh_src_t));
}
1439 
1440 /*
1441  * init pcie_bus_t for root complex
1442  *
 * Only a few of the fields in bus_t are valid for root complex.
1444  * The fields that are bracketed are initialized in this routine:
1445  *
1446  * dev_info_t *		<bus_dip>
1447  * dev_info_t *		bus_rp_dip
1448  * ddi_acc_handle_t	bus_cfg_hdl
1449  * uint_t		<bus_fm_flags>
1450  * pcie_req_id_t	bus_bdf
1451  * pcie_req_id_t	bus_rp_bdf
1452  * uint32_t		bus_dev_ven_id
1453  * uint8_t		bus_rev_id
1454  * uint8_t		<bus_hdr_type>
1455  * uint16_t		<bus_dev_type>
1456  * uint8_t		bus_bdg_secbus
1457  * uint16_t		bus_pcie_off
1458  * uint16_t		<bus_aer_off>
1459  * uint16_t		bus_pcix_off
1460  * uint16_t		bus_ecc_ver
1461  * pci_bus_range_t	bus_bus_range
1462  * ppb_ranges_t	*	bus_addr_ranges
1463  * int			bus_addr_entries
1464  * pci_regspec_t *	bus_assigned_addr
1465  * int			bus_assigned_entries
1466  * pf_data_t *		bus_pfd
1467  * pcie_domain_t *	<bus_dom>
1468  * int			bus_mps
1469  * uint64_t		bus_cfgacc_base
1470  * void	*		bus_plat_private
1471  */
void
pcie_rc_init_bus(dev_info_t *dip)
{
	pcie_bus_t *bus_p;

	bus_p = (pcie_bus_t *)kmem_zalloc(sizeof (pcie_bus_t), KM_SLEEP);
	bus_p->bus_dip = dip;
	bus_p->bus_dev_type = PCIE_PCIECAP_DEV_TYPE_RC_PSEUDO;
	bus_p->bus_hdr_type = PCI_HEADER_ONE;

	/* Fake that there are AER logs */
	bus_p->bus_aer_off = (uint16_t)-1;

	/* Needed only for handle lookup */
	atomic_or_uint(&bus_p->bus_fm_flags, PF_FM_READY);

	/* Stored as the down-side bus private data; see PCIE_DIP2DOWNBUS. */
	ndi_set_bus_private(dip, B_FALSE, DEVI_PORT_TYPE_PCI, bus_p);

	PCIE_BUS2DOM(bus_p) = PCIE_ZALLOC(pcie_domain_t);
}
1492 
/* Undo pcie_rc_init_bus(): detach and free the RC's bus_t and domain. */
void
pcie_rc_fini_bus(dev_info_t *dip)
{
	pcie_bus_t *bus_p = PCIE_DIP2DOWNBUS(dip);
	ndi_set_bus_private(dip, B_FALSE, 0, NULL);
	kmem_free(PCIE_BUS2DOM(bus_p), sizeof (pcie_domain_t));
	kmem_free(bus_p, sizeof (pcie_bus_t));
}
1501 
1502 static int
1503 pcie_width_to_int(pcie_link_width_t width)
1504 {
1505 	switch (width) {
1506 	case PCIE_LINK_WIDTH_X1:
1507 		return (1);
1508 	case PCIE_LINK_WIDTH_X2:
1509 		return (2);
1510 	case PCIE_LINK_WIDTH_X4:
1511 		return (4);
1512 	case PCIE_LINK_WIDTH_X8:
1513 		return (8);
1514 	case PCIE_LINK_WIDTH_X12:
1515 		return (12);
1516 	case PCIE_LINK_WIDTH_X16:
1517 		return (16);
1518 	case PCIE_LINK_WIDTH_X32:
1519 		return (32);
1520 	default:
1521 		return (0);
1522 	}
1523 }
1524 
1525 /*
1526  * Return the speed in Transfers / second. This is a signed quantity to match
1527  * the ndi/ddi property interfaces.
1528  */
1529 static int64_t
1530 pcie_speed_to_int(pcie_link_speed_t speed)
1531 {
1532 	switch (speed) {
1533 	case PCIE_LINK_SPEED_2_5:
1534 		return (2500000000LL);
1535 	case PCIE_LINK_SPEED_5:
1536 		return (5000000000LL);
1537 	case PCIE_LINK_SPEED_8:
1538 		return (8000000000LL);
1539 	case PCIE_LINK_SPEED_16:
1540 		return (16000000000LL);
1541 	case PCIE_LINK_SPEED_32:
1542 		return (32000000000LL);
1543 	case PCIE_LINK_SPEED_64:
1544 		return (64000000000LL);
1545 	default:
1546 		return (0);
1547 	}
1548 }
1549 
1550 /*
1551  * Translate the recorded speed information into devinfo properties.
1552  */
1553 static void
1554 pcie_speeds_to_devinfo(dev_info_t *dip, pcie_bus_t *bus_p)
1555 {
1556 	if (bus_p->bus_max_width != PCIE_LINK_WIDTH_UNKNOWN) {
1557 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
1558 		    "pcie-link-maximum-width",
1559 		    pcie_width_to_int(bus_p->bus_max_width));
1560 	}
1561 
1562 	if (bus_p->bus_cur_width != PCIE_LINK_WIDTH_UNKNOWN) {
1563 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
1564 		    "pcie-link-current-width",
1565 		    pcie_width_to_int(bus_p->bus_cur_width));
1566 	}
1567 
1568 	if (bus_p->bus_cur_speed != PCIE_LINK_SPEED_UNKNOWN) {
1569 		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1570 		    "pcie-link-current-speed",
1571 		    pcie_speed_to_int(bus_p->bus_cur_speed));
1572 	}
1573 
1574 	if (bus_p->bus_max_speed != PCIE_LINK_SPEED_UNKNOWN) {
1575 		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1576 		    "pcie-link-maximum-speed",
1577 		    pcie_speed_to_int(bus_p->bus_max_speed));
1578 	}
1579 
1580 	if (bus_p->bus_target_speed != PCIE_LINK_SPEED_UNKNOWN) {
1581 		(void) ndi_prop_update_int64(DDI_DEV_T_NONE, dip,
1582 		    "pcie-link-target-speed",
1583 		    pcie_speed_to_int(bus_p->bus_target_speed));
1584 	}
1585 
1586 	if ((bus_p->bus_speed_flags & PCIE_LINK_F_ADMIN_TARGET) != 0) {
1587 		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip,
1588 		    "pcie-link-admin-target-speed");
1589 	}
1590 
1591 	if (bus_p->bus_sup_speed != PCIE_LINK_SPEED_UNKNOWN) {
1592 		int64_t speeds[PCIE_NSPEEDS];
1593 		uint_t nspeeds = 0;
1594 
1595 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_2_5) {
1596 			speeds[nspeeds++] =
1597 			    pcie_speed_to_int(PCIE_LINK_SPEED_2_5);
1598 		}
1599 
1600 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_5) {
1601 			speeds[nspeeds++] =
1602 			    pcie_speed_to_int(PCIE_LINK_SPEED_5);
1603 		}
1604 
1605 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_8) {
1606 			speeds[nspeeds++] =
1607 			    pcie_speed_to_int(PCIE_LINK_SPEED_8);
1608 		}
1609 
1610 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_16) {
1611 			speeds[nspeeds++] =
1612 			    pcie_speed_to_int(PCIE_LINK_SPEED_16);
1613 		}
1614 
1615 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_32) {
1616 			speeds[nspeeds++] =
1617 			    pcie_speed_to_int(PCIE_LINK_SPEED_32);
1618 		}
1619 
1620 		if (bus_p->bus_sup_speed & PCIE_LINK_SPEED_64) {
1621 			speeds[nspeeds++] =
1622 			    pcie_speed_to_int(PCIE_LINK_SPEED_64);
1623 		}
1624 
1625 		(void) ndi_prop_update_int64_array(DDI_DEV_T_NONE, dip,
1626 		    "pcie-link-supported-speeds", speeds, nspeeds);
1627 	}
1628 }
1629 
1630 /*
1631  * We need to capture the supported, maximum, and current device speed and
1632  * width. The way that this has been done has changed over time.
1633  *
1634  * Prior to PCIe Gen 3, there were only current and supported speed fields.
1635  * These were found in the link status and link capabilities registers of the
1636  * PCI express capability. With the change to PCIe Gen 3, the information in the
1637  * link capabilities changed to the maximum value. The supported speeds vector
1638  * was moved to the link capabilities 2 register.
1639  *
1640  * Now, a device may not implement some of these registers. To determine whether
1641  * or not it's here, we have to do the following. First, we need to check the
1642  * revision of the PCI express capability. The link capabilities 2 register did
1643  * not exist prior to version 2 of this capability. If a modern device does not
1644  * implement it, it is supposed to return zero for the register.
1645  */
static void
pcie_capture_speeds(dev_info_t *dip)
{
	uint16_t	vers, status;
	uint32_t	cap, cap2, ctl2;
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	dev_info_t	*rcdip;

	/* Only PCIe devices have link speed/width registers to capture. */
	if (!PCIE_IS_PCIE(bus_p))
		return;

	/*
	 * If the config-space handle has not been set up yet, read through
	 * the root complex (pci_cfgacc); otherwise use the cached capability
	 * access routines.  The same pattern repeats for every register read
	 * below.
	 */
	rcdip = pcie_get_rc_dip(dip);
	if (bus_p->bus_cfg_hdl == NULL) {
		vers = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
		    bus_p->bus_pcie_off + PCIE_PCIECAP);
	} else {
		vers = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP);
	}
	/* An all-ones read means the device did not respond; bail out. */
	if (vers == PCI_EINVAL16)
		return;
	vers &= PCIE_PCIECAP_VER_MASK;

	/*
	 * Verify the capability's version.  Link Capabilities 2 and Link
	 * Control 2 only exist from version 2 of the PCIe capability on
	 * (see the block comment above); for version 1 we treat them as
	 * zero, which the decoding below handles.
	 */
	switch (vers) {
	case PCIE_PCIECAP_VER_1_0:
		cap2 = 0;
		ctl2 = 0;
		break;
	case PCIE_PCIECAP_VER_2_0:
		if (bus_p->bus_cfg_hdl == NULL) {
			cap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
			    bus_p->bus_pcie_off + PCIE_LINKCAP2);
			ctl2 = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
			    bus_p->bus_pcie_off + PCIE_LINKCTL2);
		} else {
			cap2 = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP2);
			ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
		}
		/* Treat failed reads as "register not implemented". */
		if (cap2 == PCI_EINVAL32)
			cap2 = 0;
		if (ctl2 == PCI_EINVAL16)
			ctl2 = 0;
		break;
	default:
		/* Don't try and handle an unknown version */
		return;
	}

	/* Link Status and Link Capabilities exist in all versions. */
	if (bus_p->bus_cfg_hdl == NULL) {
		status = pci_cfgacc_get16(rcdip, bus_p->bus_bdf,
		    bus_p->bus_pcie_off + PCIE_LINKSTS);
		cap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
		    bus_p->bus_pcie_off + PCIE_LINKCAP);
	} else {
		status = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
		cap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
	}
	if (status == PCI_EINVAL16 || cap == PCI_EINVAL32)
		return;

	/*
	 * All of the speed/width fields in the bus_t are protected by
	 * bus_speed_mutex; hold it across the decode and the devinfo
	 * property update so readers see a consistent snapshot.
	 */
	mutex_enter(&bus_p->bus_speed_mutex);

	/* Current (negotiated) link speed from Link Status. */
	switch (status & PCIE_LINKSTS_SPEED_MASK) {
	case PCIE_LINKSTS_SPEED_2_5:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_2_5;
		break;
	case PCIE_LINKSTS_SPEED_5:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_5;
		break;
	case PCIE_LINKSTS_SPEED_8:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_8;
		break;
	case PCIE_LINKSTS_SPEED_16:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_16;
		break;
	case PCIE_LINKSTS_SPEED_32:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_32;
		break;
	case PCIE_LINKSTS_SPEED_64:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_64;
		break;
	default:
		bus_p->bus_cur_speed = PCIE_LINK_SPEED_UNKNOWN;
		break;
	}

	/* Current (negotiated) link width from Link Status. */
	switch (status & PCIE_LINKSTS_NEG_WIDTH_MASK) {
	case PCIE_LINKSTS_NEG_WIDTH_X1:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X1;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X2:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X2;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X4:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X4;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X8:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X8;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X12:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X12;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X16:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X16;
		break;
	case PCIE_LINKSTS_NEG_WIDTH_X32:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_X32;
		break;
	default:
		bus_p->bus_cur_width = PCIE_LINK_WIDTH_UNKNOWN;
		break;
	}

	/* Maximum link width from Link Capabilities. */
	switch (cap & PCIE_LINKCAP_MAX_WIDTH_MASK) {
	case PCIE_LINKCAP_MAX_WIDTH_X1:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X1;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X2:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X2;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X4:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X4;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X8:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X8;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X12:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X12;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X16:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X16;
		break;
	case PCIE_LINKCAP_MAX_WIDTH_X32:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_X32;
		break;
	default:
		bus_p->bus_max_width = PCIE_LINK_WIDTH_UNKNOWN;
		break;
	}

	/*
	 * If we have the Link Capabilities 2, then we can get the supported
	 * speeds from it and treat the bits in Link Capabilities 1 as the
	 * maximum. If we don't, then we need to follow the Implementation Note
	 * in the standard under Link Capabilities 2. Effectively, this means
	 * that if the value of 10b is set in Link Capabilities register, that
	 * it supports both 2.5 and 5 GT/s speeds.
	 */
	if (cap2 != 0) {
		if (cap2 & PCIE_LINKCAP2_SPEED_2_5)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_2_5;
		if (cap2 & PCIE_LINKCAP2_SPEED_5)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_5;
		if (cap2 & PCIE_LINKCAP2_SPEED_8)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_8;
		if (cap2 & PCIE_LINKCAP2_SPEED_16)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_16;
		if (cap2 & PCIE_LINKCAP2_SPEED_32)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_32;
		if (cap2 & PCIE_LINKCAP2_SPEED_64)
			bus_p->bus_sup_speed |= PCIE_LINK_SPEED_64;

		switch (cap & PCIE_LINKCAP_MAX_SPEED_MASK) {
		case PCIE_LINKCAP_MAX_SPEED_2_5:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5;
			break;
		case PCIE_LINKCAP_MAX_SPEED_5:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_5;
			break;
		case PCIE_LINKCAP_MAX_SPEED_8:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_8;
			break;
		case PCIE_LINKCAP_MAX_SPEED_16:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_16;
			break;
		case PCIE_LINKCAP_MAX_SPEED_32:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_32;
			break;
		case PCIE_LINKCAP_MAX_SPEED_64:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_64;
			break;
		default:
			bus_p->bus_max_speed = PCIE_LINK_SPEED_UNKNOWN;
			break;
		}
	} else {
		/* Pre-Gen3 device: infer support per the Implementation Note. */
		if (cap & PCIE_LINKCAP_MAX_SPEED_5) {
			bus_p->bus_max_speed = PCIE_LINK_SPEED_5;
			bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5 |
			    PCIE_LINK_SPEED_5;
		} else if (cap & PCIE_LINKCAP_MAX_SPEED_2_5) {
			bus_p->bus_max_speed = PCIE_LINK_SPEED_2_5;
			bus_p->bus_sup_speed = PCIE_LINK_SPEED_2_5;
		}
	}

	/* Target link speed from Link Control 2 (zero when not present). */
	switch (ctl2 & PCIE_LINKCTL2_TARGET_SPEED_MASK) {
	case PCIE_LINKCTL2_TARGET_SPEED_2_5:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_2_5;
		break;
	case PCIE_LINKCTL2_TARGET_SPEED_5:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_5;
		break;
	case PCIE_LINKCTL2_TARGET_SPEED_8:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_8;
		break;
	case PCIE_LINKCTL2_TARGET_SPEED_16:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_16;
		break;
	case PCIE_LINKCTL2_TARGET_SPEED_32:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_32;
		break;
	case PCIE_LINKCTL2_TARGET_SPEED_64:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_64;
		break;
	default:
		bus_p->bus_target_speed = PCIE_LINK_SPEED_UNKNOWN;
		break;
	}

	/* Publish the captured values as devinfo properties. */
	pcie_speeds_to_devinfo(dip, bus_p);
	mutex_exit(&bus_p->bus_speed_mutex);
}
1871 
1872 /*
1873  * partially init pcie_bus_t for device (dip,bdf) for accessing pci
1874  * config space
1875  *
1876  * This routine is invoked during boot, either after creating a devinfo node
1877  * (x86 case) or during px driver attach (sparc case); it is also invoked
1878  * in hotplug context after a devinfo node is created.
1879  *
1880  * The fields that are bracketed are initialized if flag PCIE_BUS_INITIAL
1881  * is set:
1882  *
1883  * dev_info_t *		<bus_dip>
1884  * dev_info_t *		<bus_rp_dip>
1885  * ddi_acc_handle_t	bus_cfg_hdl
1886  * uint_t		bus_fm_flags
1887  * pcie_req_id_t	<bus_bdf>
1888  * pcie_req_id_t	<bus_rp_bdf>
1889  * uint32_t		<bus_dev_ven_id>
1890  * uint8_t		<bus_rev_id>
1891  * uint8_t		<bus_hdr_type>
1892  * uint16_t		<bus_dev_type>
1893  * uint8_t		<bus_bdg_secbus
1894  * uint16_t		<bus_pcie_off>
1895  * uint16_t		<bus_aer_off>
1896  * uint16_t		<bus_pcix_off>
1897  * uint16_t		<bus_ecc_ver>
1898  * pci_bus_range_t	bus_bus_range
1899  * ppb_ranges_t	*	bus_addr_ranges
1900  * int			bus_addr_entries
1901  * pci_regspec_t *	bus_assigned_addr
1902  * int			bus_assigned_entries
1903  * pf_data_t *		bus_pfd
1904  * pcie_domain_t *	bus_dom
1905  * int			bus_mps
1906  * uint64_t		bus_cfgacc_base
1907  * void	*		bus_plat_private
1908  *
1909  * The fields that are bracketed are initialized if flag PCIE_BUS_FINAL
1910  * is set:
1911  *
1912  * dev_info_t *		bus_dip
1913  * dev_info_t *		bus_rp_dip
1914  * ddi_acc_handle_t	bus_cfg_hdl
1915  * uint_t		bus_fm_flags
1916  * pcie_req_id_t	bus_bdf
1917  * pcie_req_id_t	bus_rp_bdf
1918  * uint32_t		bus_dev_ven_id
1919  * uint8_t		bus_rev_id
1920  * uint8_t		bus_hdr_type
1921  * uint16_t		bus_dev_type
1922  * uint8_t		<bus_bdg_secbus>
1923  * uint16_t		bus_pcie_off
1924  * uint16_t		bus_aer_off
1925  * uint16_t		bus_pcix_off
1926  * uint16_t		bus_ecc_ver
1927  * pci_bus_range_t	<bus_bus_range>
1928  * ppb_ranges_t	*	<bus_addr_ranges>
1929  * int			<bus_addr_entries>
1930  * pci_regspec_t *	<bus_assigned_addr>
1931  * int			<bus_assigned_entries>
1932  * pf_data_t *		<bus_pfd>
1933  * pcie_domain_t *	bus_dom
1934  * int			bus_mps
1935  * uint64_t		bus_cfgacc_base
1936  * void	*		<bus_plat_private>
1937  */
1938 
pcie_bus_t *
pcie_init_bus(dev_info_t *dip, pcie_req_id_t bdf, uint8_t flags)
{
	uint16_t	status, base, baseptr, num_cap;
	uint32_t	capid;
	int		range_size;
	pcie_bus_t	*bus_p = NULL;
	dev_info_t	*rcdip;
	dev_info_t	*pdip;
	const char	*errstr = NULL;

	/* Without PCIE_BUS_INITIAL, skip straight to the FINAL phase. */
	if (!(flags & PCIE_BUS_INITIAL))
		goto initial_done;

	bus_p = kmem_zalloc(sizeof (pcie_bus_t), KM_SLEEP);

	bus_p->bus_dip = dip;
	bus_p->bus_bdf = bdf;

	rcdip = pcie_get_rc_dip(dip);
	ASSERT(rcdip != NULL);

	/* Save the Vendor ID, Device ID and revision ID */
	bus_p->bus_dev_ven_id = pci_cfgacc_get32(rcdip, bdf, PCI_CONF_VENID);
	bus_p->bus_rev_id = pci_cfgacc_get8(rcdip, bdf, PCI_CONF_REVID);
	/* Save the Header Type */
	bus_p->bus_hdr_type = pci_cfgacc_get8(rcdip, bdf, PCI_CONF_HEADER);
	bus_p->bus_hdr_type &= PCI_HEADER_TYPE_M;

	/*
	 * Figure out the device type and all the relevant capability offsets
	 */
	/* set default value */
	bus_p->bus_dev_type = PCIE_PCIECAP_DEV_TYPE_PCI_PSEUDO;

	status = pci_cfgacc_get16(rcdip, bdf, PCI_CONF_STAT);
	if (status == PCI_CAP_EINVAL16 || !(status & PCI_STAT_CAP))
		goto caps_done; /* capability not supported */

	/* Relevant conventional capabilities first */

	/* Conventional caps: PCI_CAP_ID_PCI_E, PCI_CAP_ID_PCIX */
	num_cap = 2;

	/*
	 * The capability pointer lives at a different config offset
	 * depending on the header type.
	 */
	switch (bus_p->bus_hdr_type) {
	case PCI_HEADER_ZERO:
		baseptr = PCI_CONF_CAP_PTR;
		break;
	case PCI_HEADER_PPB:
		baseptr = PCI_BCNF_CAP_PTR;
		break;
	case PCI_HEADER_CARDBUS:
		baseptr = PCI_CBUS_CAP_PTR;
		break;
	default:
		cmn_err(CE_WARN, "%s: unexpected pci header type:%x",
		    __func__, bus_p->bus_hdr_type);
		goto caps_done;
	}

	/*
	 * Walk the conventional capability list; stop once the end of the
	 * list (offset 0) is reached or both capabilities of interest have
	 * been found.
	 */
	base = baseptr;
	for (base = pci_cfgacc_get8(rcdip, bdf, base); base && num_cap;
	    base = pci_cfgacc_get8(rcdip, bdf, base + PCI_CAP_NEXT_PTR)) {
		capid = pci_cfgacc_get8(rcdip, bdf, base);
		uint16_t pcap;

		switch (capid) {
		case PCI_CAP_ID_PCI_E:
			bus_p->bus_pcie_off = base;
			pcap = pci_cfgacc_get16(rcdip, bdf, base +
			    PCIE_PCIECAP);
			bus_p->bus_dev_type = pcap & PCIE_PCIECAP_DEV_TYPE_MASK;
			bus_p->bus_pcie_vers = pcap & PCIE_PCIECAP_VER_MASK;

			/* Check and save PCIe hotplug capability information */
			if ((PCIE_IS_RP(bus_p) || PCIE_IS_SWD(bus_p)) &&
			    (pci_cfgacc_get16(rcdip, bdf, base + PCIE_PCIECAP)
			    & PCIE_PCIECAP_SLOT_IMPL) &&
			    (pci_cfgacc_get32(rcdip, bdf, base + PCIE_SLOTCAP)
			    & PCIE_SLOTCAP_HP_CAPABLE))
				bus_p->bus_hp_sup_modes |= PCIE_NATIVE_HP_MODE;

			num_cap--;
			break;
		case PCI_CAP_ID_PCIX:
			bus_p->bus_pcix_off = base;
			/*
			 * Bridges carry the ECC version in the secondary
			 * status register, endpoints in the command register.
			 */
			if (PCIE_IS_BDG(bus_p))
				bus_p->bus_ecc_ver =
				    pci_cfgacc_get16(rcdip, bdf, base +
				    PCI_PCIX_SEC_STATUS) & PCI_PCIX_VER_MASK;
			else
				bus_p->bus_ecc_ver =
				    pci_cfgacc_get16(rcdip, bdf, base +
				    PCI_PCIX_COMMAND) & PCI_PCIX_VER_MASK;
			num_cap--;
			break;
		default:
			break;
		}
	}

	/* Check and save PCI hotplug (SHPC) capability information */
	if (PCIE_IS_BDG(bus_p)) {
		base = baseptr;
		for (base = pci_cfgacc_get8(rcdip, bdf, base);
		    base; base = pci_cfgacc_get8(rcdip, bdf,
		    base + PCI_CAP_NEXT_PTR)) {
			capid = pci_cfgacc_get8(rcdip, bdf, base);
			if (capid == PCI_CAP_ID_PCI_HOTPLUG) {
				bus_p->bus_pci_hp_off = base;
				bus_p->bus_hp_sup_modes |= PCIE_PCI_HP_MODE;
				break;
			}
		}
	}

	/* Then, relevant extended capabilities */

	if (!PCIE_IS_PCIE(bus_p))
		goto caps_done;

	/* Extended caps: PCIE_EXT_CAP_ID_AER */
	for (base = PCIE_EXT_CAP; base; base = (capid >>
	    PCIE_EXT_CAP_NEXT_PTR_SHIFT) & PCIE_EXT_CAP_NEXT_PTR_MASK) {
		capid = pci_cfgacc_get32(rcdip, bdf, base);
		/* A failed read terminates the extended-capability walk. */
		if (capid == PCI_CAP_EINVAL32)
			break;
		switch ((capid >> PCIE_EXT_CAP_ID_SHIFT) &
		    PCIE_EXT_CAP_ID_MASK) {
		case PCIE_EXT_CAP_ID_AER:
			bus_p->bus_aer_off = base;
			break;
		case PCIE_EXT_CAP_ID_DEV3:
			bus_p->bus_dev3_off = base;
			break;
		}
	}

caps_done:
	/* save RP dip and RP bdf */
	if (PCIE_IS_RP(bus_p)) {
		bus_p->bus_rp_dip = dip;
		bus_p->bus_rp_bdf = bus_p->bus_bdf;

		/* Each root port owns the fabric data for its fabric. */
		bus_p->bus_fab = PCIE_ZALLOC(pcie_fabric_data_t);
	} else {
		for (pdip = ddi_get_parent(dip); pdip;
		    pdip = ddi_get_parent(pdip)) {
			pcie_bus_t *parent_bus_p = PCIE_DIP2BUS(pdip);

			/*
			 * If RP dip and RP bdf in parent's bus_t have
			 * been initialized, simply use these instead of
			 * continuing up to the RC.
			 */
			if (parent_bus_p->bus_rp_dip != NULL) {
				bus_p->bus_rp_dip = parent_bus_p->bus_rp_dip;
				bus_p->bus_rp_bdf = parent_bus_p->bus_rp_bdf;
				break;
			}

			/*
			 * When debugging be aware that some NVIDIA x86
			 * architectures have 2 nodes for each RP, One at Bus
			 * 0x0 and one at Bus 0x80.  The requester is from Bus
			 * 0x80
			 */
			if (PCIE_IS_ROOT(parent_bus_p)) {
				bus_p->bus_rp_dip = pdip;
				bus_p->bus_rp_bdf = parent_bus_p->bus_bdf;
				break;
			}
		}
	}

	bus_p->bus_soft_state = PCI_SOFT_STATE_CLOSED;
	(void) atomic_swap_uint(&bus_p->bus_fm_flags, 0);

	/* Attach the bus_t to the devinfo node so PCIE_DIP2BUS works. */
	ndi_set_bus_private(dip, B_TRUE, DEVI_PORT_TYPE_PCI, (void *)bus_p);

	if (PCIE_IS_HOTPLUG_CAPABLE(dip))
		(void) ndi_prop_create_boolean(DDI_DEV_T_NONE, dip,
		    "hotplug-capable");

initial_done:
	if (!(flags & PCIE_BUS_FINAL))
		goto final_done;

	/* already initialized? */
	bus_p = PCIE_DIP2BUS(dip);

	/* Save the Range information if device is a switch/bridge */
	if (PCIE_IS_BDG(bus_p)) {
		/* get "bus_range" property */
		range_size = sizeof (pci_bus_range_t);
		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
		    "bus-range", (caddr_t)&bus_p->bus_bus_range, &range_size)
		    != DDI_PROP_SUCCESS) {
			errstr = "Cannot find \"bus-range\" property";
			cmn_err(CE_WARN,
			    "PCIE init err info failed BDF 0x%x:%s\n",
			    bus_p->bus_bdf, errstr);
		}

		/* get secondary bus number */
		rcdip = pcie_get_rc_dip(dip);
		ASSERT(rcdip != NULL);

		bus_p->bus_bdg_secbus = pci_cfgacc_get8(rcdip,
		    bus_p->bus_bdf, PCI_BCNF_SECBUS);

		/* Get "ranges" property */
		if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
		    "ranges", (caddr_t)&bus_p->bus_addr_ranges,
		    &bus_p->bus_addr_entries) != DDI_PROP_SUCCESS)
			bus_p->bus_addr_entries = 0;
		/* convert byte count to number of ppb_ranges_t entries */
		bus_p->bus_addr_entries /= sizeof (ppb_ranges_t);
	}

	/* save "assigned-addresses" property array, ignore failures */
	if (ddi_getlongprop(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "assigned-addresses", (caddr_t)&bus_p->bus_assigned_addr,
	    &bus_p->bus_assigned_entries) == DDI_PROP_SUCCESS)
		bus_p->bus_assigned_entries /= sizeof (pci_regspec_t);
	else
		bus_p->bus_assigned_entries = 0;

	pcie_init_pfd(dip);

	pcie_init_plat(dip);

	pcie_capture_speeds(dip);

final_done:

	/*
	 * NOTE(review): bus_p is dereferenced here unconditionally, while the
	 * DEBUG block below guards against bus_p == NULL -- callers appear
	 * expected to have run the INITIAL phase first; confirm.
	 */
	PCIE_DBG("Add %s(dip 0x%p, bdf 0x%x, secbus 0x%x)\n",
	    ddi_driver_name(dip), (void *)dip, bus_p->bus_bdf,
	    bus_p->bus_bdg_secbus);
#ifdef DEBUG
	if (bus_p != NULL) {
		pcie_print_bus(bus_p);
	}
#endif

	return (bus_p);
}
2185 
2186 /*
2187  * Invoked before destroying devinfo node, mostly during hotplug
2188  * operation to free pcie_bus_t data structure
2189  */
2190 /* ARGSUSED */
void
pcie_fini_bus(dev_info_t *dip, uint8_t flags)
{
	pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
	ASSERT(bus_p);

	/* Undo the PCIE_BUS_INITIAL portion of pcie_init_bus(). */
	if (flags & PCIE_BUS_INITIAL) {
		pcie_fini_plat(dip);
		pcie_fini_pfd(dip);

		/* Root ports own the fabric data; release it here. */
		if (PCIE_IS_RP(bus_p)) {
			kmem_free(bus_p->bus_fab, sizeof (pcie_fabric_data_t));
			bus_p->bus_fab = NULL;
		}

		/*
		 * These are freed unconditionally; when the properties were
		 * never read the pointers are NULL and the counts zero
		 * (zero-length free is a no-op).
		 */
		kmem_free(bus_p->bus_assigned_addr,
		    (sizeof (pci_regspec_t) * bus_p->bus_assigned_entries));
		kmem_free(bus_p->bus_addr_ranges,
		    (sizeof (ppb_ranges_t) * bus_p->bus_addr_entries));
		/* zero out the fields that have been destroyed */
		bus_p->bus_assigned_addr = NULL;
		bus_p->bus_addr_ranges = NULL;
		bus_p->bus_assigned_entries = 0;
		bus_p->bus_addr_entries = 0;
	}

	/* Undo the PCIE_BUS_FINAL portion and free the bus_t itself. */
	if (flags & PCIE_BUS_FINAL) {
		if (PCIE_IS_HOTPLUG_CAPABLE(dip)) {
			(void) ndi_prop_remove(DDI_DEV_T_NONE, dip,
			    "hotplug-capable");
		}

		/* Detach the bus_t from the devinfo node before freeing. */
		ndi_set_bus_private(dip, B_TRUE, 0, NULL);
		kmem_free(bus_p, sizeof (pcie_bus_t));
	}
}
2227 
2228 int
2229 pcie_postattach_child(dev_info_t *cdip)
2230 {
2231 	pcie_bus_t *bus_p = PCIE_DIP2BUS(cdip);
2232 
2233 	if (!bus_p)
2234 		return (DDI_FAILURE);
2235 
2236 	return (pcie_enable_ce(cdip));
2237 }
2238 
2239 /*
2240  * PCI-Express child device de-initialization.
2241  * This function disables generic pci-express interrupts and error
2242  * handling.
2243  */
void
pcie_uninitchild(dev_info_t *cdip)
{
	/*
	 * Order matters: quiesce error generation first, then tear down the
	 * config access handle and finally the IOV domain state.
	 */
	pcie_disable_errors(cdip);
	pcie_fini_cfghdl(cdip);
	pcie_fini_dom(cdip);
}
2251 
2252 /*
2253  * find the root complex dip
2254  */
2255 dev_info_t *
2256 pcie_get_rc_dip(dev_info_t *dip)
2257 {
2258 	dev_info_t *rcdip;
2259 	pcie_bus_t *rc_bus_p;
2260 
2261 	for (rcdip = ddi_get_parent(dip); rcdip;
2262 	    rcdip = ddi_get_parent(rcdip)) {
2263 		rc_bus_p = PCIE_DIP2BUS(rcdip);
2264 		if (rc_bus_p && PCIE_IS_RC(rc_bus_p))
2265 			break;
2266 	}
2267 
2268 	return (rcdip);
2269 }
2270 
2271 boolean_t
2272 pcie_is_pci_device(dev_info_t *dip)
2273 {
2274 	dev_info_t	*pdip;
2275 	char		*device_type;
2276 
2277 	pdip = ddi_get_parent(dip);
2278 	if (pdip == NULL)
2279 		return (B_FALSE);
2280 
2281 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, DDI_PROP_DONTPASS,
2282 	    "device_type", &device_type) != DDI_PROP_SUCCESS)
2283 		return (B_FALSE);
2284 
2285 	if (strcmp(device_type, "pciex") != 0 &&
2286 	    strcmp(device_type, "pci") != 0) {
2287 		ddi_prop_free(device_type);
2288 		return (B_FALSE);
2289 	}
2290 
2291 	ddi_prop_free(device_type);
2292 	return (B_TRUE);
2293 }
2294 
/* Walk argument for pcie_fab_do_init_fini(). */
typedef struct {
	boolean_t	init;	/* B_TRUE: init bus_t; B_FALSE: tear down */
	uint8_t		flags;	/* PCIE_BUS_INITIAL and/or PCIE_BUS_FINAL */
} pcie_bus_arg_t;
2299 
2300 /*ARGSUSED*/
2301 static int
2302 pcie_fab_do_init_fini(dev_info_t *dip, void *arg)
2303 {
2304 	pcie_req_id_t	bdf;
2305 	pcie_bus_arg_t	*bus_arg = (pcie_bus_arg_t *)arg;
2306 
2307 	if (!pcie_is_pci_device(dip))
2308 		goto out;
2309 
2310 	if (bus_arg->init) {
2311 		if (pcie_get_bdf_from_dip(dip, &bdf) != DDI_SUCCESS)
2312 			goto out;
2313 
2314 		(void) pcie_init_bus(dip, bdf, bus_arg->flags);
2315 	} else {
2316 		(void) pcie_fini_bus(dip, bus_arg->flags);
2317 	}
2318 
2319 	return (DDI_WALK_CONTINUE);
2320 
2321 out:
2322 	return (DDI_WALK_PRUNECHILD);
2323 }
2324 
2325 void
2326 pcie_fab_init_bus(dev_info_t *rcdip, uint8_t flags)
2327 {
2328 	int		circular_count;
2329 	dev_info_t	*dip = ddi_get_child(rcdip);
2330 	pcie_bus_arg_t	arg;
2331 
2332 	arg.init = B_TRUE;
2333 	arg.flags = flags;
2334 
2335 	ndi_devi_enter(rcdip, &circular_count);
2336 	ddi_walk_devs(dip, pcie_fab_do_init_fini, &arg);
2337 	ndi_devi_exit(rcdip, circular_count);
2338 }
2339 
2340 void
2341 pcie_fab_fini_bus(dev_info_t *rcdip, uint8_t flags)
2342 {
2343 	int		circular_count;
2344 	dev_info_t	*dip = ddi_get_child(rcdip);
2345 	pcie_bus_arg_t	arg;
2346 
2347 	arg.init = B_FALSE;
2348 	arg.flags = flags;
2349 
2350 	ndi_devi_enter(rcdip, &circular_count);
2351 	ddi_walk_devs(dip, pcie_fab_do_init_fini, &arg);
2352 	ndi_devi_exit(rcdip, circular_count);
2353 }
2354 
/*
 * Enable baseline and (when present) Advanced Error Reporting for dip.
 * Correctable error reporting is deliberately left off here; it is turned
 * on separately via pcie_enable_ce().
 */
void
pcie_enable_errors(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	uint16_t	reg16, tmp16;
	uint32_t	reg32, tmp32;

	ASSERT(bus_p);

	/*
	 * Clear any pending errors
	 */
	pcie_clear_errors(dip);

	if (!PCIE_IS_PCIE(bus_p))
		return;

	/*
	 * Enable Baseline Error Handling but leave CE reporting off (poweron
	 * default).  Max read request and max payload settings are preserved
	 * from the current register value.
	 */
	if ((reg16 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL)) !=
	    PCI_CAP_EINVAL16) {
		tmp16 = (reg16 & (PCIE_DEVCTL_MAX_READ_REQ_MASK |
		    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) |
		    (pcie_devctl_default & ~(PCIE_DEVCTL_MAX_READ_REQ_MASK |
		    PCIE_DEVCTL_MAX_PAYLOAD_MASK)) |
		    (pcie_base_err_default & (~PCIE_DEVCTL_CE_REPORTING_EN));

		PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, tmp16);
		PCIE_DBG_CAP(dip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, reg16);
	}

	/* Enable Root Port Baseline Error Receiving */
	if (PCIE_IS_ROOT(bus_p) &&
	    (reg16 = PCIE_CAP_GET(16, bus_p, PCIE_ROOTCTL)) !=
	    PCI_CAP_EINVAL16) {

		/* System error generation honors pcie_serr_disable_flag. */
		tmp16 = pcie_serr_disable_flag ?
		    (pcie_root_ctrl_default & ~PCIE_ROOT_SYS_ERR) :
		    pcie_root_ctrl_default;
		PCIE_CAP_PUT(16, bus_p, PCIE_ROOTCTL, tmp16);
		PCIE_DBG_CAP(dip, bus_p, "ROOT DEVCTL", 16, PCIE_ROOTCTL,
		    reg16);
	}

	/*
	 * Enable PCI-Express Advanced Error Handling if Exists
	 */
	if (!PCIE_HAS_AER(bus_p))
		return;

	/* Set Uncorrectable Severity */
	if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_UCE_SERV)) !=
	    PCI_CAP_EINVAL32) {
		tmp32 = pcie_aer_uce_severity;

		PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_SERV, tmp32);
		PCIE_DBG_AER(dip, bus_p, "AER UCE SEV", 32, PCIE_AER_UCE_SERV,
		    reg32);
	}

	/* Enable Uncorrectable errors */
	if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_UCE_MASK)) !=
	    PCI_CAP_EINVAL32) {
		tmp32 = pcie_aer_uce_mask;

		PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_MASK, tmp32);
		PCIE_DBG_AER(dip, bus_p, "AER UCE MASK", 32, PCIE_AER_UCE_MASK,
		    reg32);
	}

	/* Enable ECRC generation and checking */
	if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_CTL)) !=
	    PCI_CAP_EINVAL32) {
		tmp32 = reg32 | pcie_ecrc_value;
		PCIE_AER_PUT(32, bus_p, PCIE_AER_CTL, tmp32);
		PCIE_DBG_AER(dip, bus_p, "AER CTL", 32, PCIE_AER_CTL, reg32);
	}

	/* Enable Secondary Uncorrectable errors if this is a bridge */
	if (!PCIE_IS_PCIE_BDG(bus_p))
		goto root;

	/* Set Uncorrectable Severity */
	if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_SUCE_SERV)) !=
	    PCI_CAP_EINVAL32) {
		tmp32 = pcie_aer_suce_severity;

		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_SERV, tmp32);
		PCIE_DBG_AER(dip, bus_p, "AER SUCE SEV", 32, PCIE_AER_SUCE_SERV,
		    reg32);
	}

	if ((reg32 = PCIE_AER_GET(32, bus_p, PCIE_AER_SUCE_MASK)) !=
	    PCI_CAP_EINVAL32) {
		PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_MASK, pcie_aer_suce_mask);
		PCIE_DBG_AER(dip, bus_p, "AER SUCE MASK", 32,
		    PCIE_AER_SUCE_MASK, reg32);
	}

root:
	/*
	 * Enable Root Control if this is a Root device
	 */
	if (!PCIE_IS_ROOT(bus_p))
		return;

	if ((reg16 = PCIE_AER_GET(16, bus_p, PCIE_AER_RE_CMD)) !=
	    PCI_CAP_EINVAL16) {
		PCIE_AER_PUT(16, bus_p, PCIE_AER_RE_CMD,
		    pcie_root_error_cmd_default);
		PCIE_DBG_AER(dip, bus_p, "AER Root Err Cmd", 16,
		    PCIE_AER_RE_CMD, reg16);
	}
}
2471 
2472 /*
2473  * This function is used for enabling CE reporting and setting the AER CE mask.
2474  * When called from outside the pcie module it should always be preceded by
2475  * a call to pcie_enable_errors.
2476  */
int
pcie_enable_ce(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	uint16_t	device_sts, device_ctl;
	uint32_t	tmp_pcie_aer_ce_mask;

	/* Nothing to do for non-PCIe devices; report success. */
	if (!PCIE_IS_PCIE(bus_p))
		return (DDI_SUCCESS);

	/*
	 * The "pcie_ce_mask" property is used to control both the CE reporting
	 * enable field in the device control register and the AER CE mask. We
	 * leave CE reporting disabled if pcie_ce_mask is set to -1.
	 */

	tmp_pcie_aer_ce_mask = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "pcie_ce_mask", pcie_aer_ce_mask);

	if (tmp_pcie_aer_ce_mask == (uint32_t)-1) {
		/*
		 * Nothing to do since CE reporting has already been disabled.
		 */
		return (DDI_SUCCESS);
	}

	if (PCIE_HAS_AER(bus_p)) {
		/* Enable AER CE */
		PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_MASK, tmp_pcie_aer_ce_mask);
		PCIE_DBG_AER(dip, bus_p, "AER CE MASK", 32, PCIE_AER_CE_MASK,
		    0);

		/* Clear any pending AER CE errors (status bits are RW1C) */
		PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_STS, -1);
	}

	/* clear any pending CE errors */
	if ((device_sts = PCIE_CAP_GET(16, bus_p, PCIE_DEVSTS)) !=
	    PCI_CAP_EINVAL16)
		PCIE_CAP_PUT(16, bus_p, PCIE_DEVSTS,
		    device_sts & (~PCIE_DEVSTS_CE_DETECTED));

	/* Enable CE reporting */
	device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
	PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL,
	    (device_ctl & (~PCIE_DEVCTL_ERR_MASK)) | pcie_base_err_default);
	PCIE_DBG_CAP(dip, bus_p, "DEVCTL", 16, PCIE_DEVCTL, device_ctl);

	return (DDI_SUCCESS);
}
2527 
/*
 * Disable baseline and AER error reporting for dip; the inverse of
 * pcie_enable_errors()/pcie_enable_ce().
 */
/* ARGSUSED */
void
pcie_disable_errors(dev_info_t *dip)
{
	pcie_bus_t	*bus_p = PCIE_DIP2BUS(dip);
	uint16_t	device_ctl;
	uint32_t	aer_reg;

	if (!PCIE_IS_PCIE(bus_p))
		return;

	/*
	 * Disable PCI-Express Baseline Error Handling
	 */
	device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL);
	device_ctl &= ~PCIE_DEVCTL_ERR_MASK;
	PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL, device_ctl);

	/*
	 * Disable PCI-Express Advanced Error Handling if Exists
	 */
	if (!PCIE_HAS_AER(bus_p))
		goto root;

	/* Disable Uncorrectable errors (mask all UCE bits) */
	PCIE_AER_PUT(32, bus_p, PCIE_AER_UCE_MASK, PCIE_AER_UCE_BITS);

	/* Disable Correctable errors (mask all CE bits) */
	PCIE_AER_PUT(32, bus_p, PCIE_AER_CE_MASK, PCIE_AER_CE_BITS);

	/* Disable ECRC generation and checking */
	if ((aer_reg = PCIE_AER_GET(32, bus_p, PCIE_AER_CTL)) !=
	    PCI_CAP_EINVAL32) {
		aer_reg &= ~(PCIE_AER_CTL_ECRC_GEN_ENA |
		    PCIE_AER_CTL_ECRC_CHECK_ENA);

		PCIE_AER_PUT(32, bus_p, PCIE_AER_CTL, aer_reg);
	}
	/*
	 * Disable Secondary Uncorrectable errors if this is a bridge
	 */
	if (!PCIE_IS_PCIE_BDG(bus_p))
		goto root;

	PCIE_AER_PUT(32, bus_p, PCIE_AER_SUCE_MASK, PCIE_AER_SUCE_BITS);

root:
	/*
	 * disable Root Control if this is a Root device
	 */
	if (!PCIE_IS_ROOT(bus_p))
		return;

	/* Only touch system-error generation if it was enabled by us. */
	if (!pcie_serr_disable_flag) {
		device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_ROOTCTL);
		device_ctl &= ~PCIE_ROOT_SYS_ERR;
		PCIE_CAP_PUT(16, bus_p, PCIE_ROOTCTL, device_ctl);
	}

	if (!PCIE_HAS_AER(bus_p))
		return;

	if ((device_ctl = PCIE_CAP_GET(16, bus_p, PCIE_AER_RE_CMD)) !=
	    PCI_CAP_EINVAL16) {
		device_ctl &= ~pcie_root_error_cmd_default;
		PCIE_CAP_PUT(16, bus_p, PCIE_AER_RE_CMD, device_ctl);
	}
}
2596 
2597 /*
2598  * Extract bdf from "reg" property.
2599  */
2600 int
2601 pcie_get_bdf_from_dip(dev_info_t *dip, pcie_req_id_t *bdf)
2602 {
2603 	pci_regspec_t	*regspec;
2604 	int		reglen;
2605 
2606 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
2607 	    "reg", (int **)&regspec, (uint_t *)&reglen) != DDI_SUCCESS)
2608 		return (DDI_FAILURE);
2609 
2610 	if (reglen < (sizeof (pci_regspec_t) / sizeof (int))) {
2611 		ddi_prop_free(regspec);
2612 		return (DDI_FAILURE);
2613 	}
2614 
2615 	/* Get phys_hi from first element.  All have same bdf. */
2616 	*bdf = (regspec->pci_phys_hi & (PCI_REG_BDFR_M ^ PCI_REG_REG_M)) >> 8;
2617 
2618 	ddi_prop_free(regspec);
2619 	return (DDI_SUCCESS);
2620 }
2621 
2622 dev_info_t *
2623 pcie_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
2624 {
2625 	dev_info_t *cdip = rdip;
2626 
2627 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
2628 		;
2629 
2630 	return (cdip);
2631 }
2632 
2633 uint32_t
2634 pcie_get_bdf_for_dma_xfer(dev_info_t *dip, dev_info_t *rdip)
2635 {
2636 	dev_info_t *cdip;
2637 
2638 	/*
2639 	 * As part of the probing, the PCI fcode interpreter may setup a DMA
2640 	 * request if a given card has a fcode on it using dip and rdip of the
2641 	 * hotplug connector i.e, dip and rdip of px/pcieb driver. In this
2642 	 * case, return a invalid value for the bdf since we cannot get to the
2643 	 * bdf value of the actual device which will be initiating this DMA.
2644 	 */
2645 	if (rdip == dip)
2646 		return (PCIE_INVALID_BDF);
2647 
2648 	cdip = pcie_get_my_childs_dip(dip, rdip);
2649 
2650 	/*
2651 	 * For a given rdip, return the bdf value of dip's (px or pcieb)
2652 	 * immediate child or secondary bus-id if dip is a PCIe2PCI bridge.
2653 	 *
2654 	 * XXX - For now, return a invalid bdf value for all PCI and PCI-X
2655 	 * devices since this needs more work.
2656 	 */
2657 	return (PCI_GET_PCIE2PCI_SECBUS(cdip) ?
2658 	    PCIE_INVALID_BDF : PCI_GET_BDF(cdip));
2659 }
2660 
/* Return the default AER uncorrectable error mask. */
uint32_t
pcie_get_aer_uce_mask()
{
	return (pcie_aer_uce_mask);
}
/* Return the default AER correctable error mask. */
uint32_t
pcie_get_aer_ce_mask()
{
	return (pcie_aer_ce_mask);
}
/* Return the default AER secondary uncorrectable error mask. */
uint32_t
pcie_get_aer_suce_mask()
{
	return (pcie_aer_suce_mask);
}
/* Return the SERR disable flag (set via pcie_set_serr_mask()). */
uint32_t
pcie_get_serr_mask()
{
	return (pcie_serr_disable_flag);
}
2681 
2682 void
2683 pcie_set_aer_uce_mask(uint32_t mask)
2684 {
2685 	pcie_aer_uce_mask = mask;
2686 	if (mask & PCIE_AER_UCE_UR)
2687 		pcie_base_err_default &= ~PCIE_DEVCTL_UR_REPORTING_EN;
2688 	else
2689 		pcie_base_err_default |= PCIE_DEVCTL_UR_REPORTING_EN;
2690 
2691 	if (mask & PCIE_AER_UCE_ECRC)
2692 		pcie_ecrc_value = 0;
2693 }
2694 
/* Set the default AER correctable error mask. */
void
pcie_set_aer_ce_mask(uint32_t mask)
{
	pcie_aer_ce_mask = mask;
}
/* Set the default AER secondary uncorrectable error mask. */
void
pcie_set_aer_suce_mask(uint32_t mask)
{
	pcie_aer_suce_mask = mask;
}
/* Set the SERR disable flag (returned by pcie_get_serr_mask()). */
void
pcie_set_serr_mask(uint32_t mask)
{
	pcie_serr_disable_flag = mask;
}
2710 
2711 /*
2712  * Is the rdip a child of dip.	Used for checking certain CTLOPS from bubbling
2713  * up erronously.  Ex.	ISA ctlops to a PCI-PCI Bridge.
2714  */
2715 boolean_t
2716 pcie_is_child(dev_info_t *dip, dev_info_t *rdip)
2717 {
2718 	dev_info_t	*cdip = ddi_get_child(dip);
2719 	for (; cdip; cdip = ddi_get_next_sibling(cdip))
2720 		if (cdip == rdip)
2721 			break;
2722 	return (cdip != NULL);
2723 }
2724 
2725 boolean_t
2726 pcie_is_link_disabled(dev_info_t *dip)
2727 {
2728 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2729 
2730 	if (PCIE_IS_PCIE(bus_p)) {
2731 		if (PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL) &
2732 		    PCIE_LINKCTL_LINK_DISABLE)
2733 			return (B_TRUE);
2734 	}
2735 	return (B_FALSE);
2736 }
2737 
2738 /*
2739  * Determines if there are any root ports attached to a root complex.
2740  *
2741  * dip - dip of root complex
2742  *
2743  * Returns - DDI_SUCCESS if there is at least one root port otherwise
2744  *	     DDI_FAILURE.
2745  */
2746 int
2747 pcie_root_port(dev_info_t *dip)
2748 {
2749 	int port_type;
2750 	uint16_t cap_ptr;
2751 	ddi_acc_handle_t config_handle;
2752 	dev_info_t *cdip = ddi_get_child(dip);
2753 
2754 	/*
2755 	 * Determine if any of the children of the passed in dip
2756 	 * are root ports.
2757 	 */
2758 	for (; cdip; cdip = ddi_get_next_sibling(cdip)) {
2759 
2760 		if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS)
2761 			continue;
2762 
2763 		if ((PCI_CAP_LOCATE(config_handle, PCI_CAP_ID_PCI_E,
2764 		    &cap_ptr)) == DDI_FAILURE) {
2765 			pci_config_teardown(&config_handle);
2766 			continue;
2767 		}
2768 
2769 		port_type = PCI_CAP_GET16(config_handle, 0, cap_ptr,
2770 		    PCIE_PCIECAP) & PCIE_PCIECAP_DEV_TYPE_MASK;
2771 
2772 		pci_config_teardown(&config_handle);
2773 
2774 		if (port_type == PCIE_PCIECAP_DEV_TYPE_ROOT)
2775 			return (DDI_SUCCESS);
2776 	}
2777 
2778 	/* No root ports were found */
2779 
2780 	return (DDI_FAILURE);
2781 }
2782 
2783 /*
2784  * Function that determines if a device a PCIe device.
2785  *
2786  * dip - dip of device.
2787  *
2788  * returns - DDI_SUCCESS if device is a PCIe device, otherwise DDI_FAILURE.
2789  */
2790 int
2791 pcie_dev(dev_info_t *dip)
2792 {
2793 	/* get parent device's device_type property */
2794 	char *device_type;
2795 	int rc = DDI_FAILURE;
2796 	dev_info_t *pdip = ddi_get_parent(dip);
2797 
2798 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip,
2799 	    DDI_PROP_DONTPASS, "device_type", &device_type)
2800 	    != DDI_PROP_SUCCESS) {
2801 		return (DDI_FAILURE);
2802 	}
2803 
2804 	if (strcmp(device_type, "pciex") == 0)
2805 		rc = DDI_SUCCESS;
2806 	else
2807 		rc = DDI_FAILURE;
2808 
2809 	ddi_prop_free(device_type);
2810 	return (rc);
2811 }
2812 
2813 /*
2814  * Function to map in a device's memory space.
2815  */
2816 static int
2817 pcie_map_phys(dev_info_t *dip, pci_regspec_t *phys_spec,
2818     caddr_t *addrp, ddi_acc_handle_t *handlep)
2819 {
2820 	ddi_map_req_t mr;
2821 	ddi_acc_hdl_t *hp;
2822 	int result;
2823 	ddi_device_acc_attr_t attr;
2824 
2825 	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
2826 	attr.devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC;
2827 	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
2828 	attr.devacc_attr_access = DDI_CAUTIOUS_ACC;
2829 
2830 	*handlep = impl_acc_hdl_alloc(KM_SLEEP, NULL);
2831 	hp = impl_acc_hdl_get(*handlep);
2832 	hp->ah_vers = VERS_ACCHDL;
2833 	hp->ah_dip = dip;
2834 	hp->ah_rnumber = 0;
2835 	hp->ah_offset = 0;
2836 	hp->ah_len = 0;
2837 	hp->ah_acc = attr;
2838 
2839 	mr.map_op = DDI_MO_MAP_LOCKED;
2840 	mr.map_type = DDI_MT_REGSPEC;
2841 	mr.map_obj.rp = (struct regspec *)phys_spec;
2842 	mr.map_prot = PROT_READ | PROT_WRITE;
2843 	mr.map_flags = DDI_MF_KERNEL_MAPPING;
2844 	mr.map_handlep = hp;
2845 	mr.map_vers = DDI_MAP_VERSION;
2846 
2847 	result = ddi_map(dip, &mr, 0, 0, addrp);
2848 
2849 	if (result != DDI_SUCCESS) {
2850 		impl_acc_hdl_free(*handlep);
2851 		*handlep = (ddi_acc_handle_t)NULL;
2852 	} else {
2853 		hp->ah_addr = *addrp;
2854 	}
2855 
2856 	return (result);
2857 }
2858 
2859 /*
2860  * Map out memory that was mapped in with pcie_map_phys();
2861  */
2862 static void
2863 pcie_unmap_phys(ddi_acc_handle_t *handlep,  pci_regspec_t *ph)
2864 {
2865 	ddi_map_req_t mr;
2866 	ddi_acc_hdl_t *hp;
2867 
2868 	hp = impl_acc_hdl_get(*handlep);
2869 	ASSERT(hp);
2870 
2871 	mr.map_op = DDI_MO_UNMAP;
2872 	mr.map_type = DDI_MT_REGSPEC;
2873 	mr.map_obj.rp = (struct regspec *)ph;
2874 	mr.map_prot = PROT_READ | PROT_WRITE;
2875 	mr.map_flags = DDI_MF_KERNEL_MAPPING;
2876 	mr.map_handlep = hp;
2877 	mr.map_vers = DDI_MAP_VERSION;
2878 
2879 	(void) ddi_map(hp->ah_dip, &mr, hp->ah_offset,
2880 	    hp->ah_len, &hp->ah_addr);
2881 
2882 	impl_acc_hdl_free(*handlep);
2883 	*handlep = (ddi_acc_handle_t)NULL;
2884 }
2885 
/*
 * Set the pe_rber_fatal flag in this device's fault data.
 * NOTE(review): assumes dip has an associated pcie_bus_t with an
 * allocated bus_pfd -- no NULL checks are performed here.
 */
void
pcie_set_rber_fatal(dev_info_t *dip, boolean_t val)
{
	pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
	bus_p->bus_pfd->pe_rber_fatal = val;
}
2892 
2893 /*
2894  * Return parent Root Port's pe_rber_fatal value.
2895  */
2896 boolean_t
2897 pcie_get_rber_fatal(dev_info_t *dip)
2898 {
2899 	pcie_bus_t *bus_p = PCIE_DIP2UPBUS(dip);
2900 	pcie_bus_t *rp_bus_p = PCIE_DIP2UPBUS(bus_p->bus_rp_dip);
2901 	return (rp_bus_p->bus_pfd->pe_rber_fatal);
2902 }
2903 
2904 int
2905 pcie_ari_supported(dev_info_t *dip)
2906 {
2907 	uint32_t devcap2;
2908 	uint16_t pciecap;
2909 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2910 	uint8_t dev_type;
2911 
2912 	PCIE_DBG("pcie_ari_supported: dip=%p\n", dip);
2913 
2914 	if (bus_p == NULL)
2915 		return (PCIE_ARI_FORW_NOT_SUPPORTED);
2916 
2917 	dev_type = bus_p->bus_dev_type;
2918 
2919 	if ((dev_type != PCIE_PCIECAP_DEV_TYPE_DOWN) &&
2920 	    (dev_type != PCIE_PCIECAP_DEV_TYPE_ROOT))
2921 		return (PCIE_ARI_FORW_NOT_SUPPORTED);
2922 
2923 	if (pcie_disable_ari) {
2924 		PCIE_DBG("pcie_ari_supported: dip=%p: ARI Disabled\n", dip);
2925 		return (PCIE_ARI_FORW_NOT_SUPPORTED);
2926 	}
2927 
2928 	pciecap = PCIE_CAP_GET(16, bus_p, PCIE_PCIECAP);
2929 
2930 	if ((pciecap & PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0) {
2931 		PCIE_DBG("pcie_ari_supported: dip=%p: Not 2.0\n", dip);
2932 		return (PCIE_ARI_FORW_NOT_SUPPORTED);
2933 	}
2934 
2935 	devcap2 = PCIE_CAP_GET(32, bus_p, PCIE_DEVCAP2);
2936 
2937 	PCIE_DBG("pcie_ari_supported: dip=%p: DevCap2=0x%x\n",
2938 	    dip, devcap2);
2939 
2940 	if (devcap2 & PCIE_DEVCAP2_ARI_FORWARD) {
2941 		PCIE_DBG("pcie_ari_supported: "
2942 		    "dip=%p: ARI Forwarding is supported\n", dip);
2943 		return (PCIE_ARI_FORW_SUPPORTED);
2944 	}
2945 	return (PCIE_ARI_FORW_NOT_SUPPORTED);
2946 }
2947 
2948 int
2949 pcie_ari_enable(dev_info_t *dip)
2950 {
2951 	uint16_t devctl2;
2952 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2953 
2954 	PCIE_DBG("pcie_ari_enable: dip=%p\n", dip);
2955 
2956 	if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2957 		return (DDI_FAILURE);
2958 
2959 	devctl2 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL2);
2960 	devctl2 |= PCIE_DEVCTL2_ARI_FORWARD_EN;
2961 	PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL2, devctl2);
2962 
2963 	PCIE_DBG("pcie_ari_enable: dip=%p: writing 0x%x to DevCtl2\n",
2964 	    dip, devctl2);
2965 
2966 	return (DDI_SUCCESS);
2967 }
2968 
2969 int
2970 pcie_ari_disable(dev_info_t *dip)
2971 {
2972 	uint16_t devctl2;
2973 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2974 
2975 	PCIE_DBG("pcie_ari_disable: dip=%p\n", dip);
2976 
2977 	if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2978 		return (DDI_FAILURE);
2979 
2980 	devctl2 = PCIE_CAP_GET(16, bus_p, PCIE_DEVCTL2);
2981 	devctl2 &= ~PCIE_DEVCTL2_ARI_FORWARD_EN;
2982 	PCIE_CAP_PUT(16, bus_p, PCIE_DEVCTL2, devctl2);
2983 
2984 	PCIE_DBG("pcie_ari_disable: dip=%p: writing 0x%x to DevCtl2\n",
2985 	    dip, devctl2);
2986 
2987 	return (DDI_SUCCESS);
2988 }
2989 
2990 int
2991 pcie_ari_is_enabled(dev_info_t *dip)
2992 {
2993 	uint16_t devctl2;
2994 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
2995 
2996 	PCIE_DBG("pcie_ari_is_enabled: dip=%p\n", dip);
2997 
2998 	if (pcie_ari_supported(dip) == PCIE_ARI_FORW_NOT_SUPPORTED)
2999 		return (PCIE_ARI_FORW_DISABLED);
3000 
3001 	devctl2 = PCIE_CAP_GET(32, bus_p, PCIE_DEVCTL2);
3002 
3003 	PCIE_DBG("pcie_ari_is_enabled: dip=%p: DevCtl2=0x%x\n",
3004 	    dip, devctl2);
3005 
3006 	if (devctl2 & PCIE_DEVCTL2_ARI_FORWARD_EN) {
3007 		PCIE_DBG("pcie_ari_is_enabled: "
3008 		    "dip=%p: ARI Forwarding is enabled\n", dip);
3009 		return (PCIE_ARI_FORW_ENABLED);
3010 	}
3011 
3012 	return (PCIE_ARI_FORW_DISABLED);
3013 }
3014 
3015 int
3016 pcie_ari_device(dev_info_t *dip)
3017 {
3018 	ddi_acc_handle_t handle;
3019 	uint16_t cap_ptr;
3020 
3021 	PCIE_DBG("pcie_ari_device: dip=%p\n", dip);
3022 
3023 	/*
3024 	 * XXX - This function may be called before the bus_p structure
3025 	 * has been populated.  This code can be changed to remove
3026 	 * pci_config_setup()/pci_config_teardown() when the RFE
3027 	 * to populate the bus_p structures early in boot is putback.
3028 	 */
3029 
3030 	/* First make sure it is a PCIe device */
3031 
3032 	if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
3033 		return (PCIE_NOT_ARI_DEVICE);
3034 
3035 	if ((PCI_CAP_LOCATE(handle, PCI_CAP_ID_PCI_E, &cap_ptr))
3036 	    != DDI_SUCCESS) {
3037 		pci_config_teardown(&handle);
3038 		return (PCIE_NOT_ARI_DEVICE);
3039 	}
3040 
3041 	/* Locate the ARI Capability */
3042 
3043 	if ((PCI_CAP_LOCATE(handle, PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_ARI),
3044 	    &cap_ptr)) == DDI_FAILURE) {
3045 		pci_config_teardown(&handle);
3046 		return (PCIE_NOT_ARI_DEVICE);
3047 	}
3048 
3049 	/* ARI Capability was found so it must be a ARI device */
3050 	PCIE_DBG("pcie_ari_device: ARI Device dip=%p\n", dip);
3051 
3052 	pci_config_teardown(&handle);
3053 	return (PCIE_ARI_DEVICE);
3054 }
3055 
3056 int
3057 pcie_ari_get_next_function(dev_info_t *dip, int *func)
3058 {
3059 	uint32_t val;
3060 	uint16_t cap_ptr, next_function;
3061 	ddi_acc_handle_t handle;
3062 
3063 	/*
3064 	 * XXX - This function may be called before the bus_p structure
3065 	 * has been populated.  This code can be changed to remove
3066 	 * pci_config_setup()/pci_config_teardown() when the RFE
3067 	 * to populate the bus_p structures early in boot is putback.
3068 	 */
3069 
3070 	if (pci_config_setup(dip, &handle) != DDI_SUCCESS)
3071 		return (DDI_FAILURE);
3072 
3073 	if ((PCI_CAP_LOCATE(handle,
3074 	    PCI_CAP_XCFG_SPC(PCIE_EXT_CAP_ID_ARI), &cap_ptr)) == DDI_FAILURE) {
3075 		pci_config_teardown(&handle);
3076 		return (DDI_FAILURE);
3077 	}
3078 
3079 	val = PCI_CAP_GET32(handle, 0, cap_ptr, PCIE_ARI_CAP);
3080 
3081 	next_function = (val >> PCIE_ARI_CAP_NEXT_FUNC_SHIFT) &
3082 	    PCIE_ARI_CAP_NEXT_FUNC_MASK;
3083 
3084 	pci_config_teardown(&handle);
3085 
3086 	*func = next_function;
3087 
3088 	return (DDI_SUCCESS);
3089 }
3090 
3091 dev_info_t *
3092 pcie_func_to_dip(dev_info_t *dip, pcie_req_id_t function)
3093 {
3094 	pcie_req_id_t child_bdf;
3095 	dev_info_t *cdip;
3096 
3097 	for (cdip = ddi_get_child(dip); cdip;
3098 	    cdip = ddi_get_next_sibling(cdip)) {
3099 
3100 		if (pcie_get_bdf_from_dip(cdip, &child_bdf) == DDI_FAILURE)
3101 			return (NULL);
3102 
3103 		if ((child_bdf & PCIE_REQ_ID_ARI_FUNC_MASK) == function)
3104 			return (cdip);
3105 	}
3106 	return (NULL);
3107 }
3108 
3109 #ifdef	DEBUG
3110 
/*
 * Dump the identification and capability-offset fields of a pcie_bus_t
 * via pcie_dbg() for debugging.
 */
static void
pcie_print_bus(pcie_bus_t *bus_p)
{
	pcie_dbg("\tbus_dip = 0x%p\n", bus_p->bus_dip);
	pcie_dbg("\tbus_fm_flags = 0x%x\n", bus_p->bus_fm_flags);

	pcie_dbg("\tbus_bdf = 0x%x\n", bus_p->bus_bdf);
	pcie_dbg("\tbus_dev_ven_id = 0x%x\n", bus_p->bus_dev_ven_id);
	pcie_dbg("\tbus_rev_id = 0x%x\n", bus_p->bus_rev_id);
	pcie_dbg("\tbus_hdr_type = 0x%x\n", bus_p->bus_hdr_type);
	pcie_dbg("\tbus_dev_type = 0x%x\n", bus_p->bus_dev_type);
	pcie_dbg("\tbus_bdg_secbus = 0x%x\n", bus_p->bus_bdg_secbus);
	pcie_dbg("\tbus_pcie_off = 0x%x\n", bus_p->bus_pcie_off);
	pcie_dbg("\tbus_aer_off = 0x%x\n", bus_p->bus_aer_off);
	pcie_dbg("\tbus_pcix_off = 0x%x\n", bus_p->bus_pcix_off);
	pcie_dbg("\tbus_ecc_ver = 0x%x\n", bus_p->bus_ecc_ver);
}
3128 
3129 /*
3130  * For debugging purposes set pcie_dbg_print != 0 to see printf messages
3131  * during interrupt.
3132  *
3133  * When a proper solution is in place this code will disappear.
3134  * Potential solutions are:
3135  * o circular buffers
3136  * o taskq to print at lower pil
3137  */
3138 int pcie_dbg_print = 0;
3139 void
3140 pcie_dbg(char *fmt, ...)
3141 {
3142 	va_list ap;
3143 
3144 	if (!pcie_debug_flags) {
3145 		return;
3146 	}
3147 	va_start(ap, fmt);
3148 	if (servicing_interrupt()) {
3149 		if (pcie_dbg_print) {
3150 			prom_vprintf(fmt, ap);
3151 		}
3152 	} else {
3153 		prom_vprintf(fmt, ap);
3154 	}
3155 	va_end(ap);
3156 }
3157 #endif	/* DEBUG */
3158 
3159 #if defined(__x86)
3160 static void
3161 pcie_check_io_mem_range(ddi_acc_handle_t cfg_hdl, boolean_t *empty_io_range,
3162     boolean_t *empty_mem_range)
3163 {
3164 	uint8_t	class, subclass;
3165 	uint_t	val;
3166 
3167 	class = pci_config_get8(cfg_hdl, PCI_CONF_BASCLASS);
3168 	subclass = pci_config_get8(cfg_hdl, PCI_CONF_SUBCLASS);
3169 
3170 	if ((class == PCI_CLASS_BRIDGE) && (subclass == PCI_BRIDGE_PCI)) {
3171 		val = (((uint_t)pci_config_get8(cfg_hdl, PCI_BCNF_IO_BASE_LOW) &
3172 		    PCI_BCNF_IO_MASK) << 8);
3173 		/*
3174 		 * Assuming that a zero based io_range[0] implies an
3175 		 * invalid I/O range.  Likewise for mem_range[0].
3176 		 */
3177 		if (val == 0)
3178 			*empty_io_range = B_TRUE;
3179 		val = (((uint_t)pci_config_get16(cfg_hdl, PCI_BCNF_MEM_BASE) &
3180 		    PCI_BCNF_MEM_MASK) << 16);
3181 		if (val == 0)
3182 			*empty_mem_range = B_TRUE;
3183 	}
3184 }
3185 
3186 #endif /* defined(__x86) */
3187 
3188 boolean_t
3189 pcie_link_bw_supported(dev_info_t *dip)
3190 {
3191 	uint32_t linkcap;
3192 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3193 
3194 	if (!PCIE_IS_PCIE(bus_p)) {
3195 		return (B_FALSE);
3196 	}
3197 
3198 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3199 		return (B_FALSE);
3200 	}
3201 
3202 	linkcap = PCIE_CAP_GET(32, bus_p, PCIE_LINKCAP);
3203 	return ((linkcap & PCIE_LINKCAP_LINK_BW_NOTIFY_CAP) != 0);
3204 }
3205 
3206 int
3207 pcie_link_bw_enable(dev_info_t *dip)
3208 {
3209 	uint16_t linkctl;
3210 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3211 
3212 	if (pcie_disable_lbw != 0) {
3213 		return (DDI_FAILURE);
3214 	}
3215 
3216 	if (!pcie_link_bw_supported(dip)) {
3217 		return (DDI_FAILURE);
3218 	}
3219 
3220 	mutex_init(&bus_p->bus_lbw_mutex, NULL, MUTEX_DRIVER, NULL);
3221 	cv_init(&bus_p->bus_lbw_cv, NULL, CV_DRIVER, NULL);
3222 	linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3223 	linkctl |= PCIE_LINKCTL_LINK_BW_INTR_EN;
3224 	linkctl |= PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
3225 	PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
3226 
3227 	bus_p->bus_lbw_pbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3228 	bus_p->bus_lbw_cbuf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3229 	bus_p->bus_lbw_state |= PCIE_LBW_S_ENABLED;
3230 
3231 	return (DDI_SUCCESS);
3232 }
3233 
3234 int
3235 pcie_link_bw_disable(dev_info_t *dip)
3236 {
3237 	uint16_t linkctl;
3238 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3239 
3240 	if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
3241 		return (DDI_FAILURE);
3242 	}
3243 
3244 	mutex_enter(&bus_p->bus_lbw_mutex);
3245 	while ((bus_p->bus_lbw_state &
3246 	    (PCIE_LBW_S_DISPATCHED | PCIE_LBW_S_RUNNING)) != 0) {
3247 		cv_wait(&bus_p->bus_lbw_cv, &bus_p->bus_lbw_mutex);
3248 	}
3249 	mutex_exit(&bus_p->bus_lbw_mutex);
3250 
3251 	linkctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3252 	linkctl &= ~PCIE_LINKCTL_LINK_BW_INTR_EN;
3253 	linkctl &= ~PCIE_LINKCTL_LINK_AUTO_BW_INTR_EN;
3254 	PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, linkctl);
3255 
3256 	bus_p->bus_lbw_state &= ~PCIE_LBW_S_ENABLED;
3257 	kmem_free(bus_p->bus_lbw_pbuf, MAXPATHLEN);
3258 	kmem_free(bus_p->bus_lbw_cbuf, MAXPATHLEN);
3259 	bus_p->bus_lbw_pbuf = NULL;
3260 	bus_p->bus_lbw_cbuf = NULL;
3261 
3262 	mutex_destroy(&bus_p->bus_lbw_mutex);
3263 	cv_destroy(&bus_p->bus_lbw_cv);
3264 
3265 	return (DDI_SUCCESS);
3266 }
3267 
/*
 * Taskq worker dispatched from pcie_link_bw_intr(): re-capture link speeds
 * for this port and its function-0 child, then emit an ESC_PCIE_LINK_STATE
 * sysevent.  If more interrupts arrived while we ran (PCIE_LBW_S_DISPATCHED
 * set again), loop back to 'top' and process them.
 */
void
pcie_link_bw_taskq(void *arg)
{
	dev_info_t *dip = arg;
	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
	dev_info_t *cdip;
	boolean_t again;
	sysevent_t *se;
	sysevent_value_t se_val;
	sysevent_id_t eid;
	sysevent_attr_list_t *ev_attr_list;
	int circular;

top:
	ndi_devi_enter(dip, &circular);
	se = NULL;
	ev_attr_list = NULL;
	/* We are now the running instance; clear the dispatch flag. */
	mutex_enter(&bus_p->bus_lbw_mutex);
	bus_p->bus_lbw_state &= ~PCIE_LBW_S_DISPATCHED;
	bus_p->bus_lbw_state |= PCIE_LBW_S_RUNNING;
	mutex_exit(&bus_p->bus_lbw_mutex);

	/*
	 * Update our own speeds as we've likely changed something.
	 */
	pcie_capture_speeds(dip);

	/*
	 * Walk our children. We only care about updating this on function 0
	 * because the PCIe specification requires that these all be the same
	 * otherwise.
	 */
	for (cdip = ddi_get_child(dip); cdip != NULL;
	    cdip = ddi_get_next_sibling(cdip)) {
		pcie_bus_t *cbus_p = PCIE_DIP2BUS(cdip);

		if (cbus_p == NULL) {
			continue;
		}

		if ((cbus_p->bus_bdf & PCIE_REQ_ID_FUNC_MASK) != 0) {
			continue;
		}

		/*
		 * It's possible that this can fire while a child is otherwise
		 * only partially constructed. Therefore, if we don't have the
		 * config handle, don't bother updating the child.
		 */
		if (cbus_p->bus_cfg_hdl == NULL) {
			continue;
		}

		pcie_capture_speeds(cdip);
		break;
	}

	se = sysevent_alloc(EC_PCIE, ESC_PCIE_LINK_STATE,
	    ILLUMOS_KERN_PUB "pcie", SE_SLEEP);

	(void) ddi_pathname(dip, bus_p->bus_lbw_pbuf);
	se_val.value_type = SE_DATA_TYPE_STRING;
	se_val.value.sv_string = bus_p->bus_lbw_pbuf;
	if (sysevent_add_attr(&ev_attr_list, PCIE_EV_DETECTOR_PATH, &se_val,
	    SE_SLEEP) != 0) {
		ndi_devi_exit(dip, circular);
		goto err;
	}

	if (cdip != NULL) {
		(void) ddi_pathname(cdip, bus_p->bus_lbw_cbuf);

		se_val.value_type = SE_DATA_TYPE_STRING;
		se_val.value.sv_string = bus_p->bus_lbw_cbuf;

		/*
		 * If this fails, that's OK. We'd rather get the event off and
		 * there's a chance that there may not be anything there for us.
		 */
		(void) sysevent_add_attr(&ev_attr_list, PCIE_EV_CHILD_PATH,
		    &se_val, SE_SLEEP);
	}

	ndi_devi_exit(dip, circular);

	/*
	 * Before we generate and send down a sysevent, we need to tell the
	 * system that parts of the devinfo cache need to be invalidated. While
	 * the function below takes several args, it ignores them all. Because
	 * this is a global invalidation, we don't bother trying to do much more
	 * than requesting a global invalidation, lest we accidentally kick off
	 * several in a row.
	 */
	ddi_prop_cache_invalidate(DDI_DEV_T_NONE, NULL, NULL, 0);

	if (sysevent_attach_attributes(se, ev_attr_list) != 0) {
		goto err;
	}
	ev_attr_list = NULL;

	if (log_sysevent(se, SE_SLEEP, &eid) != 0) {
		goto err;
	}

	/* Success and failure paths converge here to free sysevent state. */
err:
	sysevent_free_attr(ev_attr_list);
	sysevent_free(se);

	mutex_enter(&bus_p->bus_lbw_mutex);
	bus_p->bus_lbw_state &= ~PCIE_LBW_S_RUNNING;
	cv_broadcast(&bus_p->bus_lbw_cv);
	again = (bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) != 0;
	mutex_exit(&bus_p->bus_lbw_mutex);

	if (again) {
		goto top;
	}
}
3386 
3387 int
3388 pcie_link_bw_intr(dev_info_t *dip)
3389 {
3390 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3391 	uint16_t linksts;
3392 	uint16_t flags = PCIE_LINKSTS_LINK_BW_MGMT | PCIE_LINKSTS_AUTO_BW;
3393 
3394 	if ((bus_p->bus_lbw_state & PCIE_LBW_S_ENABLED) == 0) {
3395 		return (DDI_INTR_UNCLAIMED);
3396 	}
3397 
3398 	linksts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3399 	if ((linksts & flags) == 0) {
3400 		return (DDI_INTR_UNCLAIMED);
3401 	}
3402 
3403 	/*
3404 	 * Check if we've already dispatched this event. If we have already
3405 	 * dispatched it, then there's nothing else to do, we coalesce multiple
3406 	 * events.
3407 	 */
3408 	mutex_enter(&bus_p->bus_lbw_mutex);
3409 	bus_p->bus_lbw_nevents++;
3410 	if ((bus_p->bus_lbw_state & PCIE_LBW_S_DISPATCHED) == 0) {
3411 		if ((bus_p->bus_lbw_state & PCIE_LBW_S_RUNNING) == 0) {
3412 			taskq_dispatch_ent(pcie_link_tq, pcie_link_bw_taskq,
3413 			    dip, 0, &bus_p->bus_lbw_ent);
3414 		}
3415 
3416 		bus_p->bus_lbw_state |= PCIE_LBW_S_DISPATCHED;
3417 	}
3418 	mutex_exit(&bus_p->bus_lbw_mutex);
3419 
3420 	PCIE_CAP_PUT(16, bus_p, PCIE_LINKSTS, flags);
3421 	return (DDI_INTR_CLAIMED);
3422 }
3423 
3424 int
3425 pcie_link_set_target(dev_info_t *dip, pcie_link_speed_t speed)
3426 {
3427 	uint16_t ctl2, rval;
3428 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3429 
3430 	if (!PCIE_IS_PCIE(bus_p)) {
3431 		return (ENOTSUP);
3432 	}
3433 
3434 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3435 		return (ENOTSUP);
3436 	}
3437 
3438 	if (bus_p->bus_pcie_vers < 2) {
3439 		return (ENOTSUP);
3440 	}
3441 
3442 	switch (speed) {
3443 	case PCIE_LINK_SPEED_2_5:
3444 		rval = PCIE_LINKCTL2_TARGET_SPEED_2_5;
3445 		break;
3446 	case PCIE_LINK_SPEED_5:
3447 		rval = PCIE_LINKCTL2_TARGET_SPEED_5;
3448 		break;
3449 	case PCIE_LINK_SPEED_8:
3450 		rval = PCIE_LINKCTL2_TARGET_SPEED_8;
3451 		break;
3452 	case PCIE_LINK_SPEED_16:
3453 		rval = PCIE_LINKCTL2_TARGET_SPEED_16;
3454 		break;
3455 	case PCIE_LINK_SPEED_32:
3456 		rval = PCIE_LINKCTL2_TARGET_SPEED_32;
3457 		break;
3458 	case PCIE_LINK_SPEED_64:
3459 		rval = PCIE_LINKCTL2_TARGET_SPEED_64;
3460 		break;
3461 	default:
3462 		return (EINVAL);
3463 	}
3464 
3465 	mutex_enter(&bus_p->bus_speed_mutex);
3466 	if ((bus_p->bus_sup_speed & speed) == 0) {
3467 		mutex_exit(&bus_p->bus_speed_mutex);
3468 		return (ENOTSUP);
3469 	}
3470 
3471 	bus_p->bus_target_speed = speed;
3472 	bus_p->bus_speed_flags |= PCIE_LINK_F_ADMIN_TARGET;
3473 
3474 	ctl2 = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL2);
3475 	ctl2 &= ~PCIE_LINKCTL2_TARGET_SPEED_MASK;
3476 	ctl2 |= rval;
3477 	PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL2, ctl2);
3478 	mutex_exit(&bus_p->bus_speed_mutex);
3479 
3480 	/*
3481 	 * Make sure our updates have been reflected in devinfo.
3482 	 */
3483 	pcie_capture_speeds(dip);
3484 
3485 	return (0);
3486 }
3487 
3488 int
3489 pcie_link_retrain(dev_info_t *dip)
3490 {
3491 	uint16_t ctl;
3492 	pcie_bus_t *bus_p = PCIE_DIP2BUS(dip);
3493 
3494 	if (!PCIE_IS_PCIE(bus_p)) {
3495 		return (ENOTSUP);
3496 	}
3497 
3498 	if (!PCIE_IS_RP(bus_p) && !PCIE_IS_SWD(bus_p)) {
3499 		return (ENOTSUP);
3500 	}
3501 
3502 	/*
3503 	 * The PCIe specification suggests that we make sure that the link isn't
3504 	 * in training before issuing this command in case there was a state
3505 	 * machine transition prior to when we got here. We wait and then go
3506 	 * ahead and issue the command anyways.
3507 	 */
3508 	for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
3509 		uint16_t sts;
3510 
3511 		sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3512 		if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
3513 			break;
3514 		delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
3515 	}
3516 
3517 	ctl = PCIE_CAP_GET(16, bus_p, PCIE_LINKCTL);
3518 	ctl |= PCIE_LINKCTL_RETRAIN_LINK;
3519 	PCIE_CAP_PUT(16, bus_p, PCIE_LINKCTL, ctl);
3520 
3521 	/*
3522 	 * Wait again to see if it clears before returning to the user.
3523 	 */
3524 	for (uint32_t i = 0; i < pcie_link_retrain_count; i++) {
3525 		uint16_t sts;
3526 
3527 		sts = PCIE_CAP_GET(16, bus_p, PCIE_LINKSTS);
3528 		if ((sts & PCIE_LINKSTS_LINK_TRAINING) == 0)
3529 			break;
3530 		delay(drv_usectohz(pcie_link_retrain_delay_ms * 1000));
3531 	}
3532 
3533 	return (0);
3534 }
3535 
3536 /*
3537  * Here we're going through and grabbing information about a given PCIe device.
3538  * Our situation is a little bit complicated at this point. This gets invoked
3539  * both during early initialization and during hotplug events. We cannot rely on
3540  * the device node having been fully set up, that is, while the pcie_bus_t
3541  * normally contains a ddi_acc_handle_t for configuration space, that may not be
3542  * valid yet as this can occur before child initialization or we may be dealing
3543  * with a function that will never have a handle.
3544  *
3545  * However, we should always have a fully furnished pcie_bus_t, which means that
3546  * we can get its bdf and use that to access the devices configuration space.
3547  */
static int
pcie_fabric_feature_scan(dev_info_t *dip, void *arg)
{
	pcie_bus_t *bus_p;
	uint32_t devcap;
	uint16_t mps;
	dev_info_t *rcdip;
	pcie_fabric_data_t *fab = arg;

	/*
	 * Skip over non-PCIe devices. If we encounter something here, we don't
	 * bother going through any of its children because we don't have reason
	 * to believe that a PCIe device that this will impact will exist below
	 * this. While it is possible that there's a PCIe fabric downstream an
	 * intermediate old PCI/PCI-X bus, at that point, we'll still trigger
	 * our complex fabric detection and use the minimums.
	 *
	 * The reason this doesn't trigger an immediate flagging as a complex
	 * case like the one below is because we could be scanning a device that
	 * is a nexus driver and has children already (albeit that would be
	 * somewhat surprising as we don't anticipate being called at this
	 * point).
	 */
	if (pcie_dev(dip) != DDI_SUCCESS) {
		return (DDI_WALK_PRUNECHILD);
	}

	/*
	 * If we fail to find a pcie_bus_t for some reason, that's somewhat
	 * surprising. We log this fact and set the complex flag and indicate it
	 * was because of this case. This immediately transitions us to a
	 * "complex" case which means use the minimal, safe, settings.
	 */
	bus_p = PCIE_DIP2BUS(dip);
	if (bus_p == NULL) {
		dev_err(dip, CE_WARN, "failed to find associated pcie_bus_t "
		    "during fabric scan");
		fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
		return (DDI_WALK_TERMINATE);
	}

	/*
	 * In a similar case, there is hardware out there which is a PCIe
	 * device, but does not advertise a PCIe capability. An example of this
	 * is the IDT Tsi382A which can hide its PCIe capability. If this is
	 * the case, we immediately terminate scanning and flag this as a
	 * 'complex' case which causes us to use guaranteed safe settings.
	 */
	if (bus_p->bus_pcie_off == 0) {
		dev_err(dip, CE_WARN, "encountered PCIe device without PCIe "
		    "capability");
		fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
		return (DDI_WALK_TERMINATE);
	}

	rcdip = pcie_get_rc_dip(dip);

	/*
	 * First, start by determining what the device's tagging and max packet
	 * size is. All PCIe devices will always have the 8-bit tag information
	 * as this has existed since PCIe 1.0. 10-bit tagging requires a V2
	 * PCIe capability. 14-bit requires the DEV3 cap. If we are missing a
	 * version or capability, then we always treat that as lacking the bits
	 * in the fabric.
	 */
	ASSERT3U(bus_p->bus_pcie_off, !=, 0);
	devcap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
	    PCIE_DEVCAP);
	mps = devcap & PCIE_DEVCAP_MAX_PAYLOAD_MASK;
	/* Track the smallest supported max payload size seen so far. */
	if (mps < fab->pfd_mps_found) {
		fab->pfd_mps_found = mps;
	}

	if ((devcap & PCIE_DEVCAP_EXT_TAG_8BIT) == 0) {
		fab->pfd_tag_found &= ~PCIE_TAG_8B;
	}

	if (bus_p->bus_pcie_vers == PCIE_PCIECAP_VER_2_0) {
		uint32_t devcap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
		    bus_p->bus_pcie_off + PCIE_DEVCAP2);
		if ((devcap2 & PCIE_DEVCAP2_10B_TAG_COMP_SUP) == 0) {
			fab->pfd_tag_found &= ~PCIE_TAG_10B_COMP;
		}
	} else {
		fab->pfd_tag_found &= ~PCIE_TAG_10B_COMP;
	}

	if (bus_p->bus_dev3_off != 0) {
		uint32_t devcap3 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
		    bus_p->bus_dev3_off + PCIE_DEVCAP3);
		if ((devcap3 & PCIE_DEVCAP3_14B_TAG_COMP_SUP) == 0) {
			fab->pfd_tag_found &= ~PCIE_TAG_14B_COMP;
		}
	} else {
		fab->pfd_tag_found &= ~PCIE_TAG_14B_COMP;
	}

	/*
	 * Now that we have captured device information, we must go and ask
	 * questions of the topology here. The big theory statement enumerates
	 * several types of cases. The big question we need to answer is have we
	 * encountered a hotpluggable bridge that means we need to mark this as
	 * complex.
	 *
	 * The big theory statement notes several different kinds of hotplug
	 * topologies that exist that we can theoretically support. Right now we
	 * opt to keep our lives simple and focus solely on (4) and (5). These
	 * can both be summarized by a single, fairly straightforward rule:
	 *
	 * The only allowed hotpluggable entity is a root port.
	 *
	 * The reason that this can work and detect cases like (6), (7), and our
	 * other invalid ones is that the hotplug code will scan and find all
	 * children before we are called into here.
	 */
	if (bus_p->bus_hp_sup_modes != 0) {
		/*
		 * We opt to terminate in this case because there's no value in
		 * scanning the rest of the tree at this point.
		 */
		if (!PCIE_IS_RP(bus_p)) {
			fab->pfd_flags |= PCIE_FABRIC_F_COMPLEX;
			return (DDI_WALK_TERMINATE);
		}

		fab->pfd_flags |= PCIE_FABRIC_F_RP_HP;
	}

	/*
	 * As our walk starts at a root port, we need to make sure that we don't
	 * pick up any of its siblings and their children as those would be
	 * different PCIe fabric domains for us to scan. In many hardware
	 * platforms multiple root ports are all at the same level in the tree.
	 */
	if (bus_p->bus_rp_dip == dip) {
		return (DDI_WALK_PRUNESIB);
	}

	return (DDI_WALK_CONTINUE);
}
3688 
3689 static int
3690 pcie_fabric_feature_set(dev_info_t *dip, void *arg)
3691 {
3692 	pcie_bus_t *bus_p;
3693 	dev_info_t *rcdip;
3694 	pcie_fabric_data_t *fab = arg;
3695 	uint32_t devcap, devctl;
3696 
3697 	if (pcie_dev(dip) != DDI_SUCCESS) {
3698 		return (DDI_WALK_PRUNECHILD);
3699 	}
3700 
3701 	/*
3702 	 * The missing bus_t sent us into the complex case previously. We still
3703 	 * need to make sure all devices have values we expect here and thus
3704 	 * don't terminate like the above. The same is true for the case where
3705 	 * there is no PCIe capability.
3706 	 */
3707 	bus_p = PCIE_DIP2BUS(dip);
3708 	if (bus_p == NULL || bus_p->bus_pcie_off == 0) {
3709 		return (DDI_WALK_CONTINUE);
3710 	}
3711 	rcdip = pcie_get_rc_dip(dip);
3712 
3713 	devcap = pci_cfgacc_get32(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3714 	    PCIE_DEVCAP);
3715 	devctl = pci_cfgacc_get16(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3716 	    PCIE_DEVCTL);
3717 
3718 	if ((devcap & PCIE_DEVCAP_EXT_TAG_8BIT) != 0 &&
3719 	    (fab->pfd_tag_act & PCIE_TAG_8B) != 0) {
3720 		devctl |= PCIE_DEVCTL_EXT_TAG_FIELD_EN;
3721 	}
3722 
3723 	devctl &= ~PCIE_DEVCTL_MAX_PAYLOAD_MASK;
3724 	ASSERT0(fab->pfd_mps_act & ~PCIE_DEVCAP_MAX_PAYLOAD_MASK);
3725 	devctl |= fab->pfd_mps_act << PCIE_DEVCTL_MAX_PAYLOAD_SHIFT;
3726 
3727 	pci_cfgacc_put16(rcdip, bus_p->bus_bdf, bus_p->bus_pcie_off +
3728 	    PCIE_DEVCTL, devctl);
3729 
3730 	if (bus_p->bus_pcie_vers == PCIE_PCIECAP_VER_2_0 &&
3731 	    (fab->pfd_tag_act & PCIE_TAG_10B_COMP) != 0) {
3732 		uint32_t devcap2 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3733 		    bus_p->bus_pcie_off + PCIE_DEVCAP2);
3734 
3735 		if ((devcap2 & PCIE_DEVCAP2_10B_TAG_REQ_SUP) == 0) {
3736 			uint16_t devctl2 = pci_cfgacc_get16(rcdip,
3737 			    bus_p->bus_bdf, bus_p->bus_pcie_off + PCIE_DEVCTL2);
3738 			devctl2 |= PCIE_DEVCTL2_10B_TAG_REQ_EN;
3739 			pci_cfgacc_put16(rcdip, bus_p->bus_bdf,
3740 			    bus_p->bus_pcie_off + PCIE_DEVCTL2, devctl2);
3741 		}
3742 	}
3743 
3744 	if (bus_p->bus_dev3_off != 0 &&
3745 	    (fab->pfd_tag_act & PCIE_TAG_14B_COMP) != 0) {
3746 		uint32_t devcap3 = pci_cfgacc_get32(rcdip, bus_p->bus_bdf,
3747 		    bus_p->bus_dev3_off + PCIE_DEVCAP3);
3748 
3749 		if ((devcap3 & PCIE_DEVCAP3_14B_TAG_REQ_SUP) == 0) {
3750 			uint16_t devctl3 = pci_cfgacc_get16(rcdip,
3751 			    bus_p->bus_bdf, bus_p->bus_dev3_off + PCIE_DEVCTL3);
3752 			devctl3 |= PCIE_DEVCTL3_14B_TAG_REQ_EN;
3753 			pci_cfgacc_put16(rcdip, bus_p->bus_bdf,
3754 			    bus_p->bus_pcie_off + PCIE_DEVCTL2, devctl3);
3755 		}
3756 	}
3757 
3758 	/*
3759 	 * As our walk starts at a root port, we need to make sure that we don't
3760 	 * pick up any of its siblings and their children as those would be
3761 	 * different PCIe fabric domains for us to scan. In many hardware
3762 	 * platforms multiple root ports are all at the same level in the tree.
3763 	 */
3764 	if (bus_p->bus_rp_dip == dip) {
3765 		return (DDI_WALK_PRUNESIB);
3766 	}
3767 
3768 	return (DDI_WALK_CONTINUE);
3769 }
3770 
3771 /*
3772  * This is used to scan and determine the total set of PCIe fabric settings that
3773  * we should have in the system for everything downstream of this specified root
3774  * port. Note, it is only really safe to call this while working from the
3775  * perspective of a root port as we will be walking down the entire device tree.
3776  *
 * However, our callers, particularly hotplug, don't have all the information
3778  * we'd like. In particular, we need to check that:
3779  *
3780  *   o This is actually a PCIe device.
3781  *   o That this is a root port (see the big theory statement to understand this
3782  *     constraint).
3783  */
3784 void
3785 pcie_fabric_setup(dev_info_t *dip)
3786 {
3787 	pcie_bus_t *bus_p;
3788 	pcie_fabric_data_t *fab;
3789 	dev_info_t *pdip;
3790 	int circular_count;
3791 
3792 	bus_p = PCIE_DIP2BUS(dip);
3793 	if (bus_p == NULL || !PCIE_IS_RP(bus_p)) {
3794 		return;
3795 	}
3796 
3797 	VERIFY3P(bus_p->bus_fab, !=, NULL);
3798 	fab = bus_p->bus_fab;
3799 
3800 	/*
3801 	 * For us to call ddi_walk_devs(), our parent needs to be held.
3802 	 * ddi_walk_devs() will take care of grabbing our dip as part of its
3803 	 * walk before we iterate over our children.
3804 	 *
3805 	 * A reasonable question to ask here is why is it safe to ask for our
3806 	 * parent? In this case, because we have entered here through some
3807 	 * thread that's operating on us whether as part of attach or a hotplug
3808 	 * event, our dip somewhat by definition has to be valid. If we were
3809 	 * looking at our dip's children and then asking them for a parent, then
3810 	 * that would be a race condition.
3811 	 */
3812 	pdip = ddi_get_parent(dip);
3813 	VERIFY3P(pdip, !=, NULL);
3814 	ndi_devi_enter(pdip, &circular_count);
3815 	fab->pfd_flags |= PCIE_FABRIC_F_SCANNING;
3816 
3817 	/*
3818 	 * Reinitialize the tracking structure to basically set the maximum
3819 	 * caps. These will be chipped away during the scan.
3820 	 */
3821 	fab->pfd_mps_found = PCIE_DEVCAP_MAX_PAYLOAD_4096;
3822 	fab->pfd_tag_found = PCIE_TAG_ALL;
3823 	fab->pfd_flags &= ~PCIE_FABRIC_F_COMPLEX;
3824 
3825 	ddi_walk_devs(dip, pcie_fabric_feature_scan, fab);
3826 
3827 	if ((fab->pfd_flags & PCIE_FABRIC_F_COMPLEX) != 0) {
3828 		fab->pfd_tag_act = PCIE_TAG_5B;
3829 		fab->pfd_mps_act = PCIE_DEVCAP_MAX_PAYLOAD_128;
3830 	} else {
3831 		fab->pfd_tag_act = fab->pfd_tag_found;
3832 		fab->pfd_mps_act = fab->pfd_mps_found;
3833 	}
3834 
3835 	ddi_walk_devs(dip, pcie_fabric_feature_set, fab);
3836 
3837 	fab->pfd_flags &= ~PCIE_FABRIC_F_SCANNING;
3838 	ndi_devi_exit(pdip, circular_count);
3839 }
3840