/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * Copyright 2019, Joyent, Inc. * Copyright 2024 Oxide Computer Company */ /* * Nexus Driver for AMD Zen family systems. The purpose of this driver is to * provide access to the following resources in a single, centralized fashion: * * - The per-chip Data Fabric * - The North Bridge * - The System Management Network (SMN) * * This is a nexus driver as once we have attached to all the requisite * components, we will enumerate child devices which consume this functionality. * * ------------------------ * Mapping Devices Together * ------------------------ * * The operating system needs to expose things like temperature sensors and DRAM * configuration registers in terms of things that are meaningful to the system * such as logical CPUs, cores, etc. This driver attaches to the PCI devices * that represent the northbridge, data fabrics, and dies. Note that there are * multiple northbridge and DF devices (one each per die) and this driver maps * all of these three things together. Unfortunately, this requires some * acrobatics as there is no direct way to map a northbridge to its * corresponding die. Instead, we map a CPU die to a data fabric PCI device and * a data fabric PCI device to a corresponding northbridge PCI device. This * transitive relationship allows us to map between northbridge and die. * * As each data fabric device is attached, based on vendor and device portions * of the PCI ID, we add it to the DF stubs list in the global amdzen_t * structure, amdzen_data->azn_df_stubs. We must now map these to logical CPUs. * * In current Zen based products, there is a direct mapping between processor * nodes and a data fabric PCI device: all of the devices are on PCI Bus 0 and * start from Device 0x18, so device 0x18 maps to processor node 0, 0x19 to * processor node 1, etc. This means that to map a logical CPU to a data fabric * device, we take its processor node id, add it to 0x18 and find the PCI device * that is on bus 0 with that ID number. We already discovered the DF devices as * described above. * * The northbridge PCI device has a well-defined device and function, but the * bus that it is on varies. Each die has its own set of assigned PCI buses and * its northbridge device is on the first die-specific bus. This implies that * the northbridges do not show up on PCI bus 0, as that is the PCI bus that all * of the data fabric devices are on and is not assigned to any particular die. * Additionally, while the northbridge on the lowest-numbered PCI bus * intuitively corresponds to processor node zero, hardware does not guarantee * this. Because we don't want to be at the mercy of firmware, we don't rely on * this ordering assumption, though we have yet to find a system that deviates * from it, either. * * One of the registers in the data fabric device's function 0 * (AMDZEN_DF_F0_CFG_ADDR_CTL) happens to identify the first PCI bus that is * associated with the processor node. This means that we can map a data fabric * device to a northbridge by finding the northbridge whose PCI bus ID matches * the value in the corresponding data fabric's AMDZEN_DF_F0_CFG_ADDR_CTL. 
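 * * To make the mapping concrete, the lookup amounts to the following sketch * (variable names here are illustrative, not the driver's actual code): * * df_dev = 0x18 + node_id (the DF device, on PCI bus 0) * nb_bus = value read from that DF's AMDZEN_DF_F0_CFG_ADDR_CTL * nb = the northbridge stub whose PCI bus number equals nb_bus 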
 * * Given all of the above, we can map a northbridge to a data fabric device and * a die to a data fabric device. Because these are 1:1 mappings, there is a * transitive relationship from northbridge to die, and therefore we know which * northbridge is associated with which processor die. This is summarized in the * following image: * * +-------+ +------------------------------------+ +--------------+ * | Die 0 |---->| Data Fabric PCI BDF 0/18/0 |---->| Northbridge | * +-------+ | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 10 | | PCI 10/0/0 | * ... +------------------------------------+ +--------------+ * +-------+ +------------------------------------+ +--------------+ * | Die n |---->| Data Fabric PCI BDF 0/18+n/0 |---->| Northbridge | * +-------+ | AMDZEN_DF_F0_CFG_ADDR_CTL: bus 133 | | PCI 133/0/0 | * +------------------------------------+ +--------------+ * * Note, the PCI buses used by the northbridges here are arbitrary examples that * do not necessarily reflect actual hardware values; however, the * bus/device/function (BDF) of the data fabric accurately models hardware. All * BDF values are in hex. * * Starting with the Rome generation of processors (Family 17h Models 30-3Fh), * AMD has multiple northbridges on a given die. All of these northbridges share * the same data fabric and system management network port. From our perspective * this means that some of the northbridge devices will be redundant and that we * no longer have a 1:1 mapping between the northbridge and the data fabric * devices. Every data fabric will have a northbridge, but not every northbridge * will have a data fabric device mapped. Because we're always trying to map * from a die to a northbridge and not the reverse, the fact that there are * extra northbridge devices hanging around that we don't know about shouldn't * be a problem. * * ------------------------------- * Attach and Detach Complications * ------------------------------- * * We need to map different PCI devices together. Each device is attached to an * amdzen_stub driver to facilitate integration with the rest of the kernel PCI * machinery and so we have to manage multiple dev_info_t structures, each of * which may be independently attached and detached. * * This is not particularly complex for attach: our _init routine allocates the * necessary mutex and list structures at module load time, and as each stub is * attached, it calls into this code to be added to the appropriate list. When * the nexus itself is attached, we walk the PCI device tree accumulating a * counter for all devices we expect to be attached. Once the scan is complete * and all such devices are accounted for (stub registration may be happening * asynchronously with respect to nexus attach), we initialize the nexus device * and the attach is complete. * * Most other device drivers support instances that can be brought back after * detach, provided they are associated with an active minor node in the * /devices file system. This driver is different. Once a stub device has been * attached, we do not permit detaching the nexus driver instance, as the kernel * does not give us interlocking guarantees between nexus and stub driver attach * and detach. It is simplest to just unconditionally fail detach once a stub * has attached. 
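 * * In code terms, this policy amounts to the following sketch (not the literal * nexus detach entry point, which lives elsewhere in this file): * * if (a stub has ever attached) * return (DDI_FAILURE); * return (DDI_SUCCESS); 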
 * * --------------- * Exposed Devices * --------------- * * Rather than try and have all of the different functions that could be * provided in one driver, we have a nexus driver that tries to load child * pseudo-device drivers that provide specific pieces of functionality. * * ------- * Locking * ------- * * The amdzen_data structure contains a single lock, azn_mutex. * * The various client functions here are intended for our nexus's direct * children, but have been designed in case someone else should depend on this * driver. Once a DF has been discovered, the set of entities inside of it * (adf_nents, adf_ents[]) is considered static, constant data, and iteration * over them does not require locking. However, the discovery of the amdzen_df_t * does. In addition, locking is required whenever performing register accesses * to the DF or SMN. * * To summarize, one must hold the lock in the following circumstances: * * - Looking up DF structures * - Reading or writing to DF registers * - Reading or writing to SMN registers * * In general, it is preferred that the lock be held across an entire client * operation if possible. The only time this becomes an issue is when we have * callbacks into our callers (ala amdzen_c_df_iter()) as they may recursively * call into us. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "amdzen.h" #include "amdzen_client.h" #include "amdzen_topo.h" amdzen_t *amdzen_data; /* * Internal minor nodes for devices that the nexus provides itself. */ #define AMDZEN_MINOR_TOPO 0 /* * Array of northbridge IDs that we care about. */ static const uint16_t amdzen_nb_ids[] = { /* Family 17h Ryzen, Epyc Models 00h-0fh (Zen uarch) */ 0x1450, /* Family 17h Raven Ridge, Kestrel, Dali Models 10h-2fh (Zen uarch) */ 0x15d0, /* Family 17h/19h Rome, Milan, Matisse, Vermeer Zen 2/Zen 3 uarch */ 0x1480, /* Family 17h/19h Renoir, Cezanne, Van Gogh Zen 2/3 uarch */ 0x1630, /* Family 19h Genoa and Bergamo */ 0x14a4, /* Family 17h Mendocino, Family 19h Rembrandt */ 0x14b5, /* Family 19h Raphael, Family 1Ah 40-4fh */ 0x14d8, /* Family 19h Phoenix */ 0x14e8, /* Family 1Ah Turin */ 0x153a, /* Family 1Ah 20-2fh */ 0x1507 }; typedef struct { char *acd_name; amdzen_child_t acd_addr; /* * This indicates whether or not we should issue warnings to users when * something happens specific to this instance. The main reason we don't * want to is for optional devices that may not be installed as they are * for development purposes (e.g. usmn, zen_udf); however, if there is * an issue with the others we still want to know. 
*/ bool acd_warn; } amdzen_child_data_t; static const amdzen_child_data_t amdzen_children[] = { { "smntemp", AMDZEN_C_SMNTEMP, true }, { "usmn", AMDZEN_C_USMN, false }, { "zen_udf", AMDZEN_C_ZEN_UDF, false }, { "zen_umc", AMDZEN_C_ZEN_UMC, true } }; static uint8_t amdzen_stub_get8(amdzen_stub_t *stub, off_t reg) { return (pci_config_get8(stub->azns_cfgspace, reg)); } static uint16_t amdzen_stub_get16(amdzen_stub_t *stub, off_t reg) { return (pci_config_get16(stub->azns_cfgspace, reg)); } static uint32_t amdzen_stub_get32(amdzen_stub_t *stub, off_t reg) { return (pci_config_get32(stub->azns_cfgspace, reg)); } static uint64_t amdzen_stub_get64(amdzen_stub_t *stub, off_t reg) { return (pci_config_get64(stub->azns_cfgspace, reg)); } static void amdzen_stub_put8(amdzen_stub_t *stub, off_t reg, uint8_t val) { pci_config_put8(stub->azns_cfgspace, reg, val); } static void amdzen_stub_put16(amdzen_stub_t *stub, off_t reg, uint16_t val) { pci_config_put16(stub->azns_cfgspace, reg, val); } static void amdzen_stub_put32(amdzen_stub_t *stub, off_t reg, uint32_t val) { pci_config_put32(stub->azns_cfgspace, reg, val); } static uint64_t amdzen_df_read_regdef(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def, uint8_t inst, boolean_t do_64) { df_reg_def_t ficaa; df_reg_def_t ficad; uint32_t val = 0; df_rev_t df_rev = azn->azn_dfs[0].adf_rev; VERIFY(MUTEX_HELD(&azn->azn_mutex)); ASSERT3U(def.drd_gens & df_rev, ==, df_rev); val = DF_FICAA_V2_SET_TARG_INST(val, 1); val = DF_FICAA_V2_SET_FUNC(val, def.drd_func); val = DF_FICAA_V2_SET_INST(val, inst); val = DF_FICAA_V2_SET_64B(val, do_64 ? 1 : 0); switch (df_rev) { case DF_REV_2: case DF_REV_3: case DF_REV_3P5: ficaa = DF_FICAA_V2; ficad = DF_FICAD_LO_V2; /* * Both here and in the DFv4 case, the register ignores the * lower 2 bits. That is we can only address and encode things * in units of 4 bytes. */ val = DF_FICAA_V2_SET_REG(val, def.drd_reg >> 2); break; case DF_REV_4: case DF_REV_4D2: ficaa = DF_FICAA_V4; ficad = DF_FICAD_LO_V4; val = DF_FICAA_V4_SET_REG(val, def.drd_reg >> 2); break; default: panic("encountered unexpected DF rev: %u", df_rev); } amdzen_stub_put32(df->adf_funcs[ficaa.drd_func], ficaa.drd_reg, val); if (do_64) { return (amdzen_stub_get64(df->adf_funcs[ficad.drd_func], ficad.drd_reg)); } else { return (amdzen_stub_get32(df->adf_funcs[ficad.drd_func], ficad.drd_reg)); } } /* * Perform a targeted 32-bit indirect read to a specific instance and function. */ static uint32_t amdzen_df_read32(amdzen_t *azn, amdzen_df_t *df, uint8_t inst, const df_reg_def_t def) { return (amdzen_df_read_regdef(azn, df, def, inst, B_FALSE)); } /* * For a broadcast read, just go to the underlying PCI function and perform a * read. At this point in time, we don't believe we need to use the FICAA/FICAD * to access it (though it does have a broadcast mode). 
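 * * For contrast with this broadcast path, the targeted path above * (amdzen_df_read_regdef()) is a two-step dance, roughly sketched as: * * FICAA <- { target-instance = 1, function, reg >> 2, instance, 64-bit? }; * value <- read of FICAD_LO (32 or 64 bits, as requested); 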
*/ static uint32_t amdzen_df_read32_bcast(amdzen_t *azn, amdzen_df_t *df, const df_reg_def_t def) { VERIFY(MUTEX_HELD(&azn->azn_mutex)); return (amdzen_stub_get32(df->adf_funcs[def.drd_func], def.drd_reg)); } static uint32_t amdzen_smn_read(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg) { const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); VERIFY(MUTEX_HELD(&azn->azn_mutex)); amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); switch (SMN_REG_SIZE(reg)) { case 1: return ((uint32_t)amdzen_stub_get8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off)); case 2: return ((uint32_t)amdzen_stub_get16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off)); case 4: return (amdzen_stub_get32(df->adf_nb, AMDZEN_NB_SMN_DATA)); default: panic("unreachable invalid SMN register size %u", SMN_REG_SIZE(reg)); } } static void amdzen_smn_write(amdzen_t *azn, amdzen_df_t *df, const smn_reg_t reg, const uint32_t val) { const uint32_t base_addr = SMN_REG_ADDR_BASE(reg); const uint32_t addr_off = SMN_REG_ADDR_OFF(reg); VERIFY(SMN_REG_IS_NATURALLY_ALIGNED(reg)); VERIFY(SMN_REG_VALUE_FITS(reg, val)); VERIFY(MUTEX_HELD(&azn->azn_mutex)); amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_ADDR, base_addr); switch (SMN_REG_SIZE(reg)) { case 1: amdzen_stub_put8(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, (uint8_t)val); break; case 2: amdzen_stub_put16(df->adf_nb, AMDZEN_NB_SMN_DATA + addr_off, (uint16_t)val); break; case 4: amdzen_stub_put32(df->adf_nb, AMDZEN_NB_SMN_DATA, val); break; default: panic("unreachable invalid SMN register size %u", SMN_REG_SIZE(reg)); } } /* * This is an unfortunate necessity due to the evolution of the CCM DF values. */ static inline boolean_t amdzen_df_at_least(const amdzen_df_t *df, uint8_t major, uint8_t minor) { return (df->adf_major > major || (df->adf_major == major && df->adf_minor >= minor)); } static amdzen_df_t * amdzen_df_find(amdzen_t *azn, uint_t dfno) { uint_t i; ASSERT(MUTEX_HELD(&azn->azn_mutex)); if (dfno >= azn->azn_ndfs) { return (NULL); } for (i = 0; i < azn->azn_ndfs; i++) { amdzen_df_t *df = &azn->azn_dfs[i]; if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0) { continue; } if (dfno == 0) { return (df); } dfno--; } return (NULL); } static amdzen_df_ent_t * amdzen_df_ent_find_by_instid(amdzen_df_t *df, uint8_t instid) { for (uint_t i = 0; i < df->adf_nents; i++) { amdzen_df_ent_t *ent = &df->adf_ents[i]; if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) { continue; } if (ent->adfe_inst_id == instid) { return (ent); } } return (NULL); } /* * Client functions that are used by nexus children. 
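 * * As an illustrative (not normative) example, a child driver might perform a * single SMN read as follows, with 'reg' constructed from its own headers: * * uint32_t val; * int ret = amdzen_c_smn_read(0, reg, &val); * if (ret != 0) * return (ret); 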
*/ int amdzen_c_smn_read(uint_t dfno, const smn_reg_t reg, uint32_t *valp) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; if (!SMN_REG_SIZE_IS_VALID(reg)) return (EINVAL); if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) return (EINVAL); mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, dfno); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) { mutex_exit(&azn->azn_mutex); return (ENXIO); } *valp = amdzen_smn_read(azn, df, reg); mutex_exit(&azn->azn_mutex); return (0); } int amdzen_c_smn_write(uint_t dfno, const smn_reg_t reg, const uint32_t val) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; if (!SMN_REG_SIZE_IS_VALID(reg)) return (EINVAL); if (!SMN_REG_IS_NATURALLY_ALIGNED(reg)) return (EINVAL); if (!SMN_REG_VALUE_FITS(reg, val)) return (EOVERFLOW); mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, dfno); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } if ((df->adf_flags & AMDZEN_DF_F_FOUND_NB) == 0) { mutex_exit(&azn->azn_mutex); return (ENXIO); } amdzen_smn_write(azn, df, reg, val); mutex_exit(&azn->azn_mutex); return (0); } uint_t amdzen_c_df_count(void) { uint_t ret; amdzen_t *azn = amdzen_data; mutex_enter(&azn->azn_mutex); ret = azn->azn_ndfs; mutex_exit(&azn->azn_mutex); return (ret); } df_rev_t amdzen_c_df_rev(void) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; df_rev_t rev; /* * Always use the first DF instance to determine what we're using. Our * current assumption, which seems to generally be true, is that the * given DF revisions are the same in a given system when the DFs are * directly connected. */ mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, 0); if (df == NULL) { rev = DF_REV_UNKNOWN; } else { rev = df->adf_rev; } mutex_exit(&azn->azn_mutex); return (rev); } int amdzen_c_df_read32(uint_t dfno, uint8_t inst, const df_reg_def_t def, uint32_t *valp) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, dfno); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } if (df->adf_rev == DF_REV_UNKNOWN) { mutex_exit(&azn->azn_mutex); return (ENOTSUP); } *valp = amdzen_df_read_regdef(azn, df, def, inst, B_FALSE); mutex_exit(&azn->azn_mutex); return (0); } int amdzen_c_df_read64(uint_t dfno, uint8_t inst, const df_reg_def_t def, uint64_t *valp) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, dfno); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } if (df->adf_rev == DF_REV_UNKNOWN) { mutex_exit(&azn->azn_mutex); return (ENOTSUP); } *valp = amdzen_df_read_regdef(azn, df, def, inst, B_TRUE); mutex_exit(&azn->azn_mutex); return (0); } int amdzen_c_df_iter(uint_t dfno, zen_df_type_t type, amdzen_c_iter_f func, void *arg) { amdzen_df_t *df; amdzen_t *azn = amdzen_data; df_type_t df_type; uint8_t df_subtype; /* * Unlike other calls here, we hold our lock only to find the DF here. * The main reason for this is the nature of the callback function. * Folks are iterating over instances so they can call back into us. If * you look at the locking statement, the thing that is most volatile * right here and what we need to protect is the DF itself and * subsequent register accesses to it. The actual data about which * entities exist is static and so once we have found a DF we should * hopefully be in good shape as they only come, but don't go. 
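 * * A sketch of how a caller might use this iterator (the callback signature * matches the func() invocation below; the names here are illustrative): * * static int * my_cb(uint_t dfno, uint32_t fabid, uint32_t instid, void *arg) * { return (0); (non-zero terminates iteration) } * * ret = amdzen_c_df_iter(0, ZEN_DF_TYPE_CS_UMC, my_cb, arg); 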
 */ mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, dfno); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } mutex_exit(&azn->azn_mutex); switch (type) { case ZEN_DF_TYPE_CS_UMC: df_type = DF_TYPE_CS; /* * In the original Zeppelin DFv2 die there was no subtype field * used for the CS. The UMC is the only type and has a subtype * of zero. */ if (df->adf_rev != DF_REV_2) { df_subtype = DF_CS_SUBTYPE_UMC; } else { df_subtype = 0; } break; case ZEN_DF_TYPE_CCM_CPU: df_type = DF_TYPE_CCM; if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) { df_subtype = DF_CCM_SUBTYPE_CPU_V4P1; } else { df_subtype = DF_CCM_SUBTYPE_CPU_V2; } break; default: return (EINVAL); } for (uint_t i = 0; i < df->adf_nents; i++) { amdzen_df_ent_t *ent = &df->adf_ents[i]; /* * Some DF components are not considered enabled and therefore * will end up having bogus values in their ID fields. If we do * not have an enable flag set, we must skip this node. */ if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) continue; if (ent->adfe_type == df_type && ent->adfe_subtype == df_subtype) { int ret = func(dfno, ent->adfe_fabric_id, ent->adfe_inst_id, arg); if (ret != 0) { return (ret); } } } return (0); } int amdzen_c_df_fabric_decomp(df_fabric_decomp_t *decomp) { const amdzen_df_t *df; amdzen_t *azn = amdzen_data; mutex_enter(&azn->azn_mutex); df = amdzen_df_find(azn, 0); if (df == NULL) { mutex_exit(&azn->azn_mutex); return (ENOENT); } *decomp = df->adf_decomp; mutex_exit(&azn->azn_mutex); return (0); } static boolean_t amdzen_create_child(amdzen_t *azn, const amdzen_child_data_t *acd) { int ret; dev_info_t *child; if (ndi_devi_alloc(azn->azn_dip, acd->acd_name, (pnode_t)DEVI_SID_NODEID, &child) != NDI_SUCCESS) { dev_err(azn->azn_dip, CE_WARN, "!failed to allocate child " "dip for %s", acd->acd_name); return (B_FALSE); } ddi_set_parent_data(child, (void *)acd); if ((ret = ndi_devi_online(child, 0)) != NDI_SUCCESS) { if (acd->acd_warn) { dev_err(azn->azn_dip, CE_WARN, "!failed to online " "child dip %s: %d", acd->acd_name, ret); } return (B_FALSE); } return (B_TRUE); } static boolean_t amdzen_map_dfs(amdzen_t *azn) { amdzen_stub_t *stub; ASSERT(MUTEX_HELD(&azn->azn_mutex)); for (stub = list_head(&azn->azn_df_stubs); stub != NULL; stub = list_next(&azn->azn_df_stubs, stub)) { amdzen_df_t *df; uint_t dfno; dfno = stub->azns_dev - AMDZEN_DF_FIRST_DEVICE; if (dfno >= AMDZEN_MAX_DFS) { dev_err(stub->azns_dip, CE_WARN, "encountered df " "device with illegal DF PCI b/d/f: 0x%x/%x/%x", stub->azns_bus, stub->azns_dev, stub->azns_func); goto err; } df = &azn->azn_dfs[dfno]; if (stub->azns_func >= AMDZEN_MAX_DF_FUNCS) { dev_err(stub->azns_dip, CE_WARN, "encountered df " "device with illegal DF PCI b/d/f: 0x%x/%x/%x", stub->azns_bus, stub->azns_dev, stub->azns_func); goto err; } if (df->adf_funcs[stub->azns_func] != NULL) { dev_err(stub->azns_dip, CE_WARN, "encountered " "duplicate df device with DF PCI b/d/f: 0x%x/%x/%x", stub->azns_bus, stub->azns_dev, stub->azns_func); goto err; } df->adf_funcs[stub->azns_func] = stub; } return (B_TRUE); err: azn->azn_flags |= AMDZEN_F_DEVICE_ERROR; return (B_FALSE); } static boolean_t amdzen_check_dfs(amdzen_t *azn) { uint_t i; boolean_t ret = B_TRUE; for (i = 0; i < AMDZEN_MAX_DFS; i++) { amdzen_df_t *df = &azn->azn_dfs[i]; uint_t count = 0; /* * We require all platforms to have DF functions 0-6. Not all * platforms have DF function 7. 
 */ for (uint_t func = 0; func < AMDZEN_MAX_DF_FUNCS - 1; func++) { if (df->adf_funcs[func] != NULL) { count++; } } if (count == 0) continue; if (count != 7) { ret = B_FALSE; dev_err(azn->azn_dip, CE_WARN, "df %u devices " "incomplete", i); } else { df->adf_flags |= AMDZEN_DF_F_VALID; azn->azn_ndfs++; } } return (ret); } static const uint8_t amdzen_df_rome_ids[0x2b] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48 }; /* * Check the first df entry to see if it belongs to Rome or Milan. If so, then * it uses the disjoint ID space. */ static boolean_t amdzen_is_rome_style(uint_t id) { return (id == 0x1490 || id == 0x1650); } /* * Deal with the differences between how a CCM subtype is indicated * across CPU generations. */ static boolean_t amdzen_dfe_is_ccm(const amdzen_df_t *df, const amdzen_df_ent_t *ent) { if (ent->adfe_type != DF_TYPE_CCM) { return (B_FALSE); } if (df->adf_rev >= DF_REV_4 && amdzen_df_at_least(df, 4, 1)) { return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V4P1); } else { return (ent->adfe_subtype == DF_CCM_SUBTYPE_CPU_V2); } } /* * To be able to do most other things we want to do, we must first determine * what revision of the DF (data fabric) that we're using. * * Snapshot the df version. This was added explicitly in DFv4.0, around the Zen * 4 timeframe and allows us to tell apart different versions of the DF register * set, most usefully when various subtypes were added. * * Older versions can theoretically be told apart based on usage of reserved * registers. We walk these in the following order, starting with the newest rev * and walking backwards to tell things apart: * * o v3.5 -> Check function 1, register 0x150. This was reserved prior * to this point. This is actually DF_FIDMASK0_V3P5. We are supposed * to check bits [7:0]. * * o v3.0 -> Check function 1, register 0x208. The low byte (7:0) was * changed to indicate a component mask. This is non-zero * in the 3.0 generation. This is actually DF_FIDMASK_V2. * * o v2.0 -> Anything that matches neither of the above cases. Presumably * v1 wasn't part of the Zen generation. * * Because we don't know what version we are yet, we do not use the normal * versioned register accesses which would check what DF version we are and * would want to use the normal indirect register accesses (which also require * us to know the version). We instead do direct broadcast reads. */ static void amdzen_determine_df_vers(amdzen_t *azn, amdzen_df_t *df) { uint32_t val; df_reg_def_t rd = DF_FBICNT; val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg); df->adf_major = DF_FBICNT_V4_GET_MAJOR(val); df->adf_minor = DF_FBICNT_V4_GET_MINOR(val); if (df->adf_major == 0 && df->adf_minor == 0) { rd = DF_FIDMASK0_V3P5; val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg); if (bitx32(val, 7, 0) != 0) { df->adf_major = 3; df->adf_minor = 5; df->adf_rev = DF_REV_3P5; } else { rd = DF_FIDMASK_V2; val = amdzen_stub_get32(df->adf_funcs[rd.drd_func], rd.drd_reg); if (bitx32(val, 7, 0) != 0) { df->adf_major = 3; df->adf_minor = 0; df->adf_rev = DF_REV_3; } else { df->adf_major = 2; df->adf_minor = 0; df->adf_rev = DF_REV_2; } } } else if (df->adf_major == 4 && df->adf_minor >= 2) { /* * These are devices that have the newer memory layout that * moves the DF::DramBaseAddress to 0x200. Please see the df.h * theory statement for more information. 
*/ df->adf_rev = DF_REV_4D2; } else if (df->adf_major == 4) { df->adf_rev = DF_REV_4; } else { df->adf_rev = DF_REV_UNKNOWN; } } /* * All of the different versions of the DF have different ways of getting at and * answering the question of how do I break a fabric ID into a corresponding * socket, die, and component. Importantly the goal here is to obtain, cache, * and normalize: * * o The DF System Configuration * o The various Mask registers * o The Node ID */ static void amdzen_determine_fabric_decomp(amdzen_t *azn, amdzen_df_t *df) { uint32_t mask; df_fabric_decomp_t *decomp = &df->adf_decomp; switch (df->adf_rev) { case DF_REV_2: df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V2); switch (DF_SYSCFG_V2_GET_MY_TYPE(df->adf_syscfg)) { case DF_DIE_TYPE_CPU: mask = amdzen_df_read32_bcast(azn, df, DF_DIEMASK_CPU_V2); break; case DF_DIE_TYPE_APU: mask = amdzen_df_read32_bcast(azn, df, DF_DIEMASK_APU_V2); break; default: panic("DF thinks we're not on a CPU!"); } df->adf_mask0 = mask; /* * DFv2 is a bit different in how the fabric mask register is * phrased. Logically a fabric ID is broken into something that * uniquely identifies a "node" (a particular die on a socket) * and something that identifies a "component", e.g. a memory * controller. * * Starting with DFv3, these registers logically called out how * to separate the fabric ID first into a node and a component. * Then the node was then broken down into a socket and die. In * DFv2, there is no separate mask and shift of a node. Instead * the socket and die are absolute offsets into the fabric ID * rather than relative offsets into the node ID. As such, when * we encounter DFv2, we fake up a node mask and shift and make * it look like DFv3+. */ decomp->dfd_node_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) | DF_DIEMASK_V2_GET_DIE_MASK(mask); decomp->dfd_node_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask); decomp->dfd_comp_mask = DF_DIEMASK_V2_GET_COMP_MASK(mask); decomp->dfd_comp_shift = 0; decomp->dfd_sock_mask = DF_DIEMASK_V2_GET_SOCK_MASK(mask) >> decomp->dfd_node_shift; decomp->dfd_die_mask = DF_DIEMASK_V2_GET_DIE_MASK(mask) >> decomp->dfd_node_shift; decomp->dfd_sock_shift = DF_DIEMASK_V2_GET_SOCK_SHIFT(mask) - decomp->dfd_node_shift; decomp->dfd_die_shift = DF_DIEMASK_V2_GET_DIE_SHIFT(mask) - decomp->dfd_node_shift; ASSERT3U(decomp->dfd_die_shift, ==, 0); /* * There is no register in the actual data fabric with the node * ID in DFv2 that we have found. Instead we take the first * entity's fabric ID and transform it into the node id. 
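 * * As an illustration with made-up register values: if the socket mask were * 0x60 (shift 5) and the die mask 0x18 (shift 3), we would fake up a node * mask of 0x78 with a node shift of 3. The relative socket mask then becomes * 0xc with a shift of 2 and the relative die mask 0x3 with a shift of 0, * consistent with the ASSERT3U above that the relative die shift is 0. 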
*/ df->adf_nodeid = (df->adf_ents[0].adfe_fabric_id & decomp->dfd_node_mask) >> decomp->dfd_node_shift; break; case DF_REV_3: df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3); df->adf_mask0 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK0_V3); df->adf_mask1 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK1_V3); decomp->dfd_sock_mask = DF_FIDMASK1_V3_GET_SOCK_MASK(df->adf_mask1); decomp->dfd_sock_shift = DF_FIDMASK1_V3_GET_SOCK_SHIFT(df->adf_mask1); decomp->dfd_die_mask = DF_FIDMASK1_V3_GET_DIE_MASK(df->adf_mask1); decomp->dfd_die_shift = 0; decomp->dfd_node_mask = DF_FIDMASK0_V3_GET_NODE_MASK(df->adf_mask0); decomp->dfd_node_shift = DF_FIDMASK1_V3_GET_NODE_SHIFT(df->adf_mask1); decomp->dfd_comp_mask = DF_FIDMASK0_V3_GET_COMP_MASK(df->adf_mask0); decomp->dfd_comp_shift = 0; df->adf_nodeid = DF_SYSCFG_V3_GET_NODE_ID(df->adf_syscfg); break; case DF_REV_3P5: df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V3P5); df->adf_mask0 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK0_V3P5); df->adf_mask1 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK1_V3P5); df->adf_mask2 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK2_V3P5); decomp->dfd_sock_mask = DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2); decomp->dfd_sock_shift = DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1); decomp->dfd_die_mask = DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2); decomp->dfd_die_shift = 0; decomp->dfd_node_mask = DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0); decomp->dfd_node_shift = DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1); decomp->dfd_comp_mask = DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0); decomp->dfd_comp_shift = 0; df->adf_nodeid = DF_SYSCFG_V3P5_GET_NODE_ID(df->adf_syscfg); break; case DF_REV_4: case DF_REV_4D2: df->adf_syscfg = amdzen_df_read32_bcast(azn, df, DF_SYSCFG_V4); df->adf_mask0 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK0_V4); df->adf_mask1 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK1_V4); df->adf_mask2 = amdzen_df_read32_bcast(azn, df, DF_FIDMASK2_V4); /* * The DFv4 registers are at a different location in the DF; * however, the actual layout of fields is the same as DFv3.5. * This is why you see V3P5 below. */ decomp->dfd_sock_mask = DF_FIDMASK2_V3P5_GET_SOCK_MASK(df->adf_mask2); decomp->dfd_sock_shift = DF_FIDMASK1_V3P5_GET_SOCK_SHIFT(df->adf_mask1); decomp->dfd_die_mask = DF_FIDMASK2_V3P5_GET_DIE_MASK(df->adf_mask2); decomp->dfd_die_shift = 0; decomp->dfd_node_mask = DF_FIDMASK0_V3P5_GET_NODE_MASK(df->adf_mask0); decomp->dfd_node_shift = DF_FIDMASK1_V3P5_GET_NODE_SHIFT(df->adf_mask1); decomp->dfd_comp_mask = DF_FIDMASK0_V3P5_GET_COMP_MASK(df->adf_mask0); decomp->dfd_comp_shift = 0; df->adf_nodeid = DF_SYSCFG_V4_GET_NODE_ID(df->adf_syscfg); break; default: panic("encountered suspicious, previously rejected DF " "rev: 0x%x", df->adf_rev); } } /* * The purpose of this function is to map CCMs to the corresponding CCDs that * exist. This is not an obvious thing as there is no direct mapping in the data * fabric between these IDs. * * Prior to DFv4, a given CCM was only ever connected to at most one CCD. * Starting in DFv4 a given CCM may have one or two SDP (scalable data ports) * that connect to CCDs. These may be connected to the same CCD or a different * one. When both ports are enabled we must check whether or not the port is * considered to be in wide mode. When wide mode is enabled then the two ports * are connected to a single CCD. If wide mode is disabled then the two ports * are connected to separate CCDs. 
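 * * Concretely, this is a sketch of the logic implemented below in * amdzen_setup_df_ccm(): read DF_CCD_EN_V4 for the mask of connected SDPs, * then read DF_CCMCFG4_V4; if the wide-mode bit is set and the mask is * non-zero, collapse the mask to 0x1 so that only a single CCD is reported. 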
* * The physical number of a CCD, which is how we determine the SMN aperture to * use, is based on the CCM ID. In most sockets we have seen up to a maximum of * 8 CCMs. When a CCM is connected to more than one CCD we have determined based * on some hints from AMD's ACPI information that the numbering is assumed to be * that CCM's number plus the total number of CCMs. * * More concretely, the SP5 Genoa/Bergamo Zen 4 platform has 8 CCMs. When there * are more than 8 CCDs installed then CCM 0 maps to CCDs 0 and 8. CCM 1 to CCDs * 1 and 9, etc. CCMs 4-7 map 1:1 to CCDs 4-7. However, the placement of CCDs * within the package has changed across generations. * * Notably in Rome and Milan (Zen 2/3) it appears that each quadrant had an * increasing number of CCDs. So CCDs 0/1 were together, 2/3, 4/5, and 6/7. This * meant that in cases where only a subset of CCDs were populated it'd forcibly * disable the higher CCD in a group (but with DFv3 the CCM would still be * enabled). So a 4 CCD config would generally enable CCDs 0, 2, 4, and 6 say. * This was almost certainly done to balance the NUMA config. * * Instead, starting in Genoa (Zen 4) the CCMs are round-robined around the * quadrants so CCMs (CCDs) 0 (0/8) and 4 (4) are together, 1 (1/9) and 5 (5), * etc. This is also why we more often see disabled CCMs in Genoa, but not in * Rome/Milan. * * When we're operating in wide mode and therefore both SDPs are connected to a * single CCD, we've always found that the lower CCD index will be used by the * system and the higher one is not considered present. Therefore, when * operating in wide mode, we need to make sure that whenever we have a non-zero * value for SDPs being connected that we rewrite this to only appear as a * single CCD is present. It's conceivable (though hard to imagine) that we * could get a value of 0b10 indicating that only the upper SDP link is active * for some reason. */ static void amdzen_setup_df_ccm(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *dfe, uint32_t ccmno) { amdzen_ccm_data_t *ccm = &dfe->adfe_data.aded_ccm; uint32_t ccd_en; if (df->adf_rev >= DF_REV_4) { uint32_t val = amdzen_df_read32(azn, df, dfe->adfe_inst_id, DF_CCD_EN_V4); ccd_en = DF_CCD_EN_V4_GET_CCD_EN(val); val = amdzen_df_read32(azn, df, dfe->adfe_inst_id, DF_CCMCFG4_V4); if (DF_CCMCFG4_V4_GET_WIDE_EN(val) != 0 && ccd_en != 0) { ccd_en = 0x1; } } else { ccd_en = 0x1; } for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) { ccm->acd_ccd_en[i] = (ccd_en & (1 << i)) != 0; if (ccm->acd_ccd_en[i] == 0) continue; ccm->acd_ccd_id[i] = ccmno + i * df->adf_nccm; ccm->acd_nccds++; } } /* * Initialize our knowledge about a given series of nodes on the data fabric. 
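 * * Roughly, the flow of amdzen_setup_df() below is: * * 1) Determine the DF revision (amdzen_determine_df_vers()). * 2) Read the CFG_ADDR_CTL register to record the NB bus number. * 3) Size the entity array from DF_FBICNT and walk each instance, caching * its FBIINFO registers, type, sub-type, and IDs. * 4) Perform type-specific setup (currently CCMs) and determine the fabric * decomposition. 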
*/ static void amdzen_setup_df(amdzen_t *azn, amdzen_df_t *df) { uint_t i; uint32_t val, ccmno; amdzen_determine_df_vers(azn, df); switch (df->adf_rev) { case DF_REV_2: case DF_REV_3: case DF_REV_3P5: val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V2); break; case DF_REV_4: case DF_REV_4D2: val = amdzen_df_read32_bcast(azn, df, DF_CFG_ADDR_CTL_V4); break; default: dev_err(azn->azn_dip, CE_WARN, "encountered unsupported DF " "revision: 0x%x", df->adf_rev); return; } df->adf_nb_busno = DF_CFG_ADDR_CTL_GET_BUS_NUM(val); val = amdzen_df_read32_bcast(azn, df, DF_FBICNT); df->adf_nents = DF_FBICNT_GET_COUNT(val); if (df->adf_nents == 0) return; df->adf_ents = kmem_zalloc(sizeof (amdzen_df_ent_t) * df->adf_nents, KM_SLEEP); for (i = 0; i < df->adf_nents; i++) { amdzen_df_ent_t *dfe = &df->adf_ents[i]; uint8_t inst = i; /* * Unfortunately, Rome uses a discontinuous instance ID pattern * while everything else we can find uses a contiguous instance * ID pattern. This means that for Rome, we need to adjust the * indexes that we iterate over, though the total number of * entries is right. This was carried over into Milan, but not * Genoa. */ if (amdzen_is_rome_style(df->adf_funcs[0]->azns_did)) { if (inst >= ARRAY_SIZE(amdzen_df_rome_ids)) { dev_err(azn->azn_dip, CE_WARN, "Rome family " "processor reported more ids than the PPR, " "resetting %u to instance zero", inst); inst = 0; } else { inst = amdzen_df_rome_ids[inst]; } } dfe->adfe_drvid = inst; dfe->adfe_info0 = amdzen_df_read32(azn, df, inst, DF_FBIINFO0); if (df->adf_rev <= DF_REV_4) { dfe->adfe_info1 = amdzen_df_read32(azn, df, inst, DF_FBIINFO1); dfe->adfe_info2 = amdzen_df_read32(azn, df, inst, DF_FBIINFO2); } dfe->adfe_info3 = amdzen_df_read32(azn, df, inst, DF_FBIINFO3); dfe->adfe_type = DF_FBIINFO0_GET_TYPE(dfe->adfe_info0); dfe->adfe_subtype = DF_FBIINFO0_GET_SUBTYPE(dfe->adfe_info0); /* * The enabled flag was not present in Zen 1. Simulate it by * checking for a non-zero register instead. */ if (DF_FBIINFO0_V3_GET_ENABLED(dfe->adfe_info0) || (df->adf_rev == DF_REV_2 && dfe->adfe_info0 != 0)) { dfe->adfe_flags |= AMDZEN_DFE_F_ENABLED; } if (DF_FBIINFO0_GET_HAS_MCA(dfe->adfe_info0)) { dfe->adfe_flags |= AMDZEN_DFE_F_MCA; } /* * Starting with DFv4 there is no instance ID in the fabric info * 3 register, so we instead grab it out of the driver ID which * is what it should be anyways. */ if (df->adf_rev >= DF_REV_4) { dfe->adfe_inst_id = dfe->adfe_drvid; } else { dfe->adfe_inst_id = DF_FBIINFO3_GET_INSTID(dfe->adfe_info3); } switch (df->adf_rev) { case DF_REV_2: dfe->adfe_fabric_id = DF_FBIINFO3_V2_GET_BLOCKID(dfe->adfe_info3); break; case DF_REV_3: dfe->adfe_fabric_id = DF_FBIINFO3_V3_GET_BLOCKID(dfe->adfe_info3); break; case DF_REV_3P5: dfe->adfe_fabric_id = DF_FBIINFO3_V3P5_GET_BLOCKID(dfe->adfe_info3); break; case DF_REV_4: case DF_REV_4D2: dfe->adfe_fabric_id = DF_FBIINFO3_V4_GET_BLOCKID(dfe->adfe_info3); break; default: panic("encountered suspicious, previously rejected DF " "rev: 0x%x", df->adf_rev); } /* * Record information about a subset of DF entities that we've * found. Currently we're tracking this only for CCMs. */ if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) continue; if (amdzen_dfe_is_ccm(df, dfe)) { df->adf_nccm++; } } /* * Now that we have filled in all of our info, attempt to fill in * specific information about different types of instances. 
 */ ccmno = 0; for (uint_t i = 0; i < df->adf_nents; i++) { amdzen_df_ent_t *dfe = &df->adf_ents[i]; if ((dfe->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) continue; /* * Perform type and sub-type specific initialization. Currently * limited to CCMs. */ switch (dfe->adfe_type) { case DF_TYPE_CCM: amdzen_setup_df_ccm(azn, df, dfe, ccmno); ccmno++; break; default: break; } } amdzen_determine_fabric_decomp(azn, df); } static void amdzen_find_nb(amdzen_t *azn, amdzen_df_t *df) { amdzen_stub_t *stub; for (stub = list_head(&azn->azn_nb_stubs); stub != NULL; stub = list_next(&azn->azn_nb_stubs, stub)) { if (stub->azns_bus == df->adf_nb_busno) { df->adf_flags |= AMDZEN_DF_F_FOUND_NB; df->adf_nb = stub; return; } } } /* * We need to be careful using this function as different AMD generations have * acted in different ways when there is a missing CCD. We've found that in * hardware where the CCM is enabled but there is no CCD attached, it generally * is safe (e.g. DFv3 on Rome), but on DFv4 if we ask for a CCD that would * correspond to a disabled CCM then the firmware may inject a fatal error * (which is hopefully something missing in our RAS/MCA-X enablement). * * Put differently, if this doesn't correspond to an enabled CCM and you know the * number of valid CCDs on this, don't use it. */ static boolean_t amdzen_ccd_present(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno) { smn_reg_t die_reg = SMUPWR_CCD_DIE_ID(ccdno); uint32_t val = amdzen_smn_read(azn, df, die_reg); if (val == SMN_EINVAL32) { return (B_FALSE); } ASSERT3U(ccdno, ==, SMUPWR_CCD_DIE_ID_GET(val)); return (B_TRUE); } static uint32_t amdzen_ccd_thread_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno) { smn_reg_t reg; if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) { reg = L3SOC_THREAD_EN(ccdno); } else { reg = SMUPWR_THREAD_EN(ccdno); } return (amdzen_smn_read(azn, df, reg)); } static uint32_t amdzen_ccd_core_en(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno) { smn_reg_t reg; if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) { reg = L3SOC_CORE_EN(ccdno); } else { reg = SMUPWR_CORE_EN(ccdno); } return (amdzen_smn_read(azn, df, reg)); } static void amdzen_ccd_info(amdzen_t *azn, amdzen_df_t *df, uint32_t ccdno, uint32_t *nccxp, uint32_t *nlcorep, uint32_t *nthrp) { uint32_t nccx, nlcore, smt; if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN5) { smn_reg_t reg = L3SOC_THREAD_CFG(ccdno); uint32_t val = amdzen_smn_read(azn, df, reg); nccx = L3SOC_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; nlcore = L3SOC_THREAD_CFG_GET_CORE_COUNT(val) + 1; smt = L3SOC_THREAD_CFG_GET_SMT_MODE(val); } else { smn_reg_t reg = SMUPWR_THREAD_CFG(ccdno); uint32_t val = amdzen_smn_read(azn, df, reg); nccx = SMUPWR_THREAD_CFG_GET_COMPLEX_COUNT(val) + 1; nlcore = SMUPWR_THREAD_CFG_GET_CORE_COUNT(val) + 1; smt = SMUPWR_THREAD_CFG_GET_SMT_MODE(val); } if (nccxp != NULL) { *nccxp = nccx; } if (nlcorep != NULL) { *nlcorep = nlcore; } if (nthrp != NULL) { /* The L3::L3SOC and SMU::PWR values are the same here */ if (smt == SMUPWR_THREAD_CFG_SMT_MODE_SMT) { *nthrp = 2; } else { *nthrp = 1; } } } static void amdzen_initpkg_to_apic(amdzen_t *azn, const uint32_t pkg0, const uint32_t pkg7) { uint32_t nsock, nccd, nccx, ncore, nthr, extccx; uint32_t nsock_bits, nccd_bits, nccx_bits, ncore_bits, nthr_bits; amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp; /* * These are all 0 based values, meaning that we need to add one to each * of them. However, we skip this because to calculate the number of * bits to cover an entity we would subtract one. 
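 * * As a worked example with illustrative values: with SMT enabled (nthr = 1), * 8 cores (ncore = 7), 1 CCX (nccx = 0), 8 CCDs (nccd = 7), and 2 sockets * (nsock = 1), highbit() yields 1, 3, 0, 3, and 1 bits respectively. That * produces an APIC ID layout of [0] thread, [3:1] core, [6:4] CCD, and * [7] socket. 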
 */ nthr = SCFCTP_PMREG_INITPKG0_GET_SMTEN(pkg0); ncore = SCFCTP_PMREG_INITPKG7_GET_N_CORES(pkg7); nccx = SCFCTP_PMREG_INITPKG7_GET_N_CCXS(pkg7); nccd = SCFCTP_PMREG_INITPKG7_GET_N_DIES(pkg7); nsock = SCFCTP_PMREG_INITPKG7_GET_N_SOCKETS(pkg7); if (uarchrev_uarch(azn->azn_uarchrev) >= X86_UARCH_AMD_ZEN4) { extccx = SCFCTP_PMREG_INITPKG7_ZEN4_GET_16TAPIC(pkg7); } else { extccx = 0; } nthr_bits = highbit(nthr); ncore_bits = highbit(ncore); nccx_bits = highbit(nccx); nccd_bits = highbit(nccd); nsock_bits = highbit(nsock); apic->aad_thread_shift = 0; apic->aad_thread_mask = (1 << nthr_bits) - 1; apic->aad_core_shift = nthr_bits; if (ncore_bits > 0) { apic->aad_core_mask = (1 << ncore_bits) - 1; apic->aad_core_mask <<= apic->aad_core_shift; } else { apic->aad_core_mask = 0; } /* * The APIC_16T_MODE bit indicates that the total shift to start the CCX * should be at 4 bits if it's not already. It doesn't mean that the CCX * portion of the value should take up four bits. In the common Genoa case, * nccx_bits will be zero. */ apic->aad_ccx_shift = apic->aad_core_shift + ncore_bits; if (extccx != 0 && apic->aad_ccx_shift < 4) { apic->aad_ccx_shift = 4; } if (nccx_bits > 0) { apic->aad_ccx_mask = (1 << nccx_bits) - 1; apic->aad_ccx_mask <<= apic->aad_ccx_shift; } else { apic->aad_ccx_mask = 0; } apic->aad_ccd_shift = apic->aad_ccx_shift + nccx_bits; if (nccd_bits > 0) { apic->aad_ccd_mask = (1 << nccd_bits) - 1; apic->aad_ccd_mask <<= apic->aad_ccd_shift; } else { apic->aad_ccd_mask = 0; } apic->aad_sock_shift = apic->aad_ccd_shift + nccd_bits; if (nsock_bits > 0) { apic->aad_sock_mask = (1 << nsock_bits) - 1; apic->aad_sock_mask <<= apic->aad_sock_shift; } else { apic->aad_sock_mask = 0; } /* * Currently all supported Zen 2+ platforms only have a single die per * socket as compared to Zen 1. So this is always kept at zero. */ apic->aad_die_mask = 0; apic->aad_die_shift = 0; } /* * We would like to determine what the logical APIC decomposition is on Zen 3 * and newer family parts. While there is information added to CPUID in the form * of leaf 8X26, that isn't present in Zen 3, so instead we go to what we * believe is the underlying source of the CPUID data. * * Fundamentally there are a series of registers in SMN space that relate to the * SCFCTP. Coincidentally, there is one of these for each core and there are a * pair of related SMN registers. L3::SCFCTP::PMREG_INITPKG0 contains * information about a given core's logical and physical IDs. More interestingly * for this particular case, L3::SCFCTP::PMREG_INITPKG7 contains the overall * total number of logical entities. We've been promised that this has to be * the same across the fabric. That's all well and good, but this begs the * question of how do we actually get there. The above is a core-specific * register and requires that we understand information about which CCDs and * CCXs are actually present. * * So we are starting with a data fabric that has some CCM present. The CCM * entries in the data fabric may be tagged with our ENABLED flag. * Unfortunately, that can be true regardless of whether or not a CCD is * actually present. As a result, we go to another chunk of SMN space registers, * SMU::PWR. These contain information about the CCDs, the physical cores that * are enabled, and related. So we will first walk the DF entities and see if we * can read each one's SMN::PWR::CCD_DIE_ID. If we get back a value of all 1s then * there is nothing present. Otherwise, we should get back something that * matches information in the data fabric. 
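 * * In code terms, that presence check is simply the following (see * amdzen_ccd_present()): * * amdzen_smn_read(azn, df, SMUPWR_CCD_DIE_ID(ccdno)) == SMN_EINVAL32 * * where an all-1s value indicates that nothing is present. 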
 * * With that in hand, we can read the SMU::PWR::CORE_ENABLE register to * determine which physical cores are enabled in the CCD/CCX. That will finally * give us an index to get to our friend INITPKG7. */ static boolean_t amdzen_determine_apic_decomp_initpkg(amdzen_t *azn) { amdzen_df_t *df = &azn->azn_dfs[0]; uint32_t ccdno = 0; for (uint_t i = 0; i < df->adf_nents; i++) { const amdzen_df_ent_t *ent = &df->adf_ents[i]; if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) continue; if (amdzen_dfe_is_ccm(df, ent)) { uint32_t val, nccx, pkg7, pkg0; smn_reg_t pkg7_reg, pkg0_reg; int core_bit; uint8_t pccxno, pcoreno; if (!amdzen_ccd_present(azn, df, ccdno)) { ccdno++; continue; } /* * This die actually exists. Switch over to the core * enable register to find one to ask about physically. */ amdzen_ccd_info(azn, df, ccdno, &nccx, NULL, NULL); val = amdzen_ccd_core_en(azn, df, ccdno); if (val == 0) { ccdno++; continue; } /* * There exists an enabled physical core. Find the first * index of it and map it to the corresponding CCD and * CCX. ddi_ffs returns a 1-based bit position, but we * want the 0-based physical core number, hence the -1. */ core_bit = ddi_ffs(val); ASSERT3S(core_bit, !=, 0); pcoreno = core_bit - 1; /* * Unfortunately SMU::PWR::THREAD_CONFIGURATION gives us * the number of logical cores that are present in the * complex, not the total number of physical cores. * Right now we do assume that the physical and logical * ccx numbering is equivalent (we have no other way of * knowing if it is or isn't) and that we'd * always have CCX0 before CCX1. AMD seems to suggest we * can assume this, though it is a worrisome assumption. */ pccxno = pcoreno / azn->azn_ncore_per_ccx; ASSERT3U(pccxno, <, nccx); pkg7_reg = SCFCTP_PMREG_INITPKG7(ccdno, pccxno, pcoreno); pkg7 = amdzen_smn_read(azn, df, pkg7_reg); pkg0_reg = SCFCTP_PMREG_INITPKG0(ccdno, pccxno, pcoreno); pkg0 = amdzen_smn_read(azn, df, pkg0_reg); amdzen_initpkg_to_apic(azn, pkg0, pkg7); return (B_TRUE); } } return (B_FALSE); } /* * We have the fun job of trying to figure out what the correct form of the APIC * decomposition should be and how to break that into its logical components. * The way that we get at this is generation-specific unfortunately. Here's how * it works out: * * Zen 1-2 This era of CPUs is deceptively simple. The PPR for a given * family defines exactly how the APIC ID is broken into logical * components and it's fixed; the only variation depends on whether * or not SMT is enabled. Zen 1 and Zen 2 use different schemes for * constructing this. The way that we're supposed to check if SMT * is enabled is to use AMD leaf 8X1E and ask how many threads per * core there are. We use the x86 feature set to determine that * instead. * * More specifically the Zen 1 scheme is 7 bits long. The bits have * the following meanings. * * [6] Socket ID * [5:4] Node ID * [3] Logical CCX ID * With SMT Without SMT * [2:1] Logical Core ID [2] hardcoded to zero * [0] Thread ID [1:0] Logical Core ID * * The following is the Zen 2 scheme assuming SMT. The Zen 2 scheme * without SMT shifts everything to the right by one bit. * * [7] Socket ID * [6:4] Logical CCD ID * [3] Logical CCX ID * [2:1] Logical Core ID * [0] Thread ID * * Zen 3 Zen 3 CPUs moved past the fixed APIC ID format that Zen 1 and * Zen 2 had, but also don't give us the nice way of discovering * this via CPUID that Zen 4 did. The APIC ID uses a given * number of bits for each logical component that exists, but the * exact number varies based on what's actually present. 
To get at * this we use a piece of data that is embedded in the SCFCTP * (Scalable Control Fabric, Clocks, Test, Power Gating). This can * be used to determine how many logical entities of each kind the * system thinks exist. While we could use the various CPUID * topology items to try to speed this up, they don't tell us the * die information that we need to do this. * * Zen 4+ Zen 4 introduced CPUID leaf 8000_0026h which gives us a means * for determining how to extract the CCD, CCX, and related pieces * out of the device. One thing we have to be aware of is that when * the CCD and CCX shift are the same, that means that there is * only a single CCX and we therefore have to take that into account * appropriately. This is the case generally on Zen 4 platforms, * but not on Bergamo. Until we can confirm the actual CPUID leaf * values that we receive in the cases of Bergamo and others, we * opt instead to use the same SCFCTP scheme as Zen 3. */ static boolean_t amdzen_determine_apic_decomp(amdzen_t *azn) { amdzen_apic_decomp_t *apic = &azn->azn_apic_decomp; boolean_t smt = is_x86_feature(x86_featureset, X86FSET_HTT); switch (uarchrev_uarch(azn->azn_uarchrev)) { case X86_UARCH_AMD_ZEN1: case X86_UARCH_AMD_ZENPLUS: apic->aad_sock_mask = 0x40; apic->aad_sock_shift = 6; apic->aad_die_mask = 0x30; apic->aad_die_shift = 4; apic->aad_ccd_mask = 0; apic->aad_ccd_shift = 0; apic->aad_ccx_mask = 0x08; apic->aad_ccx_shift = 3; if (smt) { apic->aad_core_mask = 0x06; apic->aad_core_shift = 1; apic->aad_thread_mask = 0x1; apic->aad_thread_shift = 0; } else { apic->aad_core_mask = 0x03; apic->aad_core_shift = 0; apic->aad_thread_mask = 0; apic->aad_thread_shift = 0; } break; case X86_UARCH_AMD_ZEN2: if (smt) { apic->aad_sock_mask = 0x80; apic->aad_sock_shift = 7; apic->aad_die_mask = 0; apic->aad_die_shift = 0; apic->aad_ccd_mask = 0x70; apic->aad_ccd_shift = 4; apic->aad_ccx_mask = 0x08; apic->aad_ccx_shift = 3; apic->aad_core_mask = 0x06; apic->aad_core_shift = 1; apic->aad_thread_mask = 0x01; apic->aad_thread_shift = 0; } else { apic->aad_sock_mask = 0x40; apic->aad_sock_shift = 6; apic->aad_die_mask = 0; apic->aad_die_shift = 0; apic->aad_ccd_mask = 0x38; apic->aad_ccd_shift = 3; apic->aad_ccx_mask = 0x04; apic->aad_ccx_shift = 2; apic->aad_core_mask = 0x3; apic->aad_core_shift = 0; apic->aad_thread_mask = 0; apic->aad_thread_shift = 0; } break; case X86_UARCH_AMD_ZEN3: case X86_UARCH_AMD_ZEN4: case X86_UARCH_AMD_ZEN5: return (amdzen_determine_apic_decomp_initpkg(azn)); default: return (B_FALSE); } return (B_TRUE); } /* * Snapshot the number of cores that can exist in a CCX based on the Zen * microarchitecture revision. In Zen 1-4 this has been a constant number * regardless of the actual CPU Family. In Zen 5 this varies based upon whether * or not dense dies are being used. */ static void amdzen_determine_ncore_per_ccx(amdzen_t *azn) { switch (uarchrev_uarch(azn->azn_uarchrev)) { case X86_UARCH_AMD_ZEN1: case X86_UARCH_AMD_ZENPLUS: case X86_UARCH_AMD_ZEN2: azn->azn_ncore_per_ccx = 4; break; case X86_UARCH_AMD_ZEN3: case X86_UARCH_AMD_ZEN4: azn->azn_ncore_per_ccx = 8; break; case X86_UARCH_AMD_ZEN5: if (chiprev_family(azn->azn_chiprev) == X86_PF_AMD_DENSE_TURIN) { azn->azn_ncore_per_ccx = 16; } else { azn->azn_ncore_per_ccx = 8; } break; default: panic("asked about non-Zen or unknown uarch"); } } /* * Attempt to determine a logical CCD number of a given CCD where we don't have * hardware support for L3::SCFCTP::PMREG_INITPKG* (e.g. pre-Zen 3 systems). 
 * The CCD numbers that we have are in the physical space. Likely because of * how the orientation of CCM numbers maps to physical locations and the layout * of them within the package, we haven't found a good way using the core DFv3 * registers to determine if a given CCD is actually present or not as generally * all the CCMs are left enabled. Instead we use SMU::PWR::DIE_ID as a proxy to * determine CCD presence. */ static uint32_t amdzen_ccd_log_id_zen2(amdzen_t *azn, amdzen_df_t *df, const amdzen_df_ent_t *targ) { uint32_t smnid = 0; uint32_t logid = 0; for (uint_t i = 0; i < df->adf_nents; i++) { const amdzen_df_ent_t *ent = &df->adf_ents[i]; if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0) { continue; } if (ent->adfe_inst_id == targ->adfe_inst_id) { return (logid); } if (ent->adfe_type == targ->adfe_type && ent->adfe_subtype == targ->adfe_subtype) { boolean_t present = amdzen_ccd_present(azn, df, smnid); smnid++; if (present) { logid++; } } } panic("asked to match against invalid DF entity %p in df %p", targ, df); } static void amdzen_ccd_fill_core_initpkg0(amdzen_t *azn, amdzen_df_t *df, amdzen_topo_ccd_t *ccd, amdzen_topo_ccx_t *ccx, amdzen_topo_core_t *core, boolean_t *ccd_set, boolean_t *ccx_set) { smn_reg_t pkg0_reg; uint32_t pkg0; pkg0_reg = SCFCTP_PMREG_INITPKG0(ccd->atccd_phys_no, ccx->atccx_phys_no, core->atcore_phys_no); pkg0 = amdzen_smn_read(azn, df, pkg0_reg); core->atcore_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CORE(pkg0); if (!*ccx_set) { ccx->atccx_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_CCX(pkg0); *ccx_set = B_TRUE; } if (!*ccd_set) { ccd->atccd_log_no = SCFCTP_PMREG_INITPKG0_GET_LOG_DIE(pkg0); *ccd_set = B_TRUE; } } /* * Attempt to fill in the physical topology information for this given CCD. * There are a few steps that we undertake to perform this, as follows: * * 1) First we determine whether the CCD is actually present or not by reading * SMU::PWR::DIE_ID. CCDs that are not installed will still have an enabled DF * entry, it appears, but the request for the die ID will return an invalid * read (all 1s). This die ID should match what we think of as the SMN number * below. If not, we're in trouble and the rest of this is in question. * * 2) We use the SMU::PWR registers to determine how many logical and physical * cores are present in this CCD and how they are split amongst the CCX. Here we * need to encode the CPU to CCX core size rankings. Through this process we * determine and fill out which threads and cores are enabled. * * 3) In Zen 3+ we will then read each core's INITPKG0 values to ensure that we * have a proper physical to logical mapping, at which point we can fill in the * APIC IDs. For Zen 2, we will set the AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag * to indicate that we just mapped the first logical processor to the first * enabled core. * * 4) Once we have the logical IDs determined we will construct the APIC ID that * we expect this to have. * * Steps (2) - (4) are intertwined and done together. 
*/ static void amdzen_ccd_fill_topo(amdzen_t *azn, amdzen_df_t *df, amdzen_df_ent_t *ent, amdzen_topo_ccd_t *ccd) { uint32_t nccx, core_en, thread_en; uint32_t nlcore_per_ccx, nthreads_per_core; uint32_t sockid, dieid, compid; const uint32_t ccdno = ccd->atccd_phys_no; const x86_uarch_t uarch = uarchrev_uarch(azn->azn_uarchrev); boolean_t pkg0_ids, logccd_set = B_FALSE; ASSERT(MUTEX_HELD(&azn->azn_mutex)); if (!amdzen_ccd_present(azn, df, ccdno)) { ccd->atccd_err = AMDZEN_TOPO_CCD_E_CCD_MISSING; return; } amdzen_ccd_info(azn, df, ccdno, &nccx, &nlcore_per_ccx, &nthreads_per_core); ASSERT3U(nccx, <=, AMDZEN_TOPO_CCD_MAX_CCX); core_en = amdzen_ccd_core_en(azn, df, ccdno); thread_en = amdzen_ccd_thread_en(azn, df, ccdno); /* * The BSP is never enabled in a conventional sense and therefore the * bit is reserved and left as 0. As the BSP should be in the first CCD, * we go through and OR back in the bit lest we think the thread isn't * enabled. */ if (ccdno == 0) { thread_en |= 1; } ccd->atccd_phys_no = ccdno; if (uarch >= X86_UARCH_AMD_ZEN3) { pkg0_ids = B_TRUE; } else { ccd->atccd_flags |= AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN; pkg0_ids = B_FALSE; /* * Determine the CCD logical ID for Zen 2 now since this doesn't * rely upon needing a valid physical core. */ ccd->atccd_log_no = amdzen_ccd_log_id_zen2(azn, df, ent); logccd_set = B_TRUE; } /* * To construct the APIC ID we need to know the socket and die (not CCD) * this is on. We deconstruct the CCD's fabric ID to determine that. */ zen_fabric_id_decompose(&df->adf_decomp, ent->adfe_fabric_id, &sockid, &dieid, &compid); /* * At this point we have all the information about the CCD, the number * of CCX instances, and which physical cores and threads are enabled. * Currently we assume that if we have one CCX enabled, then it is * always CCX0. We cannot find evidence of a two CCX supporting part * that doesn't always ship with both CCXs present and enabled. */ ccd->atccd_nlog_ccx = ccd->atccd_nphys_ccx = nccx; for (uint32_t ccxno = 0; ccxno < nccx; ccxno++) { amdzen_topo_ccx_t *ccx = &ccd->atccd_ccx[ccxno]; const uint32_t core_mask = (1 << azn->azn_ncore_per_ccx) - 1; const uint32_t core_shift = ccxno * azn->azn_ncore_per_ccx; const uint32_t ccx_core_en = (core_en >> core_shift) & core_mask; boolean_t logccx_set = B_FALSE; ccd->atccd_ccx_en[ccxno] = 1; ccx->atccx_phys_no = ccxno; ccx->atccx_nphys_cores = azn->azn_ncore_per_ccx; ccx->atccx_nlog_cores = nlcore_per_ccx; if (!pkg0_ids) { ccx->atccx_log_no = ccx->atccx_phys_no; logccx_set = B_TRUE; } for (uint32_t coreno = 0, logcorezen2 = 0; coreno < azn->azn_ncore_per_ccx; coreno++) { amdzen_topo_core_t *core = &ccx->atccx_cores[coreno]; if ((ccx_core_en & (1 << coreno)) == 0) { continue; } ccx->atccx_core_en[coreno] = 1; core->atcore_phys_no = coreno; /* * Now that we have the physical core number present, we * must determine the logical core number and fill out * the logical CCX/CCD if it has not been set. We must * do this before we attempt to look at which threads * are enabled, because that operates based upon logical * core number. * * For Zen 2 we do not have INITPKG0 at our disposal. We * currently assume (and tag for userland with the * AMDZEN_TOPO_CCD_F_CORE_PHYS_UNKNOWN flag) that we are * mapping logical cores to physicals in the order of * appearance. */ if (pkg0_ids) { amdzen_ccd_fill_core_initpkg0(azn, df, ccd, ccx, core, &logccd_set, &logccx_set); } else { core->atcore_log_no = logcorezen2; logcorezen2++; } /* * Determining which bits to use for the thread is a bit * weird here. 

static void
amdzen_nexus_init(void *arg)
{
	uint_t i;
	amdzen_t *azn = arg;

	/*
	 * Assign the requisite identifying information for this CPU.
	 */
	azn->azn_uarchrev = cpuid_getuarchrev(CPU);
	azn->azn_chiprev = cpuid_getchiprev(CPU);

	/*
	 * Go through all of the stubs and assign the DF entries.
	 */
	mutex_enter(&azn->azn_mutex);
	if (!amdzen_map_dfs(azn) || !amdzen_check_dfs(azn)) {
		azn->azn_flags |= AMDZEN_F_MAP_ERROR;
		goto done;
	}

	for (i = 0; i < AMDZEN_MAX_DFS; i++) {
		amdzen_df_t *df = &azn->azn_dfs[i];

		if ((df->adf_flags & AMDZEN_DF_F_VALID) == 0)
			continue;
		amdzen_setup_df(azn, df);
		amdzen_find_nb(azn, df);
	}

	amdzen_determine_ncore_per_ccx(azn);

	if (amdzen_determine_apic_decomp(azn)) {
		azn->azn_flags |= AMDZEN_F_APIC_DECOMP_VALID;
	}

	/*
	 * Not all children may be installed. As such, we do not treat the
	 * failure of a child as fatal to the driver.
	 */
	mutex_exit(&azn->azn_mutex);
	for (i = 0; i < ARRAY_SIZE(amdzen_children); i++) {
		(void) amdzen_create_child(azn, &amdzen_children[i]);
	}
	mutex_enter(&azn->azn_mutex);

done:
	azn->azn_flags &= ~AMDZEN_F_ATTACH_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_ATTACH_COMPLETE;
	azn->azn_taskqid = TASKQID_INVALID;
	cv_broadcast(&azn->azn_cv);
	mutex_exit(&azn->azn_mutex);
}
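
/*
 * The startup path forms a small state machine over azn_flags, driven by two
 * taskq callbacks. A rough summary of the transitions (simplified; error
 * paths such as AMDZEN_F_MAP_ERROR and AMDZEN_F_UNSUPPORTED omitted):
 *
 *	amdzen_attach():	+SCAN_DISPATCHED, dispatch amdzen_stub_scan
 *	amdzen_stub_scan():	-SCAN_DISPATCHED, +SCAN_COMPLETE
 *	    (all stubs seen)	+ATTACH_DISPATCHED, dispatch amdzen_nexus_init
 *	amdzen_attach_stub():	same dispatch, if the scan finished first
 *	amdzen_nexus_init():	-ATTACH_DISPATCHED, +ATTACH_COMPLETE
 *
 * Because stub attach runs asynchronously with respect to the scan, whichever
 * of amdzen_stub_scan() or the final amdzen_attach_stub() observes the counts
 * matching (azn_nscanned == azn_npresent) dispatches amdzen_nexus_init().
 */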

static int
amdzen_stub_scan_cb(dev_info_t *dip, void *arg)
{
	amdzen_t *azn = arg;
	uint16_t vid, did;
	int *regs;
	uint_t nregs, i;
	boolean_t match = B_FALSE;

	if (dip == ddi_root_node()) {
		return (DDI_WALK_CONTINUE);
	}

	/*
	 * If the node in question is not a pci node, then we have no interest
	 * in it, as all the stubs that we care about are related to pci
	 * devices.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		return (DDI_WALK_PRUNECHILD);
	}

	/*
	 * If we can't get a device or vendor ID and prove that this is an AMD
	 * part, then we don't care about it.
	 */
	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		return (DDI_WALK_CONTINUE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		return (DDI_WALK_CONTINUE);
	}

	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			match = B_TRUE;
		}
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		return (DDI_WALK_CONTINUE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		return (DDI_WALK_CONTINUE);
	}

	if (PCI_REG_BUS_G(regs[0]) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(regs[0]) >= AMDZEN_DF_FIRST_DEVICE) {
		match = B_TRUE;
	}

	ddi_prop_free(regs);
	if (match) {
		mutex_enter(&azn->azn_mutex);
		azn->azn_nscanned++;
		mutex_exit(&azn->azn_mutex);
	}

	return (DDI_WALK_CONTINUE);
}

static void
amdzen_stub_scan(void *arg)
{
	amdzen_t *azn = arg;

	mutex_enter(&azn->azn_mutex);
	azn->azn_nscanned = 0;
	mutex_exit(&azn->azn_mutex);

	ddi_walk_devs(ddi_root_node(), amdzen_stub_scan_cb, azn);

	mutex_enter(&azn->azn_mutex);
	azn->azn_flags &= ~AMDZEN_F_SCAN_DISPATCHED;
	azn->azn_flags |= AMDZEN_F_SCAN_COMPLETE;

	if (azn->azn_nscanned == 0) {
		azn->azn_flags |= AMDZEN_F_UNSUPPORTED;
		azn->azn_taskqid = TASKQID_INVALID;
		cv_broadcast(&azn->azn_cv);
	} else if (azn->azn_npresent == azn->azn_nscanned) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);
}

/*
 * Unfortunately we can't really let the stubs detach as we may need them to
 * be available for client operations. We may be able to improve this if we
 * know that the actual nexus is going away. However, as long as it's active,
 * we need all the stubs.
 */
int
amdzen_detach_stub(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	}

	return (DDI_FAILURE);
}
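
/*
 * Both the scan callback above and amdzen_attach_stub() below identify data
 * fabric functions by decoding the first "reg" property entry, which holds
 * the device's PCI configuration-space address. A minimal sketch of that
 * match, factored into a hypothetical helper for illustration:
 *
 *	static boolean_t
 *	amdzen_reg_is_df(int reg0)
 *	{
 *		return (PCI_REG_BUS_G(reg0) == AMDZEN_DF_BUSNO &&
 *		    PCI_REG_DEV_G(reg0) >= AMDZEN_DF_FIRST_DEVICE);
 *	}
 *
 * That is, any function whose config address is on AMDZEN_DF_BUSNO at or
 * above AMDZEN_DF_FIRST_DEVICE is treated as a DF device, per the DF
 * enumeration scheme described at the top of this file.
 */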

int
amdzen_attach_stub(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int *regs, reg;
	uint_t nregs, i;
	uint16_t vid, did;
	amdzen_stub_t *stub;
	amdzen_t *azn = amdzen_data;
	boolean_t valid = B_FALSE;
	boolean_t nb = B_FALSE;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Make sure that the stub that we've been asked to attach is a pci
	 * type device. If not, then there is no reason for us to proceed.
	 */
	if (strncmp("pci", ddi_get_name(dip), 3) != 0) {
		dev_err(dip, CE_WARN, "asked to attach a bad AMD Zen nexus "
		    "stub: %s", ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	vid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", PCI_EINVAL16);
	did = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", PCI_EINVAL16);
	if (vid == PCI_EINVAL16 || did == PCI_EINVAL16) {
		dev_err(dip, CE_WARN, "failed to get PCI ID properties");
		return (DDI_FAILURE);
	}

	if (vid != AMDZEN_PCI_VID_AMD && vid != AMDZEN_PCI_VID_HYGON) {
		dev_err(dip, CE_WARN, "expected vendor ID (0x%x), found 0x%x",
		    cpuid_getvendor(CPU) == X86_VENDOR_HYGON ?
		    AMDZEN_PCI_VID_HYGON : AMDZEN_PCI_VID_AMD, vid);
		return (DDI_FAILURE);
	}

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "reg", &regs, &nregs) != DDI_PROP_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get 'reg' property");
		return (DDI_FAILURE);
	}

	if (nregs == 0) {
		ddi_prop_free(regs);
		dev_err(dip, CE_WARN, "missing 'reg' property values");
		return (DDI_FAILURE);
	}
	reg = *regs;
	ddi_prop_free(regs);

	for (i = 0; i < ARRAY_SIZE(amdzen_nb_ids); i++) {
		if (amdzen_nb_ids[i] == did) {
			valid = B_TRUE;
			nb = B_TRUE;
		}
	}

	if (!valid && PCI_REG_BUS_G(reg) == AMDZEN_DF_BUSNO &&
	    PCI_REG_DEV_G(reg) >= AMDZEN_DF_FIRST_DEVICE) {
		valid = B_TRUE;
		nb = B_FALSE;
	}

	if (!valid) {
		dev_err(dip, CE_WARN, "device %s didn't match the nexus list",
		    ddi_get_name(dip));
		return (DDI_FAILURE);
	}

	stub = kmem_alloc(sizeof (amdzen_stub_t), KM_SLEEP);
	if (pci_config_setup(dip, &stub->azns_cfgspace) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to set up config space");
		kmem_free(stub, sizeof (amdzen_stub_t));
		return (DDI_FAILURE);
	}

	stub->azns_dip = dip;
	stub->azns_vid = vid;
	stub->azns_did = did;
	stub->azns_bus = PCI_REG_BUS_G(reg);
	stub->azns_dev = PCI_REG_DEV_G(reg);
	stub->azns_func = PCI_REG_FUNC_G(reg);
	ddi_set_driver_private(dip, stub);

	mutex_enter(&azn->azn_mutex);
	azn->azn_npresent++;
	if (nb) {
		list_insert_tail(&azn->azn_nb_stubs, stub);
	} else {
		list_insert_tail(&azn->azn_df_stubs, stub);
	}

	if ((azn->azn_flags & AMDZEN_F_TASKQ_MASK) == AMDZEN_F_SCAN_COMPLETE &&
	    azn->azn_nscanned == azn->azn_npresent) {
		azn->azn_flags |= AMDZEN_F_ATTACH_DISPATCHED;
		azn->azn_taskqid = taskq_dispatch(system_taskq,
		    amdzen_nexus_init, azn, TQ_SLEEP);
	}
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}

static int
amdzen_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
	char buf[32];
	dev_info_t *child;
	const amdzen_child_data_t *acd;

	switch (ctlop) {
	case DDI_CTLOPS_REPORTDEV:
		if (rdip == NULL) {
			return (DDI_FAILURE);
		}
		cmn_err(CE_CONT, "amdzen nexus: %s@%s, %s%d\n",
		    ddi_node_name(rdip), ddi_get_name_addr(rdip),
		    ddi_driver_name(rdip), ddi_get_instance(rdip));
		break;
	case DDI_CTLOPS_INITCHILD:
		child = arg;
		if (child == NULL) {
			dev_err(dip, CE_WARN, "!no child passed for "
			    "DDI_CTLOPS_INITCHILD");
			return (DDI_FAILURE);
		}

		acd = ddi_get_parent_data(child);
		if (acd == NULL) {
			dev_err(dip, CE_WARN, "!missing child parent data");
			return (DDI_FAILURE);
		}

		if (snprintf(buf, sizeof (buf), "%d", acd->acd_addr) >=
		    sizeof (buf)) {
			dev_err(dip, CE_WARN, "!failed to construct device "
			    "addr due to overflow");
			return (DDI_FAILURE);
		}

		ddi_set_name_addr(child, buf);
		break;
	case DDI_CTLOPS_UNINITCHILD:
		child = arg;
		if (child == NULL) {
			dev_err(dip, CE_WARN, "!no child passed for "
			    "DDI_CTLOPS_UNINITCHILD");
			return (DDI_FAILURE);
		}

		ddi_set_name_addr(child, NULL);
		break;
	default:
		return (ddi_ctlops(dip, rdip, ctlop, arg, result));
	}
	return (DDI_SUCCESS);
}

static int
amdzen_topo_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
	minor_t m;
	amdzen_t *azn = amdzen_data;

	if (crgetzoneid(credp) != GLOBAL_ZONEID ||
	    secpolicy_sys_config(credp, B_FALSE) != 0) {
		return (EPERM);
	}

	if ((flag & (FEXCL | FNDELAY | FNONBLOCK)) != 0) {
		return (EINVAL);
	}

	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	m = getminor(*devp);
	if (m != AMDZEN_MINOR_TOPO) {
		return (ENXIO);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_IOCTL_MASK) !=
	    AMDZEN_F_ATTACH_COMPLETE) {
		mutex_exit(&azn->azn_mutex);
		return (ENOTSUP);
	}
	mutex_exit(&azn->azn_mutex);

	return (0);
}
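
/*
 * A note on the unit addresses constructed in amdzen_bus_ctl() above: each
 * child is named with the decimal value of its acd_addr, so a hypothetical
 * child definition with an acd_addr of 1 would appear in the device tree as
 * something like "driver@1" (the node name comes from the child definition,
 * not from this code). DDI_CTLOPS_REPORTDEV then prints that same address via
 * ddi_get_name_addr().
 */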

static int
amdzen_topo_ioctl_base(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_base_t base;

	bzero(&base, sizeof (base));
	mutex_enter(&azn->azn_mutex);
	base.atb_ndf = azn->azn_ndfs;

	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		mutex_exit(&azn->azn_mutex);
		return (ENOTSUP);
	}

	base.atb_apic_decomp = azn->azn_apic_decomp;
	for (uint_t i = 0; i < azn->azn_ndfs; i++) {
		const amdzen_df_t *df = &azn->azn_dfs[i];

		base.atb_maxdfent = MAX(base.atb_maxdfent, df->adf_nents);
		if (i == 0) {
			base.atb_rev = df->adf_rev;
			base.atb_df_decomp = df->adf_decomp;
		}
	}
	mutex_exit(&azn->azn_mutex);

	if (ddi_copyout(&base, (void *)(uintptr_t)arg, sizeof (base),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}

/*
 * Fill in the peers. We only have this information prior to DF 4D2. The way
 * we do this is to just fill in all the entries and then zero out the ones
 * that aren't valid.
 */
static void
amdzen_topo_ioctl_df_fill_peers(const amdzen_df_t *df,
    const amdzen_df_ent_t *ent, amdzen_topo_df_ent_t *topo_ent)
{
	topo_ent->atde_npeers = DF_FBIINFO0_GET_FTI_PCNT(ent->adfe_info0);

	if (df->adf_rev >= DF_REV_4D2) {
		bzero(topo_ent->atde_peers, sizeof (topo_ent->atde_peers));
		return;
	}

	topo_ent->atde_peers[0] = DF_FBIINFO1_GET_FTI0_NINSTID(ent->adfe_info1);
	topo_ent->atde_peers[1] = DF_FBIINFO1_GET_FTI1_NINSTID(ent->adfe_info1);
	topo_ent->atde_peers[2] = DF_FBIINFO1_GET_FTI2_NINSTID(ent->adfe_info1);
	topo_ent->atde_peers[3] = DF_FBIINFO1_GET_FTI3_NINSTID(ent->adfe_info1);
	topo_ent->atde_peers[4] = DF_FBIINFO2_GET_FTI4_NINSTID(ent->adfe_info2);
	topo_ent->atde_peers[5] = DF_FBIINFO2_GET_FTI5_NINSTID(ent->adfe_info2);

	for (uint32_t i = topo_ent->atde_npeers; i < AMDZEN_TOPO_DF_MAX_PEERS;
	    i++) {
		topo_ent->atde_peers[i] = 0;
	}
}

static void
amdzen_topo_ioctl_df_fill_ccm(const amdzen_df_ent_t *ent,
    amdzen_topo_df_ent_t *topo_ent)
{
	const amdzen_ccm_data_t *ccm = &ent->adfe_data.aded_ccm;
	amdzen_topo_ccm_data_t *topo_ccm = &topo_ent->atde_data.atded_ccm;

	topo_ccm->atcd_nccds = ccm->acd_nccds;
	for (uint32_t i = 0; i < DF_MAX_CCDS_PER_CCM; i++) {
		topo_ccm->atcd_ccd_en[i] = ccm->acd_ccd_en[i];
		topo_ccm->atcd_ccd_ids[i] = ccm->acd_ccd_id[i];
	}
}
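
/*
 * For reference, a minimal userland consumer of AMDZEN_TOPO_IOCTL_BASE looks
 * roughly like the following sketch. The device path is an assumption based
 * on the "topo" minor node created in amdzen_attach() and may differ
 * depending on how /dev links are constructed on a given system:
 *
 *	int fd = open("/dev/amdzen/topo", O_RDONLY);
 *	amdzen_topo_base_t base;
 *
 *	if (fd < 0 || ioctl(fd, AMDZEN_TOPO_IOCTL_BASE, &base) != 0) {
 *		err(EXIT_FAILURE, "failed to get base topology");
 *	}
 *	(void) printf("DFs: %u, max entities per DF: %u\n", base.atb_ndf,
 *	    base.atb_maxdfent);
 *
 * atb_maxdfent exists so that the caller can size the entity buffer passed
 * to AMDZEN_TOPO_IOCTL_DF, handled below.
 */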

static int
amdzen_topo_ioctl_df(amdzen_t *azn, intptr_t arg, int mode)
{
	uint_t model;
	uint32_t max_ents, nwritten;
	const amdzen_df_t *df;
	amdzen_topo_df_t topo_df;
#ifdef _MULTI_DATAMODEL
	amdzen_topo_df32_t topo_df32;
#endif

	model = ddi_model_convert_from(mode);
	switch (model) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df32,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		bzero(&topo_df, sizeof (topo_df));
		topo_df.atd_dfno = topo_df32.atd_dfno;
		topo_df.atd_df_buf_nents = topo_df32.atd_df_buf_nents;
		topo_df.atd_df_ents = (void *)(uintptr_t)topo_df32.atd_df_ents;
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyin((void *)(uintptr_t)arg, &topo_df,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		return (ENOTSUP);
	}

	mutex_enter(&azn->azn_mutex);
	if (topo_df.atd_dfno >= azn->azn_ndfs) {
		mutex_exit(&azn->azn_mutex);
		return (EINVAL);
	}

	df = &azn->azn_dfs[topo_df.atd_dfno];
	topo_df.atd_nodeid = df->adf_nodeid;
	topo_df.atd_sockid = (df->adf_nodeid & df->adf_decomp.dfd_sock_mask) >>
	    df->adf_decomp.dfd_sock_shift;
	topo_df.atd_dieid = (df->adf_nodeid & df->adf_decomp.dfd_die_mask) >>
	    df->adf_decomp.dfd_die_shift;
	topo_df.atd_rev = df->adf_rev;
	topo_df.atd_major = df->adf_major;
	topo_df.atd_minor = df->adf_minor;
	topo_df.atd_df_act_nents = df->adf_nents;
	max_ents = MIN(topo_df.atd_df_buf_nents, df->adf_nents);

	if (topo_df.atd_df_ents == NULL) {
		topo_df.atd_df_buf_nvalid = 0;
		mutex_exit(&azn->azn_mutex);
		goto copyout;
	}

	nwritten = 0;
	for (uint32_t i = 0; i < max_ents; i++) {
		amdzen_topo_df_ent_t topo_ent;
		const amdzen_df_ent_t *ent = &df->adf_ents[i];

		/*
		 * We opt not to include disabled elements right now. They
		 * generally don't have a valid type and there isn't much
		 * useful information we can get from them. This can be
		 * changed if we find a use case for them for userland topo.
		 */
		if ((ent->adfe_flags & AMDZEN_DFE_F_ENABLED) == 0)
			continue;

		bzero(&topo_ent, sizeof (topo_ent));
		topo_ent.atde_type = ent->adfe_type;
		topo_ent.atde_subtype = ent->adfe_subtype;
		topo_ent.atde_fabric_id = ent->adfe_fabric_id;
		topo_ent.atde_inst_id = ent->adfe_inst_id;
		amdzen_topo_ioctl_df_fill_peers(df, ent, &topo_ent);

		if (amdzen_dfe_is_ccm(df, ent)) {
			amdzen_topo_ioctl_df_fill_ccm(ent, &topo_ent);
		}

		if (ddi_copyout(&topo_ent, &topo_df.atd_df_ents[nwritten],
		    sizeof (topo_ent), mode & FKIOCTL) != 0) {
			mutex_exit(&azn->azn_mutex);
			return (EFAULT);
		}
		nwritten++;
	}
	mutex_exit(&azn->azn_mutex);

	topo_df.atd_df_buf_nvalid = nwritten;
copyout:
	switch (model) {
#ifdef _MULTI_DATAMODEL
	case DDI_MODEL_ILP32:
		topo_df32.atd_nodeid = topo_df.atd_nodeid;
		topo_df32.atd_sockid = topo_df.atd_sockid;
		topo_df32.atd_dieid = topo_df.atd_dieid;
		topo_df32.atd_rev = topo_df.atd_rev;
		topo_df32.atd_major = topo_df.atd_major;
		topo_df32.atd_minor = topo_df.atd_minor;
		topo_df32.atd_df_buf_nvalid = topo_df.atd_df_buf_nvalid;
		topo_df32.atd_df_act_nents = topo_df.atd_df_act_nents;

		if (ddi_copyout(&topo_df32, (void *)(uintptr_t)arg,
		    sizeof (topo_df32), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
#endif
	case DDI_MODEL_NONE:
		if (ddi_copyout(&topo_df, (void *)(uintptr_t)arg,
		    sizeof (topo_df), mode & FKIOCTL) != 0) {
			return (EFAULT);
		}
		break;
	default:
		break;
	}

	return (0);
}
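
/*
 * The NULL atd_df_ents case above enables a two-pass sizing pattern from
 * userland: call once with no buffer to learn atd_df_act_nents, then allocate
 * and call again. A hedged sketch (error handling elided; the device fd is
 * assumed to be open as in the earlier example):
 *
 *	amdzen_topo_df_t df = { .atd_dfno = 0, .atd_df_ents = NULL };
 *
 *	(void) ioctl(fd, AMDZEN_TOPO_IOCTL_DF, &df);
 *	df.atd_df_buf_nents = df.atd_df_act_nents;
 *	df.atd_df_ents = calloc(df.atd_df_buf_nents,
 *	    sizeof (amdzen_topo_df_ent_t));
 *	(void) ioctl(fd, AMDZEN_TOPO_IOCTL_DF, &df);
 *
 * After the second call, atd_df_buf_nvalid indicates how many entries were
 * actually written, which may be less than atd_df_act_nents since disabled
 * entities are skipped.
 */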

static int
amdzen_topo_ioctl_ccd(amdzen_t *azn, intptr_t arg, int mode)
{
	amdzen_topo_ccd_t ccd, *ccdp;
	amdzen_df_t *df;
	amdzen_df_ent_t *ent;
	amdzen_ccm_data_t *ccm;
	uint32_t ccdno;
	size_t copyin_size = offsetof(amdzen_topo_ccd_t, atccd_err);

	/*
	 * Only copy in the identifying information so that way we can ensure
	 * the rest of the structure we return to the user doesn't contain
	 * anything unexpected in it.
	 */
	bzero(&ccd, sizeof (ccd));
	if (ddi_copyin((void *)(uintptr_t)arg, &ccd, copyin_size,
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	mutex_enter(&azn->azn_mutex);
	if ((azn->azn_flags & AMDZEN_F_APIC_DECOMP_VALID) == 0) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NO_APIC_DECOMP;
		goto copyout;
	}

	df = amdzen_df_find(azn, ccd.atccd_dfno);
	if (df == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_DFNO;
		goto copyout;
	}

	/*
	 * We don't have enough information to know how to construct this
	 * information in Zen 1 at this time, so refuse.
	 */
	if (df->adf_rev <= DF_REV_2) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_SOC_UNSUPPORTED;
		goto copyout;
	}

	ent = amdzen_df_ent_find_by_instid(df, ccd.atccd_instid);
	if (ent == NULL) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_BAD_INSTID;
		goto copyout;
	}

	if (!amdzen_dfe_is_ccm(df, ent)) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	ccm = &ent->adfe_data.aded_ccm;
	for (ccdno = 0; ccdno < DF_MAX_CCDS_PER_CCM; ccdno++) {
		if (ccm->acd_ccd_en[ccdno] != 0 &&
		    ccm->acd_ccd_id[ccdno] == ccd.atccd_phys_no) {
			break;
		}
	}

	if (ccdno == DF_MAX_CCDS_PER_CCM) {
		ccd.atccd_err = AMDZEN_TOPO_CCD_E_NOT_A_CCD;
		goto copyout;
	}

	if (ccm->acd_ccd_data[ccdno] == NULL) {
		/*
		 * We don't actually have this data. Go fill it out and save
		 * it for future use.
		 */
		ccdp = kmem_zalloc(sizeof (amdzen_topo_ccd_t),
		    KM_NOSLEEP_LAZY);
		if (ccdp == NULL) {
			mutex_exit(&azn->azn_mutex);
			return (ENOMEM);
		}

		ccdp->atccd_dfno = ccd.atccd_dfno;
		ccdp->atccd_instid = ccd.atccd_instid;
		ccdp->atccd_phys_no = ccd.atccd_phys_no;
		amdzen_ccd_fill_topo(azn, df, ent, ccdp);
		ccm->acd_ccd_data[ccdno] = ccdp;
	}
	ASSERT3P(ccm->acd_ccd_data[ccdno], !=, NULL);
	bcopy(ccm->acd_ccd_data[ccdno], &ccd, sizeof (ccd));

copyout:
	mutex_exit(&azn->azn_mutex);
	if (ddi_copyout(&ccd, (void *)(uintptr_t)arg, sizeof (ccd),
	    mode & FKIOCTL) != 0) {
		return (EFAULT);
	}

	return (0);
}

static int
amdzen_topo_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	int ret;
	amdzen_t *azn = amdzen_data;

	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
		return (ENXIO);
	}

	if ((mode & FREAD) == 0) {
		return (EBADF);
	}

	switch (cmd) {
	case AMDZEN_TOPO_IOCTL_BASE:
		ret = amdzen_topo_ioctl_base(azn, arg, mode);
		break;
	case AMDZEN_TOPO_IOCTL_DF:
		ret = amdzen_topo_ioctl_df(azn, arg, mode);
		break;
	case AMDZEN_TOPO_IOCTL_CCD:
		ret = amdzen_topo_ioctl_ccd(azn, arg, mode);
		break;
	default:
		ret = ENOTTY;
		break;
	}

	return (ret);
}

static int
amdzen_topo_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}

	if (getminor(dev) != AMDZEN_MINOR_TOPO) {
		return (ENXIO);
	}

	return (0);
}
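
/*
 * Tying the CCD ioctl together: the caller identifies a CCD by its DF number,
 * the owning CCM's instance ID (as returned by AMDZEN_TOPO_IOCTL_DF), and the
 * CCD's physical number, and gets back the full cached topology. A hedged
 * sketch, reusing the assumed fd from the earlier examples; inst_id and
 * ccd_id are hypothetical values taken from a prior DF query:
 *
 *	amdzen_topo_ccd_t ccd = { 0 };
 *
 *	ccd.atccd_dfno = 0;
 *	ccd.atccd_instid = inst_id;	(a CCM entity's atde_inst_id)
 *	ccd.atccd_phys_no = ccd_id;	(one of that entity's atcd_ccd_ids)
 *	if (ioctl(fd, AMDZEN_TOPO_IOCTL_CCD, &ccd) != 0 ||
 *	    ccd.atccd_err != 0) {
 *		... inspect ccd.atccd_err for the specific failure ...
 *	}
 */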

static int
amdzen_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	if (cmd == DDI_RESUME) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	if (azn->azn_dip != NULL) {
		dev_err(dip, CE_WARN, "driver is already attached!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	if (ddi_create_minor_node(dip, "topo", S_IFCHR, AMDZEN_MINOR_TOPO,
	    DDI_PSEUDO, 0) != 0) {
		dev_err(dip, CE_WARN, "failed to create topo minor node!");
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	azn->azn_dip = dip;
	azn->azn_taskqid = taskq_dispatch(system_taskq, amdzen_stub_scan,
	    azn, TQ_SLEEP);
	azn->azn_flags |= AMDZEN_F_SCAN_DISPATCHED;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}

static int
amdzen_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	amdzen_t *azn = amdzen_data;

	if (cmd == DDI_SUSPEND) {
		return (DDI_SUCCESS);
	} else if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&azn->azn_mutex);
	while (azn->azn_taskqid != TASKQID_INVALID) {
		cv_wait(&azn->azn_cv, &azn->azn_mutex);
	}

	/*
	 * If we've attached any stub drivers, i.e. this platform is important
	 * for us, then we fail detach.
	 */
	if (!list_is_empty(&azn->azn_df_stubs) ||
	    !list_is_empty(&azn->azn_nb_stubs)) {
		mutex_exit(&azn->azn_mutex);
		return (DDI_FAILURE);
	}

	ddi_remove_minor_node(azn->azn_dip, NULL);
	azn->azn_dip = NULL;
	mutex_exit(&azn->azn_mutex);

	return (DDI_SUCCESS);
}

static void
amdzen_free(void)
{
	if (amdzen_data == NULL) {
		return;
	}

	VERIFY(list_is_empty(&amdzen_data->azn_df_stubs));
	list_destroy(&amdzen_data->azn_df_stubs);
	VERIFY(list_is_empty(&amdzen_data->azn_nb_stubs));
	list_destroy(&amdzen_data->azn_nb_stubs);
	cv_destroy(&amdzen_data->azn_cv);
	mutex_destroy(&amdzen_data->azn_mutex);
	kmem_free(amdzen_data, sizeof (amdzen_t));
	amdzen_data = NULL;
}

static void
amdzen_alloc(void)
{
	amdzen_data = kmem_zalloc(sizeof (amdzen_t), KM_SLEEP);
	mutex_init(&amdzen_data->azn_mutex, NULL, MUTEX_DRIVER, NULL);
	list_create(&amdzen_data->azn_df_stubs, sizeof (amdzen_stub_t),
	    offsetof(amdzen_stub_t, azns_link));
	list_create(&amdzen_data->azn_nb_stubs, sizeof (amdzen_stub_t),
	    offsetof(amdzen_stub_t, azns_link));
	cv_init(&amdzen_data->azn_cv, NULL, CV_DRIVER, NULL);
}

static struct cb_ops amdzen_topo_cb_ops = {
	.cb_open = amdzen_topo_open,
	.cb_close = amdzen_topo_close,
	.cb_strategy = nodev,
	.cb_print = nodev,
	.cb_dump = nodev,
	.cb_read = nodev,
	.cb_write = nodev,
	.cb_ioctl = amdzen_topo_ioctl,
	.cb_devmap = nodev,
	.cb_mmap = nodev,
	.cb_segmap = nodev,
	.cb_chpoll = nochpoll,
	.cb_prop_op = ddi_prop_op,
	.cb_flag = D_MP,
	.cb_rev = CB_REV,
	.cb_aread = nodev,
	.cb_awrite = nodev
};

struct bus_ops amdzen_bus_ops = {
	.busops_rev = BUSO_REV,
	.bus_map = nullbusmap,
	.bus_dma_map = ddi_no_dma_map,
	.bus_dma_allochdl = ddi_no_dma_allochdl,
	.bus_dma_freehdl = ddi_no_dma_freehdl,
	.bus_dma_bindhdl = ddi_no_dma_bindhdl,
	.bus_dma_unbindhdl = ddi_no_dma_unbindhdl,
	.bus_dma_flush = ddi_no_dma_flush,
	.bus_dma_win = ddi_no_dma_win,
	.bus_dma_ctl = ddi_no_dma_mctl,
	.bus_prop_op = ddi_bus_prop_op,
	.bus_ctl = amdzen_bus_ctl
};

static struct dev_ops amdzen_dev_ops = {
	.devo_rev = DEVO_REV,
	.devo_refcnt = 0,
	.devo_getinfo = nodev,
	.devo_identify = nulldev,
	.devo_probe = nulldev,
	.devo_attach = amdzen_attach,
	.devo_detach = amdzen_detach,
	.devo_reset = nodev,
	.devo_quiesce = ddi_quiesce_not_needed,
	.devo_bus_ops = &amdzen_bus_ops,
	.devo_cb_ops = &amdzen_topo_cb_ops
};

static struct modldrv amdzen_modldrv = {
	.drv_modops = &mod_driverops,
	.drv_linkinfo = "AMD Zen Nexus Driver",
	.drv_dev_ops = &amdzen_dev_ops
};

static struct modlinkage amdzen_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &amdzen_modldrv, NULL }
};

int
_init(void)
{
	int ret;

	if (cpuid_getvendor(CPU) != X86_VENDOR_AMD &&
	    cpuid_getvendor(CPU) != X86_VENDOR_HYGON) {
		return (ENOTSUP);
	}

	if ((ret = mod_install(&amdzen_modlinkage)) == 0) {
		amdzen_alloc();
	}

	return (ret);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&amdzen_modlinkage, modinfop));
}

int
_fini(void)
{
	int ret;

	if ((ret = mod_remove(&amdzen_modlinkage)) == 0) {
		amdzen_free();
	}

	return (ret);
}