1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitops.h> 38 #include <sys/bus.h> 39 #include <sys/cpu_topology.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/queue.h> 43 #include <sys/sensors.h> 44 45 #include <bus/pci/pcivar.h> 46 #include <bus/pci/pcireg.h> 47 #include <bus/pci/pci_cfgreg.h> 48 #include <bus/pci/pcib_private.h> 49 50 #include "pcib_if.h" 51 52 #include <dev/misc/dimm/dimm.h> 53 #include <dev/misc/ecc/e5_imc_reg.h> 54 #include <dev/misc/ecc/e5_imc_var.h> 55 56 struct ecc_e5_dimm { 57 TAILQ_ENTRY(ecc_e5_dimm) dimm_link; 58 struct dimm_softc *dimm_softc; 59 struct ksensor dimm_sensor; 60 }; 61 62 struct ecc_e5_rank { 63 struct ecc_e5_dimm *rank_dimm_sc; 64 }; 65 66 struct ecc_e5_softc { 67 device_t ecc_dev; 68 const struct e5_imc_chan *ecc_chan; 69 int ecc_node; 70 int ecc_rank_cnt; 71 struct ecc_e5_rank ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX]; 72 struct sensor_task *ecc_senstask; 73 TAILQ_HEAD(, ecc_e5_dimm) ecc_dimm; 74 }; 75 76 #define ecc_printf(sc, fmt, arg...) \ 77 device_printf((sc)->ecc_dev, fmt , ##arg) 78 79 static int ecc_e5_probe(device_t); 80 static int ecc_e5_attach(device_t); 81 static int ecc_e5_detach(device_t); 82 static void ecc_e5_shutdown(device_t); 83 84 static void ecc_e5_sensor_task(void *); 85 86 #define ECC_E5_CHAN(v, imc, c, c_ext) \ 87 { \ 88 .did = PCI_E5V##v##_IMC##imc##_ERROR_CHN##c##_DID_ID, \ 89 .slot = PCISLOT_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 90 .func = PCIFUNC_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 91 .desc = "Intel E5 v" #v " ECC", \ 92 \ 93 E5_IMC_CHAN_FIELDS(v, imc, c, c_ext) \ 94 } 95 96 #define ECC_E5_CHAN_V2(c) ECC_E5_CHAN(2, 0, c, c) 97 #define ECC_E5_CHAN_IMC0_V3(c) ECC_E5_CHAN(3, 0, c, c) 98 #define ECC_E5_CHAN_IMC1_V3(c, c_ext) ECC_E5_CHAN(3, 1, c, c_ext) 99 #define ECC_E5_CHAN_END E5_IMC_CHAN_END 100 101 static const struct e5_imc_chan ecc_e5_chans[] = { 102 ECC_E5_CHAN_V2(0), 103 ECC_E5_CHAN_V2(1), 104 ECC_E5_CHAN_V2(2), 105 ECC_E5_CHAN_V2(3), 106 107 ECC_E5_CHAN_IMC0_V3(0), 108 ECC_E5_CHAN_IMC0_V3(1), 109 ECC_E5_CHAN_IMC0_V3(2), 110 ECC_E5_CHAN_IMC0_V3(3), 111 ECC_E5_CHAN_IMC1_V3(0, 2), /* IMC1 chan0 -> channel2 */ 112 ECC_E5_CHAN_IMC1_V3(1, 3), /* IMC1 chan1 -> channel3 */ 113 114 ECC_E5_CHAN_END 115 }; 116 117 #undef ECC_E5_CHAN_END 118 #undef ECC_E5_CHAN_V2 119 #undef ECC_E5_CHAN 120 121 static device_method_t ecc_e5_methods[] = { 122 /* Device interface */ 123 DEVMETHOD(device_probe, ecc_e5_probe), 124 DEVMETHOD(device_attach, ecc_e5_attach), 125 DEVMETHOD(device_detach, ecc_e5_detach), 126 DEVMETHOD(device_shutdown, ecc_e5_shutdown), 127 DEVMETHOD(device_suspend, bus_generic_suspend), 128 DEVMETHOD(device_resume, bus_generic_resume), 129 DEVMETHOD_END 130 }; 131 132 static driver_t ecc_e5_driver = { 133 "ecc", 134 ecc_e5_methods, 135 sizeof(struct ecc_e5_softc) 136 }; 137 static devclass_t ecc_devclass; 138 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL); 139 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1); 140 MODULE_DEPEND(ecc_e5, dimm, 1, 1, 1); 141 MODULE_VERSION(ecc_e5, 1); 142 143 static int 144 ecc_e5_probe(device_t dev) 145 { 146 const struct e5_imc_chan *c; 147 uint16_t vid, did; 148 int slot, func; 149 150 vid = pci_get_vendor(dev); 151 if (vid != PCI_E5_IMC_VID_ID) 152 return ENXIO; 153 154 did = pci_get_device(dev); 155 slot = pci_get_slot(dev); 156 func = pci_get_function(dev); 157 158 for (c = ecc_e5_chans; c->desc != NULL; ++c) { 159 if (c->did == did && c->slot == slot && c->func == func) { 160 struct ecc_e5_softc *sc = device_get_softc(dev); 161 int node; 162 163 node = e5_imc_node_probe(dev, c); 164 if (node < 0) 165 break; 166 167 device_set_desc(dev, c->desc); 168 169 sc->ecc_chan = c; 170 sc->ecc_node = node; 171 return 0; 172 } 173 } 174 return ENXIO; 175 } 176 177 static int 178 ecc_e5_attach(device_t dev) 179 { 180 struct ecc_e5_softc *sc = device_get_softc(dev); 181 int dimm, rank, error, cpuid; 182 const cpu_node_t *node; 183 uint32_t mcmtr; 184 185 TAILQ_INIT(&sc->ecc_dimm); 186 sc->ecc_dev = dev; 187 188 mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, sc->ecc_chan, 189 PCI_E5_IMC_CPGC_MCMTR); 190 if (bootverbose) { 191 if (sc->ecc_chan->ver == E5_IMC_CHAN_VER3 && 192 (mcmtr & PCI_E5V3_IMC_CPGC_MCMTR_DDR4)) 193 ecc_printf(sc, "DDR4\n"); 194 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) == 195 PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3) { 196 ecc_printf(sc, "native %s\n", 197 sc->ecc_chan->ver == E5_IMC_CHAN_VER2 ? 198 "DDR3" : "DDR"); 199 } 200 } 201 202 rank = 0; 203 for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) { 204 struct ecc_e5_dimm *dimm_sc; 205 struct ksensor *sens; 206 const char *width; 207 uint32_t dimmmtr; 208 int rank_cnt, r; 209 int density; 210 int val; 211 212 dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan, 213 PCI_E5_IMC_CTAD_DIMMMTR(dimm)); 214 215 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0) 216 continue; 217 218 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT); 219 switch (val) { 220 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR: 221 rank_cnt = 1; 222 break; 223 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR: 224 rank_cnt = 2; 225 break; 226 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR: 227 rank_cnt = 4; 228 break; 229 case PCI_E5V3_IMC_CTAD_DIMMMTR_RANK_CNT_8R: 230 if (sc->ecc_chan->ver >= E5_IMC_CHAN_VER3) { 231 rank_cnt = 8; 232 break; 233 } 234 /* FALL THROUGH */ 235 default: 236 ecc_printf(sc, "unknown rank count 0x%x\n", val); 237 error = ENXIO; 238 goto failed; 239 } 240 241 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH); 242 switch (val) { 243 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4: 244 width = "x4"; 245 break; 246 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8: 247 width = "x8"; 248 break; 249 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16: 250 width = "x16"; 251 break; 252 default: 253 ecc_printf(sc, "unknown ddr3 width 0x%x\n", val); 254 error = ENXIO; 255 goto failed; 256 } 257 258 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY); 259 switch (val) { 260 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G: 261 density = 2; 262 break; 263 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G: 264 density = 4; 265 break; 266 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G: 267 density = 8; 268 break; 269 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G: 270 if (sc->ecc_chan->ver < E5_IMC_CHAN_VER3) { 271 density = 1; 272 break; 273 } 274 /* FALL THROUGH */ 275 default: 276 ecc_printf(sc, "unknown ddr3 density 0x%x\n", val); 277 error = ENXIO; 278 goto failed; 279 } 280 281 if (bootverbose) { 282 ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n", 283 dimm, density * rank_cnt * 2, 284 rank_cnt, width, density); 285 } 286 287 dimm_sc = kmalloc(sizeof(*dimm_sc), M_DEVBUF, 288 M_WAITOK | M_ZERO); 289 dimm_sc->dimm_softc = 290 dimm_create(sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 291 292 sens = &dimm_sc->dimm_sensor; 293 ksnprintf(sens->desc, sizeof(sens->desc), 294 "node%d chan%d DIMM%d ecc", 295 sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 296 sens->type = SENSOR_ECC; 297 sensor_set(sens, 0, SENSOR_S_OK); 298 dimm_sensor_attach(dimm_sc->dimm_softc, sens); 299 300 TAILQ_INSERT_TAIL(&sc->ecc_dimm, dimm_sc, dimm_link); 301 302 for (r = 0; r < rank_cnt; ++r) { 303 struct ecc_e5_rank *rk; 304 305 if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) { 306 ecc_printf(sc, "too many ranks\n"); 307 error = ENXIO; 308 goto failed; 309 } 310 311 rk = &sc->ecc_rank[rank]; 312 rk->rank_dimm_sc = dimm_sc; 313 ++rank; 314 } 315 } 316 sc->ecc_rank_cnt = rank; 317 318 if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) { 319 ecc_printf(sc, "ECC is not enabled\n"); 320 return 0; 321 } 322 323 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) { 324 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 325 uint32_t thr, mask; 326 int ofs; 327 328 ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2); 329 if (rank & 1) 330 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI; 331 else 332 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_LO; 333 334 thr = pci_read_config(sc->ecc_dev, ofs, 4); 335 dimm_set_ecc_thresh(rk->rank_dimm_sc->dimm_softc, 336 __SHIFTOUT(thr, mask)); 337 } 338 339 cpuid = -1; 340 node = get_cpu_node_by_chipid(sc->ecc_node); 341 if (node != NULL && node->child_no > 0) { 342 cpuid = BSRCPUMASK(node->members); 343 if (bootverbose) { 344 device_printf(dev, "node%d chan%d -> cpu%d\n", 345 sc->ecc_node, sc->ecc_chan->chan_ext, cpuid); 346 } 347 } 348 sc->ecc_senstask = sensor_task_register2(sc, ecc_e5_sensor_task, 349 1, cpuid); 350 351 return 0; 352 failed: 353 ecc_e5_detach(dev); 354 return error; 355 } 356 357 static void 358 ecc_e5_sensor_task(void *xsc) 359 { 360 struct ecc_e5_softc *sc = xsc; 361 uint32_t err_ranks, val; 362 363 val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4); 364 365 err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS); 366 while (err_ranks != 0) { 367 int rank; 368 369 rank = ffs(err_ranks) - 1; 370 err_ranks &= ~(1 << rank); 371 372 if (rank < sc->ecc_rank_cnt) { 373 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 374 struct ecc_e5_dimm *dimm_sc = rk->rank_dimm_sc; 375 uint32_t err, mask; 376 int ofs, ecc_cnt; 377 378 ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2); 379 if (rank & 1) 380 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI; 381 else 382 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO; 383 384 err = pci_read_config(sc->ecc_dev, ofs, 4); 385 ecc_cnt = __SHIFTOUT(err, mask); 386 387 dimm_sensor_ecc_set(dimm_sc->dimm_softc, 388 &dimm_sc->dimm_sensor, ecc_cnt, TRUE); 389 } else { 390 ecc_printf(sc, "channel%d rank%d critical error\n", 391 sc->ecc_chan->chan_ext, rank); 392 } 393 } 394 395 if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) { 396 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 397 val, 4); 398 } 399 } 400 401 static void 402 ecc_e5_stop(device_t dev) 403 { 404 struct ecc_e5_softc *sc = device_get_softc(dev); 405 406 if (sc->ecc_senstask != NULL) { 407 sensor_task_unregister2(sc->ecc_senstask); 408 sc->ecc_senstask = NULL; 409 } 410 } 411 412 static int 413 ecc_e5_detach(device_t dev) 414 { 415 struct ecc_e5_softc *sc = device_get_softc(dev); 416 struct ecc_e5_dimm *dimm_sc; 417 418 ecc_e5_stop(dev); 419 420 while ((dimm_sc = TAILQ_FIRST(&sc->ecc_dimm)) != NULL) { 421 TAILQ_REMOVE(&sc->ecc_dimm, dimm_sc, dimm_link); 422 dimm_sensor_detach(dimm_sc->dimm_softc, &dimm_sc->dimm_sensor); 423 dimm_destroy(dimm_sc->dimm_softc); 424 425 kfree(dimm_sc, M_DEVBUF); 426 } 427 return 0; 428 } 429 430 static void 431 ecc_e5_shutdown(device_t dev) 432 { 433 ecc_e5_stop(dev); 434 } 435