1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitops.h> 38 #include <sys/bus.h> 39 #include <sys/cpu_topology.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/queue.h> 43 #include <sys/sensors.h> 44 45 #include <bus/pci/pcivar.h> 46 #include <bus/pci/pcireg.h> 47 #include <bus/pci/pci_cfgreg.h> 48 #include <bus/pci/pcib_private.h> 49 50 #include "pcib_if.h" 51 52 #include <dev/misc/dimm/dimm.h> 53 #include <dev/misc/ecc/e5_imc_reg.h> 54 #include <dev/misc/ecc/e5_imc_var.h> 55 56 struct ecc_e5_dimm { 57 TAILQ_ENTRY(ecc_e5_dimm) dimm_link; 58 struct dimm_softc *dimm_softc; 59 struct ksensor dimm_sensor; 60 }; 61 62 struct ecc_e5_rank { 63 struct ecc_e5_dimm *rank_dimm_sc; 64 }; 65 66 struct ecc_e5_softc { 67 device_t ecc_dev; 68 const struct e5_imc_chan *ecc_chan; 69 int ecc_node; 70 int ecc_rank_cnt; 71 struct ecc_e5_rank ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX]; 72 struct sensor_task *ecc_senstask; 73 TAILQ_HEAD(, ecc_e5_dimm) ecc_dimm; 74 }; 75 76 #define ecc_printf(sc, fmt, arg...) \ 77 device_printf((sc)->ecc_dev, fmt , ##arg) 78 79 static int ecc_e5_probe(device_t); 80 static int ecc_e5_attach(device_t); 81 static int ecc_e5_detach(device_t); 82 static void ecc_e5_shutdown(device_t); 83 84 static void ecc_e5_sensor_task(void *); 85 86 #define ECC_E5_CHAN(v, imc, c, c_ext) \ 87 { \ 88 .did = PCI_E5V##v##_IMC##imc##_ERROR_CHN##c##_DID_ID, \ 89 .slot = PCISLOT_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 90 .func = PCIFUNC_E5V##v##_IMC##imc##_ERROR_CHN##c, \ 91 .desc = "Intel E5 v" #v " ECC", \ 92 \ 93 E5_IMC_CHAN_FIELDS(v, imc, c, c_ext) \ 94 } 95 96 #define ECC_E5_CHAN_V2(c) ECC_E5_CHAN(2, 0, c, c) 97 #define ECC_E5_CHAN_IMC0_V3(c) ECC_E5_CHAN(3, 0, c, c) 98 #define ECC_E5_CHAN_IMC1_V3(c, c_ext) ECC_E5_CHAN(3, 1, c, c_ext) 99 #define ECC_E5_CHAN_END E5_IMC_CHAN_END 100 101 static const struct e5_imc_chan ecc_e5_chans[] = { 102 ECC_E5_CHAN_V2(0), 103 ECC_E5_CHAN_V2(1), 104 ECC_E5_CHAN_V2(2), 105 ECC_E5_CHAN_V2(3), 106 107 ECC_E5_CHAN_IMC0_V3(0), 108 ECC_E5_CHAN_IMC0_V3(1), 109 ECC_E5_CHAN_IMC0_V3(2), 110 ECC_E5_CHAN_IMC0_V3(3), 111 ECC_E5_CHAN_IMC1_V3(0, 2), /* IMC1 chan0 -> channel2 */ 112 ECC_E5_CHAN_IMC1_V3(1, 3), /* IMC1 chan1 -> channel3 */ 113 114 ECC_E5_CHAN_END 115 }; 116 117 #undef ECC_E5_CHAN_END 118 #undef ECC_E5_CHAN_V2 119 #undef ECC_E5_CHAN 120 121 static device_method_t ecc_e5_methods[] = { 122 /* Device interface */ 123 DEVMETHOD(device_probe, ecc_e5_probe), 124 DEVMETHOD(device_attach, ecc_e5_attach), 125 DEVMETHOD(device_detach, ecc_e5_detach), 126 DEVMETHOD(device_shutdown, ecc_e5_shutdown), 127 DEVMETHOD(device_suspend, bus_generic_suspend), 128 DEVMETHOD(device_resume, bus_generic_resume), 129 DEVMETHOD_END 130 }; 131 132 static driver_t ecc_e5_driver = { 133 "ecc", 134 ecc_e5_methods, 135 sizeof(struct ecc_e5_softc) 136 }; 137 static devclass_t ecc_devclass; 138 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL); 139 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1); 140 MODULE_DEPEND(ecc_e5, dimm, 1, 1, 1); 141 142 static int 143 ecc_e5_probe(device_t dev) 144 { 145 const struct e5_imc_chan *c; 146 uint16_t vid, did; 147 int slot, func; 148 149 vid = pci_get_vendor(dev); 150 if (vid != PCI_E5_IMC_VID_ID) 151 return ENXIO; 152 153 did = pci_get_device(dev); 154 slot = pci_get_slot(dev); 155 func = pci_get_function(dev); 156 157 for (c = ecc_e5_chans; c->desc != NULL; ++c) { 158 if (c->did == did && c->slot == slot && c->func == func) { 159 struct ecc_e5_softc *sc = device_get_softc(dev); 160 int node; 161 162 node = e5_imc_node_probe(dev, c); 163 if (node < 0) 164 break; 165 166 device_set_desc(dev, c->desc); 167 168 sc->ecc_chan = c; 169 sc->ecc_node = node; 170 return 0; 171 } 172 } 173 return ENXIO; 174 } 175 176 static int 177 ecc_e5_attach(device_t dev) 178 { 179 struct ecc_e5_softc *sc = device_get_softc(dev); 180 int dimm, rank, error, cpuid; 181 const cpu_node_t *node; 182 uint32_t mcmtr; 183 184 TAILQ_INIT(&sc->ecc_dimm); 185 sc->ecc_dev = dev; 186 187 mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, sc->ecc_chan, 188 PCI_E5_IMC_CPGC_MCMTR); 189 if (bootverbose) { 190 if (sc->ecc_chan->ver == E5_IMC_CHAN_VER3 && 191 (mcmtr & PCI_E5V3_IMC_CPGC_MCMTR_DDR4)) 192 ecc_printf(sc, "DDR4\n"); 193 if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) == 194 PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3) { 195 ecc_printf(sc, "native %s\n", 196 sc->ecc_chan->ver == E5_IMC_CHAN_VER2 ? 197 "DDR3" : "DDR"); 198 } 199 } 200 201 rank = 0; 202 for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) { 203 struct ecc_e5_dimm *dimm_sc; 204 struct ksensor *sens; 205 const char *width; 206 uint32_t dimmmtr; 207 int rank_cnt, r; 208 int density; 209 int val; 210 211 dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan, 212 PCI_E5_IMC_CTAD_DIMMMTR(dimm)); 213 214 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0) 215 continue; 216 217 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT); 218 switch (val) { 219 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR: 220 rank_cnt = 1; 221 break; 222 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR: 223 rank_cnt = 2; 224 break; 225 case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR: 226 rank_cnt = 4; 227 break; 228 case PCI_E5V3_IMC_CTAD_DIMMMTR_RANK_CNT_8R: 229 if (sc->ecc_chan->ver >= E5_IMC_CHAN_VER3) { 230 rank_cnt = 8; 231 break; 232 } 233 /* FALL THROUGH */ 234 default: 235 ecc_printf(sc, "unknown rank count 0x%x\n", val); 236 error = ENXIO; 237 goto failed; 238 } 239 240 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH); 241 switch (val) { 242 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4: 243 width = "x4"; 244 break; 245 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8: 246 width = "x8"; 247 break; 248 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16: 249 width = "x16"; 250 break; 251 default: 252 ecc_printf(sc, "unknown ddr3 width 0x%x\n", val); 253 error = ENXIO; 254 goto failed; 255 } 256 257 val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY); 258 switch (val) { 259 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G: 260 density = 2; 261 break; 262 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G: 263 density = 4; 264 break; 265 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G: 266 density = 8; 267 break; 268 case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G: 269 if (sc->ecc_chan->ver < E5_IMC_CHAN_VER3) { 270 density = 1; 271 break; 272 } 273 /* FALL THROUGH */ 274 default: 275 ecc_printf(sc, "unknown ddr3 density 0x%x\n", val); 276 error = ENXIO; 277 goto failed; 278 } 279 280 if (bootverbose) { 281 ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n", 282 dimm, density * rank_cnt * 2, 283 rank_cnt, width, density); 284 } 285 286 dimm_sc = kmalloc(sizeof(*dimm_sc), M_DEVBUF, 287 M_WAITOK | M_ZERO); 288 dimm_sc->dimm_softc = 289 dimm_create(sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 290 291 sens = &dimm_sc->dimm_sensor; 292 ksnprintf(sens->desc, sizeof(sens->desc), 293 "node%d chan%d DIMM%d ecc", 294 sc->ecc_node, sc->ecc_chan->chan_ext, dimm); 295 sens->type = SENSOR_ECC; 296 sensor_set(sens, 0, SENSOR_S_OK); 297 dimm_sensor_attach(dimm_sc->dimm_softc, sens); 298 299 TAILQ_INSERT_TAIL(&sc->ecc_dimm, dimm_sc, dimm_link); 300 301 for (r = 0; r < rank_cnt; ++r) { 302 struct ecc_e5_rank *rk; 303 304 if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) { 305 ecc_printf(sc, "too many ranks\n"); 306 error = ENXIO; 307 goto failed; 308 } 309 310 rk = &sc->ecc_rank[rank]; 311 rk->rank_dimm_sc = dimm_sc; 312 ++rank; 313 } 314 } 315 sc->ecc_rank_cnt = rank; 316 317 if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) { 318 ecc_printf(sc, "ECC is not enabled\n"); 319 return 0; 320 } 321 322 for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) { 323 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 324 uint32_t thr, mask; 325 int ofs; 326 327 ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2); 328 if (rank & 1) 329 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI; 330 else 331 mask = PCI_E5_IMC_ERROR_COR_ERR_TH_LO; 332 333 thr = pci_read_config(sc->ecc_dev, ofs, 4); 334 dimm_set_ecc_thresh(rk->rank_dimm_sc->dimm_softc, 335 __SHIFTOUT(thr, mask)); 336 } 337 338 cpuid = -1; 339 node = get_cpu_node_by_chipid(sc->ecc_node); 340 if (node != NULL && node->child_no > 0) { 341 cpuid = BSRCPUMASK(node->members); 342 if (bootverbose) { 343 device_printf(dev, "node%d chan%d -> cpu%d\n", 344 sc->ecc_node, sc->ecc_chan->chan_ext, cpuid); 345 } 346 } 347 sc->ecc_senstask = sensor_task_register2(sc, ecc_e5_sensor_task, 348 1, cpuid); 349 350 return 0; 351 failed: 352 ecc_e5_detach(dev); 353 return error; 354 } 355 356 static void 357 ecc_e5_sensor_task(void *xsc) 358 { 359 struct ecc_e5_softc *sc = xsc; 360 uint32_t err_ranks, val; 361 362 val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4); 363 364 err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS); 365 while (err_ranks != 0) { 366 int rank; 367 368 rank = ffs(err_ranks) - 1; 369 err_ranks &= ~(1 << rank); 370 371 if (rank < sc->ecc_rank_cnt) { 372 const struct ecc_e5_rank *rk = &sc->ecc_rank[rank]; 373 struct ecc_e5_dimm *dimm_sc = rk->rank_dimm_sc; 374 uint32_t err, mask; 375 int ofs, ecc_cnt; 376 377 ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2); 378 if (rank & 1) 379 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI; 380 else 381 mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO; 382 383 err = pci_read_config(sc->ecc_dev, ofs, 4); 384 ecc_cnt = __SHIFTOUT(err, mask); 385 386 dimm_sensor_ecc_set(dimm_sc->dimm_softc, 387 &dimm_sc->dimm_sensor, ecc_cnt, TRUE); 388 } else { 389 ecc_printf(sc, "channel%d rank%d critical error\n", 390 sc->ecc_chan->chan_ext, rank); 391 } 392 } 393 394 if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) { 395 pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 396 val, 4); 397 } 398 } 399 400 static void 401 ecc_e5_stop(device_t dev) 402 { 403 struct ecc_e5_softc *sc = device_get_softc(dev); 404 405 if (sc->ecc_senstask != NULL) { 406 sensor_task_unregister2(sc->ecc_senstask); 407 sc->ecc_senstask = NULL; 408 } 409 } 410 411 static int 412 ecc_e5_detach(device_t dev) 413 { 414 struct ecc_e5_softc *sc = device_get_softc(dev); 415 struct ecc_e5_dimm *dimm_sc; 416 417 ecc_e5_stop(dev); 418 419 while ((dimm_sc = TAILQ_FIRST(&sc->ecc_dimm)) != NULL) { 420 TAILQ_REMOVE(&sc->ecc_dimm, dimm_sc, dimm_link); 421 dimm_sensor_detach(dimm_sc->dimm_softc, &dimm_sc->dimm_sensor); 422 dimm_destroy(dimm_sc->dimm_softc); 423 424 kfree(dimm_sc, M_DEVBUF); 425 } 426 return 0; 427 } 428 429 static void 430 ecc_e5_shutdown(device_t dev) 431 { 432 ecc_e5_stop(dev); 433 } 434