1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bitops.h> 38 #include <sys/bus.h> 39 #include <sys/kernel.h> 40 #include <sys/malloc.h> 41 #include <sys/sensors.h> 42 #include <sys/sysctl.h> 43 44 #include <bus/pci/pcivar.h> 45 #include <bus/pci/pcireg.h> 46 #include <bus/pci/pcibus.h> 47 #include <bus/pci/pci_cfgreg.h> 48 #include <bus/pci/pcib_private.h> 49 50 #include "pcib_if.h" 51 52 #include <dev/misc/ecc/e5_imc_reg.h> 53 #include <dev/misc/ecc/e5_imc_var.h> 54 55 #define MEMTEMP_E5_DIMM_TEMP_HIWAT 85 /* spec default TEMPLO */ 56 #define MEMTEMP_E5_DIMM_TEMP_STEP 5 /* spec TEMPLO/MID/HI step */ 57 58 struct memtemp_e5_softc; 59 60 struct memtemp_e5_dimm { 61 TAILQ_ENTRY(memtemp_e5_dimm) dimm_link; 62 struct ksensordev dimm_sensordev; 63 struct ksensor dimm_sensor; 64 struct memtemp_e5_softc *dimm_parent; 65 int dimm_id; 66 int dimm_temp_hiwat; 67 int dimm_temp_lowat; 68 int dimm_flags; 69 }; 70 71 #define MEMTEMP_E5_DIMM_FLAG_CRIT 0x1 72 73 struct memtemp_e5_softc { 74 device_t temp_dev; 75 const struct e5_imc_chan *temp_chan; 76 int temp_node; 77 TAILQ_HEAD(, memtemp_e5_dimm) temp_dimm; 78 }; 79 80 static int memtemp_e5_probe(device_t); 81 static int memtemp_e5_attach(device_t); 82 static int memtemp_e5_detach(device_t); 83 84 static int memtemp_e5_tempth_adjust(int); 85 static void memtemp_e5_tempth_str(int, char *, int); 86 static void memtemp_e5_sensor_task(void *); 87 88 #define MEMTEMP_E5_CHAN(v, imc, c, c_ext) \ 89 { \ 90 .did = PCI_E5V##v##_IMC##imc##_THERMAL_CHN##c##_DID_ID, \ 91 .slot = PCISLOT_E5V##v##_IMC##imc##_THERMAL_CHN##c, \ 92 .func = PCIFUNC_E5V##v##_IMC##imc##_THERMAL_CHN##c, \ 93 .desc = "Intel E5 v" #v " memory thermal sensor", \ 94 \ 95 E5_IMC_CHAN_FIELDS(v, imc, c, c_ext) \ 96 } 97 98 #define MEMTEMP_E5_CHAN_V2(c) MEMTEMP_E5_CHAN(2, 0, c, c) 99 #define MEMTEMP_E5_CHAN_IMC0_V3(c) MEMTEMP_E5_CHAN(3, 0, c, c) 100 #define MEMTEMP_E5_CHAN_IMC1_V3(c, c_ext) \ 101 MEMTEMP_E5_CHAN(3, 1, c, c_ext) 102 #define MEMTEMP_E5_CHAN_END E5_IMC_CHAN_END 103 104 static const struct e5_imc_chan memtemp_e5_chans[] = { 105 MEMTEMP_E5_CHAN_V2(0), 106 MEMTEMP_E5_CHAN_V2(1), 107 MEMTEMP_E5_CHAN_V2(2), 108 MEMTEMP_E5_CHAN_V2(3), 109 110 MEMTEMP_E5_CHAN_IMC0_V3(0), 111 MEMTEMP_E5_CHAN_IMC0_V3(1), 112 MEMTEMP_E5_CHAN_IMC0_V3(2), 113 MEMTEMP_E5_CHAN_IMC0_V3(3), 114 MEMTEMP_E5_CHAN_IMC1_V3(0, 2), /* IMC1 chan0 -> channel2 */ 115 MEMTEMP_E5_CHAN_IMC1_V3(1, 3), /* IMC1 chan1 -> channel3 */ 116 117 MEMTEMP_E5_CHAN_END 118 }; 119 120 #undef MEMTEMP_E5_CHAN_END 121 #undef MEMTEMP_E5_CHAN_V2 122 #undef MEMTEMP_E5_CHAN 123 124 static device_method_t memtemp_e5_methods[] = { 125 /* Device interface */ 126 DEVMETHOD(device_probe, memtemp_e5_probe), 127 DEVMETHOD(device_attach, memtemp_e5_attach), 128 DEVMETHOD(device_detach, memtemp_e5_detach), 129 DEVMETHOD(device_shutdown, bus_generic_shutdown), 130 DEVMETHOD(device_suspend, bus_generic_suspend), 131 DEVMETHOD(device_resume, bus_generic_resume), 132 DEVMETHOD_END 133 }; 134 135 static driver_t memtemp_e5_driver = { 136 "memtemp", 137 memtemp_e5_methods, 138 sizeof(struct memtemp_e5_softc) 139 }; 140 static devclass_t memtemp_devclass; 141 DRIVER_MODULE(memtemp_e5, pci, memtemp_e5_driver, memtemp_devclass, NULL, NULL); 142 MODULE_DEPEND(memtemp_e5, pci, 1, 1, 1); 143 144 static int 145 memtemp_e5_probe(device_t dev) 146 { 147 const struct e5_imc_chan *c; 148 uint16_t vid, did; 149 int slot, func; 150 151 vid = pci_get_vendor(dev); 152 if (vid != PCI_E5_IMC_VID_ID) 153 return ENXIO; 154 155 did = pci_get_device(dev); 156 slot = pci_get_slot(dev); 157 func = pci_get_function(dev); 158 159 for (c = memtemp_e5_chans; c->desc != NULL; ++c) { 160 if (c->did == did && c->slot == slot && c->func == func) { 161 struct memtemp_e5_softc *sc = device_get_softc(dev); 162 char desc[128]; 163 uint32_t cfg; 164 int node; 165 166 node = e5_imc_node_probe(dev, c); 167 if (node < 0) 168 break; 169 170 /* 171 * XXX 172 * It seems that memory thermal sensor is available, 173 * only if CLTT is set (OLTT_EN does not seem matter). 174 */ 175 cfg = pci_read_config(dev, 176 PCI_E5_IMC_THERMAL_CHN_TEMP_CFG, 4); 177 if ((cfg & PCI_E5_IMC_THERMAL_CHN_TEMP_CFG_CLTT) == 0) 178 break; 179 180 ksnprintf(desc, sizeof(desc), "%s node%d channel%d", 181 c->desc, node, c->chan_ext); 182 device_set_desc_copy(dev, desc); 183 184 sc->temp_chan = c; 185 sc->temp_node = node; 186 187 return 0; 188 } 189 } 190 return ENXIO; 191 } 192 193 static int 194 memtemp_e5_tempth_adjust(int temp) 195 { 196 if (temp == PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_DISABLE) 197 return 0; 198 else if (temp < PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_TEMPMIN || 199 temp >= PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_TEMPMAX) 200 return -1; 201 return temp; 202 } 203 204 static void 205 memtemp_e5_tempth_str(int temp, char *temp_str, int temp_strlen) 206 { 207 if (temp < 0) 208 strlcpy(temp_str, "reserved", temp_strlen); 209 else if (temp == 0) 210 strlcpy(temp_str, "disabled", temp_strlen); 211 else 212 ksnprintf(temp_str, temp_strlen, "%dC", temp); 213 } 214 215 static int 216 memtemp_e5_attach(device_t dev) 217 { 218 struct memtemp_e5_softc *sc = device_get_softc(dev); 219 int dimm; 220 221 sc->temp_dev = dev; 222 TAILQ_INIT(&sc->temp_dimm); 223 224 for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) { 225 char temp_lostr[16], temp_midstr[16], temp_histr[16]; 226 struct memtemp_e5_dimm *dimm_sc; 227 int dimm_extid, temp_lo, temp_mid, temp_hi; 228 uint32_t dimmmtr, temp_th; 229 230 dimmmtr = IMC_CTAD_READ_4(dev, sc->temp_chan, 231 PCI_E5_IMC_CTAD_DIMMMTR(dimm)); 232 233 if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0) 234 continue; 235 236 dimm_sc = kmalloc(sizeof(*dimm_sc), M_DEVBUF, 237 M_WAITOK | M_ZERO); 238 dimm_sc->dimm_id = dimm; 239 dimm_sc->dimm_parent = sc; 240 241 temp_th = pci_read_config(dev, 242 PCI_E5_IMC_THERMAL_DIMM_TEMP_TH(dimm), 4); 243 244 temp_lo = __SHIFTOUT(temp_th, 245 PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_TEMPLO); 246 temp_lo = memtemp_e5_tempth_adjust(temp_lo); 247 memtemp_e5_tempth_str(temp_lo, temp_lostr, sizeof(temp_lostr)); 248 249 temp_mid = __SHIFTOUT(temp_th, 250 PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_TEMPMID); 251 temp_mid = memtemp_e5_tempth_adjust(temp_mid); 252 memtemp_e5_tempth_str(temp_mid, temp_midstr, 253 sizeof(temp_midstr)); 254 255 temp_hi = __SHIFTOUT(temp_th, 256 PCI_E5_IMC_THERMAL_DIMM_TEMP_TH_TEMPHI); 257 temp_hi = memtemp_e5_tempth_adjust(temp_hi); 258 memtemp_e5_tempth_str(temp_hi, temp_histr, sizeof(temp_histr)); 259 260 /* 261 * NOTE: 262 * - TEMPHI initiates THRTCRIT. 263 * - TEMPMID initiates THRTHI, so it is also taken into 264 * consideration. 265 * - Some BIOSes program temp_lo to a rediculous low value, 266 * so ignore TEMPLO here. 267 */ 268 if (temp_mid <= 0) { 269 if (temp_hi <= 0) 270 dimm_sc->dimm_temp_hiwat = MEMTEMP_E5_DIMM_TEMP_HIWAT; 271 else 272 dimm_sc->dimm_temp_hiwat = temp_hi; 273 } else { 274 dimm_sc->dimm_temp_hiwat = temp_mid; 275 } 276 if (dimm_sc->dimm_temp_hiwat < MEMTEMP_E5_DIMM_TEMP_STEP) 277 dimm_sc->dimm_temp_hiwat = MEMTEMP_E5_DIMM_TEMP_HIWAT; 278 dimm_sc->dimm_temp_lowat = dimm_sc->dimm_temp_hiwat - 279 MEMTEMP_E5_DIMM_TEMP_STEP; 280 281 device_printf(dev, "DIMM%d " 282 "temp_hi %s, temp_mid %s, temp_lo %s\n", dimm, 283 temp_histr, temp_midstr, temp_lostr); 284 device_printf(dev, "DIMM%d hiwat %dC, lowat %dC\n", dimm, 285 dimm_sc->dimm_temp_hiwat, dimm_sc->dimm_temp_lowat); 286 287 dimm_extid = 288 (sc->temp_node * PCI_E5_IMC_CHN_MAX * PCI_E5_IMC_CHN_DIMM_MAX) + 289 (sc->temp_chan->chan_ext * PCI_E5_IMC_CHN_DIMM_MAX) + dimm; 290 ksnprintf(dimm_sc->dimm_sensordev.xname, 291 sizeof(dimm_sc->dimm_sensordev.xname), 292 "dimm%d", dimm_extid); 293 dimm_sc->dimm_sensor.type = SENSOR_TEMP; 294 sensor_attach(&dimm_sc->dimm_sensordev, &dimm_sc->dimm_sensor); 295 if (sensor_task_register(dimm_sc, memtemp_e5_sensor_task, 2)) { 296 device_printf(dev, "DIMM%d sensor task register " 297 "failed\n", dimm); 298 kfree(dimm_sc, M_DEVBUF); 299 continue; 300 } 301 sensordev_install(&dimm_sc->dimm_sensordev); 302 303 TAILQ_INSERT_TAIL(&sc->temp_dimm, dimm_sc, dimm_link); 304 } 305 return 0; 306 } 307 308 static int 309 memtemp_e5_detach(device_t dev) 310 { 311 struct memtemp_e5_softc *sc = device_get_softc(dev); 312 struct memtemp_e5_dimm *dimm_sc; 313 314 while ((dimm_sc = TAILQ_FIRST(&sc->temp_dimm)) != NULL) { 315 TAILQ_REMOVE(&sc->temp_dimm, dimm_sc, dimm_link); 316 317 sensordev_deinstall(&dimm_sc->dimm_sensordev); 318 sensor_task_unregister(dimm_sc); 319 320 kfree(dimm_sc, M_DEVBUF); 321 } 322 return 0; 323 } 324 325 static void 326 memtemp_e5_sensor_task(void *xdimm_sc) 327 { 328 struct memtemp_e5_dimm *dimm_sc = xdimm_sc; 329 struct ksensor *sensor = &dimm_sc->dimm_sensor; 330 device_t dev = dimm_sc->dimm_parent->temp_dev; 331 int dimm = dimm_sc->dimm_id; 332 uint32_t val; 333 int temp, reg; 334 335 reg = PCI_E5_IMC_THERMAL_DIMMTEMPSTAT(dimm); 336 337 val = pci_read_config(dev, reg, 4); 338 if (val & (PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMPHI | 339 PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMPMID | 340 PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMPLO)) 341 pci_write_config(dev, reg, val, 4); 342 343 temp = __SHIFTOUT(val, PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMP); 344 if (temp < PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMPMIN || 345 temp >= PCI_E5_IMC_THERMAL_DIMMTEMPSTAT_TEMPMAX) { 346 sensor->status = SENSOR_S_UNSPEC; 347 sensor->flags |= SENSOR_FINVALID; 348 sensor->value = 0; 349 return; 350 } 351 352 /* 353 * Some BIOSes will always turn on TEMPMID, so we rely on 354 * our own hiwat/lowat to send the notification. 355 */ 356 if (temp >= dimm_sc->dimm_temp_hiwat && 357 (dimm_sc->dimm_flags & MEMTEMP_E5_DIMM_FLAG_CRIT) == 0) { 358 int node, chan; 359 char temp_str[16], data[64]; 360 361 node = dimm_sc->dimm_parent->temp_node; 362 chan = dimm_sc->dimm_parent->temp_chan->chan_ext; 363 364 ksnprintf(temp_str, sizeof(temp_str), "%d", temp); 365 ksnprintf(data, sizeof(data), 366 "node=%d channel=%d dimm=%d", node, chan, dimm); 367 devctl_notify("memtemp", "Thermal", temp_str, data); 368 369 device_printf(dev, "node%d channel%d DIMM%d " 370 "temperature (%dC) is too high (>= %d)\n", 371 node, chan, dimm, temp, dimm_sc->dimm_temp_hiwat); 372 373 dimm_sc->dimm_flags |= MEMTEMP_E5_DIMM_FLAG_CRIT; 374 } else if ((dimm_sc->dimm_flags & MEMTEMP_E5_DIMM_FLAG_CRIT) && 375 temp < dimm_sc->dimm_temp_lowat) { 376 dimm_sc->dimm_flags &= ~MEMTEMP_E5_DIMM_FLAG_CRIT; 377 } 378 379 if (dimm_sc->dimm_flags & MEMTEMP_E5_DIMM_FLAG_CRIT) 380 sensor->status = SENSOR_S_CRIT; 381 else 382 sensor->status = SENSOR_S_OK; 383 sensor->flags &= ~SENSOR_FINVALID; 384 sensor->value = (temp * 1000000) + 273150000; 385 } 386