1 /* 2 * Copyright (c) 2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Sepherosa Ziehau <sepherosa@gmail.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/bus.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/malloc.h> 40 #include <sys/module.h> 41 #include <sys/sensors.h> 42 #include <sys/sysctl.h> 43 #include <sys/systm.h> 44 45 #include <dev/misc/dimm/dimm.h> 46 47 #define DIMM_TEMP_HIWAT_DEFAULT 85 48 #define DIMM_TEMP_LOWAT_DEFAULT 75 49 50 #define DIMM_ECC_THRESH_DEFAULT 5 51 52 struct dimm_softc { 53 TAILQ_ENTRY(dimm_softc) dimm_link; 54 int dimm_node; 55 int dimm_chan; 56 int dimm_slot; 57 int dimm_temp_hiwat; 58 int dimm_temp_lowat; 59 int dimm_id; 60 int dimm_ref; 61 int dimm_ecc_cnt; 62 int dimm_ecc_thresh; 63 64 struct ksensordev dimm_sensdev; 65 uint32_t dimm_sens_taskflags; /* DIMM_SENS_TF_ */ 66 67 struct sysctl_ctx_list dimm_sysctl_ctx; 68 struct sysctl_oid *dimm_sysctl_tree; 69 }; 70 TAILQ_HEAD(dimm_softc_list, dimm_softc); 71 72 #define DIMM_SENS_TF_TEMP_CRIT 0x1 73 #define DIMM_SENS_TF_ECC_CRIT 0x2 74 75 static void dimm_mod_unload(void); 76 static void dimm_sensor_ecc(struct dimm_softc *, struct ksensor *, 77 boolean_t); 78 79 /* In the ascending order of dimm_softc.dimm_id */ 80 static struct dimm_softc_list dimm_softc_list; 81 82 static SYSCTL_NODE(_hw, OID_AUTO, dimminfo, CTLFLAG_RD, NULL, 83 "DIMM information"); 84 85 struct dimm_softc * 86 dimm_create(int node, int chan, int slot) 87 { 88 struct dimm_softc *sc, *after = NULL; 89 int dimm_id = 0; 90 91 SYSCTL_XLOCK(); 92 93 TAILQ_FOREACH(sc, &dimm_softc_list, dimm_link) { 94 /* 95 * Already exists; done. 96 */ 97 if (sc->dimm_node == node && sc->dimm_chan == chan && 98 sc->dimm_slot == slot) { 99 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", 100 sc->dimm_ref)); 101 sc->dimm_ref++; 102 SYSCTL_XUNLOCK(); 103 return sc; 104 } 105 106 /* 107 * Find the lowest usable id. 108 */ 109 if (sc->dimm_id == dimm_id) { 110 ++dimm_id; 111 after = sc; 112 } 113 } 114 115 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO); 116 sc->dimm_node = node; 117 sc->dimm_chan = chan; 118 sc->dimm_slot = slot; 119 sc->dimm_id = dimm_id; 120 sc->dimm_ref = 1; 121 sc->dimm_temp_hiwat = DIMM_TEMP_HIWAT_DEFAULT; 122 sc->dimm_temp_lowat = DIMM_TEMP_LOWAT_DEFAULT; 123 sc->dimm_ecc_thresh = DIMM_ECC_THRESH_DEFAULT; 124 125 ksnprintf(sc->dimm_sensdev.xname, sizeof(sc->dimm_sensdev.xname), 126 "dimm%d", sc->dimm_id); 127 128 /* 129 * Create sysctl tree for the location information. Use 130 * same name as the sensor device. 131 */ 132 sysctl_ctx_init(&sc->dimm_sysctl_ctx); 133 sc->dimm_sysctl_tree = SYSCTL_ADD_NODE(&sc->dimm_sysctl_ctx, 134 SYSCTL_STATIC_CHILDREN(_hw_dimminfo), OID_AUTO, 135 sc->dimm_sensdev.xname, CTLFLAG_RD, 0, ""); 136 if (sc->dimm_sysctl_tree != NULL) { 137 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 138 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 139 "node", CTLFLAG_RD, &sc->dimm_node, 0, 140 "CPU node of this DIMM"); 141 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 142 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 143 "chan", CTLFLAG_RD, &sc->dimm_chan, 0, 144 "channel of this DIMM"); 145 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 146 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 147 "slot", CTLFLAG_RD, &sc->dimm_slot, 0, 148 "slot of this DIMM"); 149 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 150 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 151 "temp_hiwat", CTLFLAG_RW, &sc->dimm_temp_hiwat, 0, 152 "Raise alarm once DIMM temperature is above this value " 153 "(unit: C)"); 154 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 155 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 156 "temp_lowat", CTLFLAG_RW, &sc->dimm_temp_lowat, 0, 157 "Cancel alarm once DIMM temperature is below this value " 158 "(unit: C)"); 159 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx, 160 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO, 161 "ecc_thresh", CTLFLAG_RW, &sc->dimm_ecc_thresh, 0, 162 "Raise alarm once number ECC errors go above this value"); 163 } 164 165 if (after == NULL) { 166 KKASSERT(sc->dimm_id == 0); 167 TAILQ_INSERT_HEAD(&dimm_softc_list, sc, dimm_link); 168 } else { 169 TAILQ_INSERT_AFTER(&dimm_softc_list, after, sc, dimm_link); 170 } 171 172 sensordev_install(&sc->dimm_sensdev); 173 174 SYSCTL_XUNLOCK(); 175 return sc; 176 } 177 178 int 179 dimm_destroy(struct dimm_softc *sc) 180 { 181 SYSCTL_XLOCK(); 182 183 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", sc->dimm_ref)); 184 sc->dimm_ref--; 185 if (sc->dimm_ref > 0) { 186 SYSCTL_XUNLOCK(); 187 return EAGAIN; 188 } 189 190 sensordev_deinstall(&sc->dimm_sensdev); 191 192 TAILQ_REMOVE(&dimm_softc_list, sc, dimm_link); 193 if (sc->dimm_sysctl_tree != NULL) 194 sysctl_ctx_free(&sc->dimm_sysctl_ctx); 195 kfree(sc, M_DEVBUF); 196 197 SYSCTL_XUNLOCK(); 198 return 0; 199 } 200 201 void 202 dimm_sensor_attach(struct dimm_softc *sc, struct ksensor *sens) 203 { 204 sensor_attach(&sc->dimm_sensdev, sens); 205 } 206 207 void 208 dimm_sensor_detach(struct dimm_softc *sc, struct ksensor *sens) 209 { 210 sensor_detach(&sc->dimm_sensdev, sens); 211 } 212 213 void 214 dimm_set_temp_thresh(struct dimm_softc *sc, int hiwat, int lowat) 215 { 216 sc->dimm_temp_hiwat = hiwat; 217 sc->dimm_temp_lowat = lowat; 218 } 219 220 void 221 dimm_set_ecc_thresh(struct dimm_softc *sc, int thresh) 222 { 223 sc->dimm_ecc_thresh = thresh; 224 } 225 226 void 227 dimm_sensor_temp(struct dimm_softc *sc, struct ksensor *sens, int temp) 228 { 229 enum sensor_status status; 230 231 if (temp >= sc->dimm_temp_hiwat && 232 (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) == 0) { 233 char temp_str[16], data[64]; 234 235 ksnprintf(temp_str, sizeof(temp_str), "%d", temp); 236 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d", 237 sc->dimm_node, sc->dimm_chan, sc->dimm_slot); 238 devctl_notify("memtemp", "Thermal", temp_str, data); 239 240 kprintf("dimm%d: node%d channel%d DIMM%d " 241 "temperature (%dC) is too high (>= %dC)\n", 242 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot, 243 temp, sc->dimm_temp_hiwat); 244 245 sc->dimm_sens_taskflags |= DIMM_SENS_TF_TEMP_CRIT; 246 } else if ((sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) && 247 temp < sc->dimm_temp_lowat) { 248 sc->dimm_sens_taskflags &= ~DIMM_SENS_TF_TEMP_CRIT; 249 } 250 251 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) 252 status = SENSOR_S_CRIT; 253 else 254 status = SENSOR_S_OK; 255 sensor_set_temp_degc(sens, temp, status); 256 } 257 258 void 259 dimm_sensor_ecc_set(struct dimm_softc *sc, struct ksensor *sens, 260 int ecc_cnt, boolean_t crit) 261 { 262 sc->dimm_ecc_cnt = ecc_cnt; 263 dimm_sensor_ecc(sc, sens, crit); 264 } 265 266 void 267 dimm_sensor_ecc_add(struct dimm_softc *sc, struct ksensor *sens, 268 int ecc_cnt, boolean_t crit) 269 { 270 sc->dimm_ecc_cnt += ecc_cnt; 271 dimm_sensor_ecc(sc, sens, crit); 272 } 273 274 static void 275 dimm_sensor_ecc(struct dimm_softc *sc, struct ksensor *sens, boolean_t crit) 276 { 277 enum sensor_status status; 278 279 if (!crit && sc->dimm_ecc_cnt >= sc->dimm_ecc_thresh) 280 crit = TRUE; 281 282 if (crit && (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) == 0) { 283 char ecc_str[16], data[64]; 284 285 ksnprintf(ecc_str, sizeof(ecc_str), "%d", sc->dimm_ecc_cnt); 286 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d", 287 sc->dimm_node, sc->dimm_chan, sc->dimm_slot); 288 devctl_notify("ecc", "ECC", ecc_str, data); 289 290 kprintf("dimm%d: node%d channel%d DIMM%d " 291 "too many ECC errors %d\n", 292 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot, 293 sc->dimm_ecc_cnt); 294 295 sc->dimm_sens_taskflags |= DIMM_SENS_TF_ECC_CRIT; 296 } 297 298 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) 299 status = SENSOR_S_CRIT; 300 else 301 status = SENSOR_S_OK; 302 sensor_set(sens, sc->dimm_ecc_cnt, status); 303 } 304 305 static void 306 dimm_mod_unload(void) 307 { 308 struct dimm_softc *sc; 309 310 SYSCTL_XLOCK(); 311 312 while ((sc = TAILQ_FIRST(&dimm_softc_list)) != NULL) { 313 int error; 314 315 error = dimm_destroy(sc); 316 KASSERT(!error, ("dimm%d is still referenced, ref %d", 317 sc->dimm_id, sc->dimm_ref)); 318 } 319 320 SYSCTL_XUNLOCK(); 321 } 322 323 static int 324 dimm_mod_event(module_t mod, int type, void *unused) 325 { 326 switch (type) { 327 case MOD_LOAD: 328 TAILQ_INIT(&dimm_softc_list); 329 return 0; 330 331 case MOD_UNLOAD: 332 dimm_mod_unload(); 333 return 0; 334 335 default: 336 return 0; 337 } 338 } 339 340 static moduledata_t dimm_mod = { 341 "dimm", 342 dimm_mod_event, 343 0 344 }; 345 DECLARE_MODULE(dimm, dimm_mod, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY); 346 MODULE_VERSION(dimm, 1); 347