1 /*
2 * Copyright (c) 2015 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Sepherosa Ziehau <sepherosa@gmail.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/module.h>
41 #include <sys/sensors.h>
42 #include <sys/sysctl.h>
43 #include <sys/systm.h>
44
45 #include <dev/misc/dimm/dimm.h>
46
/*
 * Default alarm thresholds applied to every newly created DIMM softc.
 * Temperatures are in degrees Celsius.
 */
#define DIMM_TEMP_HIWAT_DEFAULT	85	/* temperature alarm is raised */
#define DIMM_TEMP_LOWAT_DEFAULT	75	/* temperature alarm is cancelled */

#define DIMM_ECC_THRESH_DEFAULT	5	/* ECC error count alarm */
51
52 struct dimm_softc {
53 TAILQ_ENTRY(dimm_softc) dimm_link;
54 int dimm_node;
55 int dimm_chan;
56 int dimm_slot;
57 int dimm_temp_hiwat;
58 int dimm_temp_lowat;
59 int dimm_id;
60 int dimm_ref;
61 int dimm_ecc_cnt;
62 int dimm_ecc_thresh;
63
64 struct ksensordev dimm_sensdev;
65 uint32_t dimm_sens_taskflags; /* DIMM_SENS_TF_ */
66
67 struct sysctl_ctx_list dimm_sysctl_ctx;
68 struct sysctl_oid *dimm_sysctl_tree;
69 };
70 TAILQ_HEAD(dimm_softc_list, dimm_softc);
71
/* Bits of dimm_softc.dimm_sens_taskflags: which alarms are currently raised. */
#define DIMM_SENS_TF_TEMP_CRIT	0x1	/* temperature alarm is active */
#define DIMM_SENS_TF_ECC_CRIT	0x2	/* ECC error alarm is active */
74
75 static void dimm_mod_unload(void);
76 static void dimm_sensor_ecc(struct dimm_softc *, struct ksensor *,
77 boolean_t);
78
79 /* In the ascending order of dimm_softc.dimm_id */
80 static struct dimm_softc_list dimm_softc_list;
81
82 static SYSCTL_NODE(_hw, OID_AUTO, dimminfo, CTLFLAG_RD, NULL,
83 "DIMM information");
84
85 struct dimm_softc *
dimm_create(int node,int chan,int slot)86 dimm_create(int node, int chan, int slot)
87 {
88 struct dimm_softc *sc, *after = NULL;
89 int dimm_id = 0;
90
91 SYSCTL_XLOCK();
92
93 TAILQ_FOREACH(sc, &dimm_softc_list, dimm_link) {
94 /*
95 * Already exists; done.
96 */
97 if (sc->dimm_node == node && sc->dimm_chan == chan &&
98 sc->dimm_slot == slot) {
99 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d",
100 sc->dimm_ref));
101 sc->dimm_ref++;
102 SYSCTL_XUNLOCK();
103 return sc;
104 }
105
106 /*
107 * Find the lowest usable id.
108 */
109 if (sc->dimm_id == dimm_id) {
110 ++dimm_id;
111 after = sc;
112 }
113 }
114
115 sc = kmalloc(sizeof(*sc), M_DEVBUF, M_WAITOK | M_ZERO);
116 sc->dimm_node = node;
117 sc->dimm_chan = chan;
118 sc->dimm_slot = slot;
119 sc->dimm_id = dimm_id;
120 sc->dimm_ref = 1;
121 sc->dimm_temp_hiwat = DIMM_TEMP_HIWAT_DEFAULT;
122 sc->dimm_temp_lowat = DIMM_TEMP_LOWAT_DEFAULT;
123 sc->dimm_ecc_thresh = DIMM_ECC_THRESH_DEFAULT;
124
125 ksnprintf(sc->dimm_sensdev.xname, sizeof(sc->dimm_sensdev.xname),
126 "dimm%d", sc->dimm_id);
127
128 /*
129 * Create sysctl tree for the location information. Use
130 * same name as the sensor device.
131 */
132 sysctl_ctx_init(&sc->dimm_sysctl_ctx);
133 sc->dimm_sysctl_tree = SYSCTL_ADD_NODE(&sc->dimm_sysctl_ctx,
134 SYSCTL_STATIC_CHILDREN(_hw_dimminfo), OID_AUTO,
135 sc->dimm_sensdev.xname, CTLFLAG_RD, 0, "");
136 if (sc->dimm_sysctl_tree != NULL) {
137 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
138 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
139 "node", CTLFLAG_RD, &sc->dimm_node, 0,
140 "CPU node of this DIMM");
141 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
142 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
143 "chan", CTLFLAG_RD, &sc->dimm_chan, 0,
144 "channel of this DIMM");
145 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
146 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
147 "slot", CTLFLAG_RD, &sc->dimm_slot, 0,
148 "slot of this DIMM");
149 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
150 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
151 "temp_hiwat", CTLFLAG_RW, &sc->dimm_temp_hiwat, 0,
152 "Raise alarm once DIMM temperature is above this value "
153 "(unit: C)");
154 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
155 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
156 "temp_lowat", CTLFLAG_RW, &sc->dimm_temp_lowat, 0,
157 "Cancel alarm once DIMM temperature is below this value "
158 "(unit: C)");
159 SYSCTL_ADD_INT(&sc->dimm_sysctl_ctx,
160 SYSCTL_CHILDREN(sc->dimm_sysctl_tree), OID_AUTO,
161 "ecc_thresh", CTLFLAG_RW, &sc->dimm_ecc_thresh, 0,
162 "Raise alarm once number ECC errors go above this value");
163 }
164
165 if (after == NULL) {
166 KKASSERT(sc->dimm_id == 0);
167 TAILQ_INSERT_HEAD(&dimm_softc_list, sc, dimm_link);
168 } else {
169 TAILQ_INSERT_AFTER(&dimm_softc_list, after, sc, dimm_link);
170 }
171
172 sensordev_install(&sc->dimm_sensdev);
173
174 SYSCTL_XUNLOCK();
175 return sc;
176 }
177
178 int
dimm_destroy(struct dimm_softc * sc)179 dimm_destroy(struct dimm_softc *sc)
180 {
181 SYSCTL_XLOCK();
182
183 KASSERT(sc->dimm_ref > 0, ("invalid dimm reference %d", sc->dimm_ref));
184 sc->dimm_ref--;
185 if (sc->dimm_ref > 0) {
186 SYSCTL_XUNLOCK();
187 return EAGAIN;
188 }
189
190 sensordev_deinstall(&sc->dimm_sensdev);
191
192 TAILQ_REMOVE(&dimm_softc_list, sc, dimm_link);
193 if (sc->dimm_sysctl_tree != NULL)
194 sysctl_ctx_free(&sc->dimm_sysctl_ctx);
195 kfree(sc, M_DEVBUF);
196
197 SYSCTL_XUNLOCK();
198 return 0;
199 }
200
201 void
dimm_sensor_attach(struct dimm_softc * sc,struct ksensor * sens)202 dimm_sensor_attach(struct dimm_softc *sc, struct ksensor *sens)
203 {
204 sensor_attach(&sc->dimm_sensdev, sens);
205 }
206
207 void
dimm_sensor_detach(struct dimm_softc * sc,struct ksensor * sens)208 dimm_sensor_detach(struct dimm_softc *sc, struct ksensor *sens)
209 {
210 sensor_detach(&sc->dimm_sensdev, sens);
211 }
212
213 void
dimm_set_temp_thresh(struct dimm_softc * sc,int hiwat,int lowat)214 dimm_set_temp_thresh(struct dimm_softc *sc, int hiwat, int lowat)
215 {
216 sc->dimm_temp_hiwat = hiwat;
217 sc->dimm_temp_lowat = lowat;
218 }
219
220 void
dimm_set_ecc_thresh(struct dimm_softc * sc,int thresh)221 dimm_set_ecc_thresh(struct dimm_softc *sc, int thresh)
222 {
223 sc->dimm_ecc_thresh = thresh;
224 }
225
226 void
dimm_sensor_temp(struct dimm_softc * sc,struct ksensor * sens,int temp)227 dimm_sensor_temp(struct dimm_softc *sc, struct ksensor *sens, int temp)
228 {
229 enum sensor_status status;
230
231 if (temp >= sc->dimm_temp_hiwat &&
232 (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) == 0) {
233 char temp_str[16], data[64];
234
235 ksnprintf(temp_str, sizeof(temp_str), "%d", temp);
236 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d",
237 sc->dimm_node, sc->dimm_chan, sc->dimm_slot);
238 devctl_notify("memtemp", "Thermal", temp_str, data);
239
240 kprintf("dimm%d: node%d channel%d DIMM%d "
241 "temperature (%dC) is too high (>= %dC)\n",
242 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot,
243 temp, sc->dimm_temp_hiwat);
244
245 sc->dimm_sens_taskflags |= DIMM_SENS_TF_TEMP_CRIT;
246 } else if ((sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT) &&
247 temp < sc->dimm_temp_lowat) {
248 sc->dimm_sens_taskflags &= ~DIMM_SENS_TF_TEMP_CRIT;
249 }
250
251 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_TEMP_CRIT)
252 status = SENSOR_S_CRIT;
253 else
254 status = SENSOR_S_OK;
255 sensor_set_temp_degc(sens, temp, status);
256 }
257
258 void
dimm_sensor_ecc_set(struct dimm_softc * sc,struct ksensor * sens,int ecc_cnt,boolean_t crit)259 dimm_sensor_ecc_set(struct dimm_softc *sc, struct ksensor *sens,
260 int ecc_cnt, boolean_t crit)
261 {
262 sc->dimm_ecc_cnt = ecc_cnt;
263 dimm_sensor_ecc(sc, sens, crit);
264 }
265
266 void
dimm_sensor_ecc_add(struct dimm_softc * sc,struct ksensor * sens,int ecc_cnt,boolean_t crit)267 dimm_sensor_ecc_add(struct dimm_softc *sc, struct ksensor *sens,
268 int ecc_cnt, boolean_t crit)
269 {
270 sc->dimm_ecc_cnt += ecc_cnt;
271 dimm_sensor_ecc(sc, sens, crit);
272 }
273
274 static void
dimm_sensor_ecc(struct dimm_softc * sc,struct ksensor * sens,boolean_t crit)275 dimm_sensor_ecc(struct dimm_softc *sc, struct ksensor *sens, boolean_t crit)
276 {
277 enum sensor_status status;
278
279 if (!crit && sc->dimm_ecc_cnt >= sc->dimm_ecc_thresh)
280 crit = TRUE;
281
282 if (crit && (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT) == 0) {
283 char ecc_str[16], data[64];
284
285 ksnprintf(ecc_str, sizeof(ecc_str), "%d", sc->dimm_ecc_cnt);
286 ksnprintf(data, sizeof(data), "node=%d channel=%d dimm=%d",
287 sc->dimm_node, sc->dimm_chan, sc->dimm_slot);
288 devctl_notify("ecc", "ECC", ecc_str, data);
289
290 kprintf("dimm%d: node%d channel%d DIMM%d "
291 "too many ECC errors %d\n",
292 sc->dimm_id, sc->dimm_node, sc->dimm_chan, sc->dimm_slot,
293 sc->dimm_ecc_cnt);
294
295 sc->dimm_sens_taskflags |= DIMM_SENS_TF_ECC_CRIT;
296 }
297
298 if (sc->dimm_sens_taskflags & DIMM_SENS_TF_ECC_CRIT)
299 status = SENSOR_S_CRIT;
300 else
301 status = SENSOR_S_OK;
302 sensor_set(sens, sc->dimm_ecc_cnt, status);
303 }
304
305 static void
dimm_mod_unload(void)306 dimm_mod_unload(void)
307 {
308 struct dimm_softc *sc;
309
310 SYSCTL_XLOCK();
311
312 while ((sc = TAILQ_FIRST(&dimm_softc_list)) != NULL) {
313 int error;
314
315 error = dimm_destroy(sc);
316 KASSERT(!error, ("dimm%d is still referenced, ref %d",
317 sc->dimm_id, sc->dimm_ref));
318 }
319
320 SYSCTL_XUNLOCK();
321 }
322
323 static int
dimm_mod_event(module_t mod,int type,void * unused)324 dimm_mod_event(module_t mod, int type, void *unused)
325 {
326 switch (type) {
327 case MOD_LOAD:
328 TAILQ_INIT(&dimm_softc_list);
329 return 0;
330
331 case MOD_UNLOAD:
332 dimm_mod_unload();
333 return 0;
334
335 default:
336 return 0;
337 }
338 }
339
340 static moduledata_t dimm_mod = {
341 "dimm",
342 dimm_mod_event,
343 0
344 };
345 DECLARE_MODULE(dimm, dimm_mod, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY);
346 MODULE_VERSION(dimm, 1);
347