1 /*-
2 * collectd - src/mcelog.c
3 * MIT License
4 *
5 * Copyright(c) 2016-2017 Intel Corporation. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24
25 * Authors:
26 * Maryam Tahhan <maryam.tahhan@intel.com>
27 * Volodymyr Mytnyk <volodymyrx.mytnyk@intel.com>
28 * Taras Chornyi <tarasx.chornyi@intel.com>
29 * Krzysztof Matczak <krzysztofx.matczak@intel.com>
30 */
31
32 #include "collectd.h"
33
34 #include "utils/common/common.h"
35 #include "utils_llist.h"
36
37 #include <poll.h>
38 #include <sys/socket.h>
39 #include <sys/un.h>
40 #include <unistd.h>
41
/* Plugin name: prefix of the log messages in this file and the "plugin"
 * field of the values and notifications it dispatches. */
#define MCELOG_PLUGIN "mcelog"
/* Not referenced in the visible part of this file. */
#define MCELOG_BUFF_SIZE 1024
/* Timeout passed to poll() in socket_receive() and pause between reconnect
 * attempts in poll_worker(). */
#define MCELOG_POLL_TIMEOUT 1000 /* ms */
/* Line prefixes recognised by parse_memory_info(). */
#define MCELOG_SOCKET_STR "SOCKET"
#define MCELOG_DIMM_NAME "DMI_NAME"
/* Line prefixes recognised by parse_memory_info(); also used as notification
 * message text and as the metadata key of the total counters. */
#define MCELOG_CORRECTED_ERR "corrected memory errors"
#define MCELOG_UNCORRECTED_ERR "uncorrected memory errors"
/* Notification metadata keys of the timed counters. */
#define MCELOG_CORRECTED_ERR_TIMED "corrected memory timed errors"
#define MCELOG_UNCORRECTED_ERR_TIMED "uncorrected memory timed errors"
/* Type instances used for dispatched values and notifications. */
#define MCELOG_CORRECTED_ERR_TYPE_INS "corrected_memory_errors"
#define MCELOG_UNCORRECTED_ERR_TYPE_INS "uncorrected_memory_errors"
53
/* Plugin-wide configuration and runtime state (single instance:
 * g_mcelog_config). */
typedef struct mcelog_config_s {
  char logfile[PATH_MAX]; /* mcelog logfile */
  pthread_t tid; /* poll thread id */
  llist_t *dimms_list; /* DIMMs list */
  pthread_mutex_t dimms_lock; /* lock for dimms cache */
  /* Set from the "PersistentNotification" option. When true, memory error
   * notifications are dispatched on every update instead of only when a
   * counter differs from the cached record. */
  bool persist;
} mcelog_config_t;
61
typedef struct socket_adapter_s socket_adapter_t;

/* Connection to the mcelog client socket together with the operations that
 * act on it. The operations are reached through the function pointers so that
 * callers go through the adapter instance (see socket_adapter below). */
struct socket_adapter_s {
  int sock_fd; /* mcelog server socket fd */
  struct sockaddr_un unix_sock; /* mcelog client socket */
  /* Protects sock_fd: socket_reinit() takes it exclusively while it replaces
   * the descriptor, the other operations take it shared. */
  pthread_rwlock_t lock;
  /* function pointers for socket operations */
  int (*write)(socket_adapter_t *self, const char *msg, const size_t len);
  int (*reinit)(socket_adapter_t *self);
  int (*receive)(socket_adapter_t *self, FILE **p_file);
  int (*close)(socket_adapter_t *self);
};
74
/* Error counters of one DIMM as filled in by parse_memory_info(). The same
 * type is used for the per-DIMM records cached in the dimms list. */
typedef struct mcelog_memory_rec_s {
  int corrected_err_total; /* x total*/
  int corrected_err_timed; /* x in 24h*/
  /* Period text taken from the "<n> in <period>" line. */
  char corrected_err_timed_period[DATA_MAX_NAME_LEN / 2];
  int uncorrected_err_total; /* x total*/
  int uncorrected_err_timed; /* x in 24h*/
  /* Period text taken from the "<n> in <period>" line. */
  char uncorrected_err_timed_period[DATA_MAX_NAME_LEN / 2];
  char location[DATA_MAX_NAME_LEN / 2]; /* SOCKET x CHANNEL x DIMM x*/
  char dimm_name[DATA_MAX_NAME_LEN / 2]; /* DMI_NAME "DIMM_F1" */
} mcelog_memory_rec_t;
85
/* Forward declarations of the socket operations; they are referenced by the
 * initializer of socket_adapter before their definitions appear. */
static int socket_close(socket_adapter_t *self);
static int socket_write(socket_adapter_t *self, const char *msg,
                        const size_t len);
static int socket_reinit(socket_adapter_t *self);
static int socket_receive(socket_adapter_t *self, FILE **p_file);
91
/* Plugin configuration with its defaults: log file "/var/log/mcelog" and
 * non-persistent notifications. Members not listed are zero-initialized. */
static mcelog_config_t g_mcelog_config = {
    .logfile = "/var/log/mcelog",
    .persist = false,
};
96
/* The single adapter instance. It starts without an open descriptor
 * (sock_fd == -1) and targets the UNIX socket "/var/run/mcelog-client" unless
 * the configuration overrides the path. */
static socket_adapter_t socket_adapter = {
    .sock_fd = -1,
    .unix_sock =
        {
            .sun_family = AF_UNIX,
            .sun_path = "/var/run/mcelog-client",
        },
    .lock = PTHREAD_RWLOCK_INITIALIZER,
    .close = socket_close,
    .write = socket_write,
    .reinit = socket_reinit,
    .receive = socket_receive,
};
110
/* Set by poll_worker() when it starts and cleared by poll_worker_cleanup();
 * mcelog_shutdown() reads it to decide whether to cancel and join the poll
 * thread.
 * NOTE(review): plain bool that appears to be accessed from more than one
 * thread without synchronization - confirm this is acceptable. */
static bool mcelog_thread_running;
/* Set by mcelog_config() when neither "McelogLogfile" nor "Memory" was
 * configured; mcelog_init() then clears the logfile path. */
static bool mcelog_apply_defaults;
113
/* Release the key and the value held by every entry of dimms_list. The
 * entries themselves and the list are not freed here. */
static void mcelog_free_dimms_list_records(llist_t *dimms_list) {
  llentry_t *entry = llist_head(dimms_list);

  while (entry != NULL) {
    sfree(entry->key);
    sfree(entry->value);
    entry = entry->next;
  }
}
121
122 /* Create or get dimm by dimm name/location */
mcelog_dimm(const mcelog_memory_rec_t * rec,llist_t * dimms_list)123 static llentry_t *mcelog_dimm(const mcelog_memory_rec_t *rec,
124 llist_t *dimms_list) {
125
126 char dimm_name[DATA_MAX_NAME_LEN];
127
128 if (strlen(rec->dimm_name) > 0) {
129 snprintf(dimm_name, sizeof(dimm_name), "%s_%s", rec->location,
130 rec->dimm_name);
131 } else
132 sstrncpy(dimm_name, rec->location, sizeof(dimm_name));
133
134 llentry_t *dimm_le = llist_search(g_mcelog_config.dimms_list, dimm_name);
135
136 if (dimm_le != NULL)
137 return dimm_le;
138
139 /* allocate new linked list entry */
140 mcelog_memory_rec_t *dimm_mr = calloc(1, sizeof(*dimm_mr));
141 if (dimm_mr == NULL) {
142 ERROR(MCELOG_PLUGIN ": Error allocating dimm memory item");
143 return NULL;
144 }
145 char *p_name = strdup(dimm_name);
146 if (p_name == NULL) {
147 ERROR(MCELOG_PLUGIN ": strdup: error");
148 free(dimm_mr);
149 return NULL;
150 }
151
152 /* add new dimm */
153 dimm_le = llentry_create(p_name, dimm_mr);
154 if (dimm_le == NULL) {
155 ERROR(MCELOG_PLUGIN ": llentry_create(): error");
156 free(dimm_mr);
157 free(p_name);
158 return NULL;
159 }
160 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
161 llist_append(g_mcelog_config.dimms_list, dimm_le);
162 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
163
164 return dimm_le;
165 }
166
/* Overwrite the record cached in dimm with a copy of rec while holding the
 * DIMM cache lock. */
static void mcelog_update_dimm_stats(llentry_t *dimm,
                                     const mcelog_memory_rec_t *rec) {
  pthread_mutex_t *cache_lock = &g_mcelog_config.dimms_lock;

  pthread_mutex_lock(cache_lock);
  memcpy(dimm->value, rec, sizeof(*rec));
  pthread_mutex_unlock(cache_lock);
}
173
/* Apply the options found inside a "Memory" block: "McelogClientSocket"
 * (socket path) and "PersistentNotification" (boolean). Returns 0 on success
 * and -1 on the first option that is unknown or has an invalid value. */
static int mcelog_config_memory_block(oconfig_item_t *memory) {
  for (int idx = 0; idx < memory->children_num; idx++) {
    oconfig_item_t *option = &memory->children[idx];

    if (strcasecmp("McelogClientSocket", option->key) == 0) {
      if (cf_util_get_string_buffer(
              option, socket_adapter.unix_sock.sun_path,
              sizeof(socket_adapter.unix_sock.sun_path)) >= 0)
        continue;
      ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
            option->key);
      return -1;
    }

    if (strcasecmp("PersistentNotification", option->key) == 0) {
      if (cf_util_get_boolean(option, &g_mcelog_config.persist) >= 0)
        continue;
      ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
            option->key);
      return -1;
    }

    ERROR(MCELOG_PLUGIN ": Invalid Memory configuration option: \"%s\".",
          option->key);
    return -1;
  }

  return 0;
}

/*
 * Configuration callback of the plugin.
 *
 * Accepts either "McelogLogfile" (stores the path and clears the socket path)
 * or a "Memory" block (applies its options and clears the logfile path); the
 * two are mutually exclusive. When neither is present, mcelog_apply_defaults
 * is set so that mcelog_init() falls back to the defaults.
 *
 * Returns 0 on success and -1 on the first invalid option.
 */
static int mcelog_config(oconfig_item_t *ci) {
  bool logfile_seen = false;
  bool memory_seen = false;

  for (int idx = 0; idx < ci->children_num; idx++) {
    oconfig_item_t *option = &ci->children[idx];

    if (strcasecmp("McelogLogfile", option->key) == 0) {
      logfile_seen = true;
      if (memory_seen) {
        ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Memory "
                            "option is already configured.",
              option->key);
        return -1;
      }
      if (cf_util_get_string_buffer(option, g_mcelog_config.logfile,
                                    sizeof(g_mcelog_config.logfile)) < 0) {
        ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
              option->key);
        return -1;
      }
      /* Logfile mode: drop the socket path. */
      memset(socket_adapter.unix_sock.sun_path, 0,
             sizeof(socket_adapter.unix_sock.sun_path));
      continue;
    }

    if (strcasecmp("Memory", option->key) != 0) {
      ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\".",
            option->key);
      return -1;
    }

    if (logfile_seen) {
      ERROR(MCELOG_PLUGIN ": Invalid configuration option: \"%s\", Logfile "
                          "option is already configured.",
            option->key);
      return -1;
    }
    memory_seen = true;

    if (mcelog_config_memory_block(option) != 0)
      return -1;

    /* Memory mode: drop the logfile path. */
    memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
  }

  if (!logfile_seen && !memory_seen)
    mcelog_apply_defaults = true;

  return 0;
}
237
/*
 * Shut down and close the adapter's socket if sock_fd refers to an open
 * descriptor, then mark the adapter as having no descriptor.
 *
 * Returns 0 on success (including when there was nothing to close) and -1 if
 * shutdown() or close() reported an error.
 */
static int socket_close(socket_adapter_t *self) {
  int ret = 0;
  /* Only the shared lock is taken: socket_reinit() calls this function while
   * it already holds a read lock on the same rwlock. */
  pthread_rwlock_rdlock(&self->lock);
  if (fcntl(self->sock_fd, F_GETFL) != -1) {
    if (shutdown(self->sock_fd, SHUT_RDWR) != 0) {
      ERROR(MCELOG_PLUGIN ": Socket shutdown failed: %s", STRERRNO);
      ret = -1;
    }
    if (close(self->sock_fd) != 0) {
      ERROR(MCELOG_PLUGIN ": Socket close failed: %s", STRERRNO);
      ret = -1;
    }
    /* Forget the descriptor number. After close() the number may be handed
     * out again for an unrelated file, and a later write or close through
     * this adapter must not touch it.
     * NOTE(review): this store happens under the shared lock because of the
     * recursive read lock mentioned above - confirm that is acceptable. */
    self->sock_fd = -1;
  }
  pthread_rwlock_unlock(&self->lock);
  return ret;
}
254
/* Write len bytes of msg to the adapter's socket while holding the shared
 * lock. Returns 0 when swrite() reports success and -1 otherwise. */
static int socket_write(socket_adapter_t *self, const char *msg,
                        const size_t len) {
  pthread_rwlock_rdlock(&self->lock);
  const bool failed = swrite(self->sock_fd, msg, len) != 0;
  pthread_rwlock_unlock(&self->lock);

  return failed ? -1 : 0;
}
264
/* Fill in host and type ("gauge") of n, dispatch it and free any metadata
 * attached to it. A NULL n is logged and ignored. */
static void mcelog_dispatch_notification(notification_t *n) {
  if (n == NULL) {
    ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
    return;
  }

  sstrncpy(n->host, hostname_g, sizeof(n->host));
  sstrncpy(n->type, "gauge", sizeof(n->type));

  plugin_dispatch_notification(n);

  if (n->meta != NULL)
    plugin_notification_meta_free(n->meta);
}
277
/*
 * Create a new non-blocking UNIX stream socket for the adapter and connect it
 * to the configured mcelog client socket path.
 *
 * The send timeout of the socket is set to the plugin interval; failing to
 * set it is logged but not treated as an error. On a successful connect an
 * OKAY notification with type instance "mcelog_status" is dispatched; on a
 * failed connect the socket is closed again.
 *
 * Returns 0 when connected and -1 otherwise.
 */
static int socket_reinit(socket_adapter_t *self) {
  const cdtime_t interval = plugin_get_interval();
  struct timeval send_timeout = CDTIME_T_TO_TIMEVAL(interval);

  /* sock_fd is replaced below, so the exclusive lock is required */
  pthread_rwlock_wrlock(&self->lock);
  self->sock_fd =
      socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
  if (self->sock_fd < 0) {
    ERROR(MCELOG_PLUGIN ": Could not create a socket. %s", STRERRNO);
    pthread_rwlock_unlock(&self->lock);
    return -1;
  }

  if (setsockopt(self->sock_fd, SOL_SOCKET, SO_SNDTIMEO, &send_timeout,
                 sizeof(send_timeout)) < 0)
    ERROR(MCELOG_PLUGIN ": Failed to set the socket timeout option.");

  /* Switch to the shared lock: self->close(self) below takes a read lock on
   * the same rwlock, which must not happen while the write lock is held. */
  pthread_rwlock_unlock(&self->lock);
  pthread_rwlock_rdlock(&self->lock);

  const bool connected =
      connect(self->sock_fd, (struct sockaddr *)&(self->unix_sock),
              sizeof(self->unix_sock)) >= 0;
  if (connected) {
    notification_t status = {.severity = NOTIF_OKAY,
                             .time = cdtime(),
                             .message = "Connected to mcelog server",
                             .plugin = MCELOG_PLUGIN,
                             .type_instance = "mcelog_status"};
    mcelog_dispatch_notification(&status);
  } else {
    ERROR(MCELOG_PLUGIN ": Failed to connect to mcelog server. %s", STRERRNO);
    self->close(self);
  }

  pthread_rwlock_unlock(&self->lock);
  return connected ? 0 : -1;
}
319
mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t * mr)320 static int mcelog_dispatch_mem_notifications(const mcelog_memory_rec_t *mr) {
321 notification_t n = {.severity = NOTIF_WARNING,
322 .time = cdtime(),
323 .plugin = MCELOG_PLUGIN,
324 .type = "errors"};
325
326 int dispatch_corrected_notifs = 0, dispatch_uncorrected_notifs = 0;
327
328 if (mr == NULL)
329 return -1;
330
331 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
332 if (dimm == NULL) {
333 ERROR(MCELOG_PLUGIN
334 ": Error adding/getting dimm memory item to/from cache");
335 return -1;
336 }
337 mcelog_memory_rec_t *mr_old = dimm->value;
338 if (!g_mcelog_config.persist) {
339
340 if (mr_old->corrected_err_total != mr->corrected_err_total ||
341 mr_old->corrected_err_timed != mr->corrected_err_timed)
342 dispatch_corrected_notifs = 1;
343
344 if (mr_old->uncorrected_err_total != mr->uncorrected_err_total ||
345 mr_old->uncorrected_err_timed != mr->uncorrected_err_timed)
346 dispatch_uncorrected_notifs = 1;
347
348 if (!dispatch_corrected_notifs && !dispatch_uncorrected_notifs) {
349 DEBUG("%s: No new notifications to dispatch", MCELOG_PLUGIN);
350 return 0;
351 }
352 } else {
353 dispatch_corrected_notifs = 1;
354 dispatch_uncorrected_notifs = 1;
355 }
356
357 sstrncpy(n.host, hostname_g, sizeof(n.host));
358
359 if (mr->dimm_name[0] != '\0')
360 snprintf(n.plugin_instance, sizeof(n.plugin_instance), "%s_%s",
361 mr->location, mr->dimm_name);
362 else
363 sstrncpy(n.plugin_instance, mr->location, sizeof(n.plugin_instance));
364
365 if (dispatch_corrected_notifs &&
366 (mr->corrected_err_total > 0 || mr->corrected_err_timed > 0)) {
367 /* Corrected Error Notifications */
368 plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR,
369 mr->corrected_err_total);
370 plugin_notification_meta_add_signed_int(&n, MCELOG_CORRECTED_ERR_TIMED,
371 mr->corrected_err_timed);
372 snprintf(n.message, sizeof(n.message), MCELOG_CORRECTED_ERR);
373 sstrncpy(n.type_instance, MCELOG_CORRECTED_ERR_TYPE_INS,
374 sizeof(n.type_instance));
375 plugin_dispatch_notification(&n);
376 if (n.meta)
377 plugin_notification_meta_free(n.meta);
378 n.meta = NULL;
379 }
380
381 if (dispatch_uncorrected_notifs &&
382 (mr->uncorrected_err_total > 0 || mr->uncorrected_err_timed > 0)) {
383 /* Uncorrected Error Notifications */
384 plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR,
385 mr->uncorrected_err_total);
386 plugin_notification_meta_add_signed_int(&n, MCELOG_UNCORRECTED_ERR_TIMED,
387 mr->uncorrected_err_timed);
388 snprintf(n.message, sizeof(n.message), MCELOG_UNCORRECTED_ERR);
389 sstrncpy(n.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
390 sizeof(n.type_instance));
391 n.severity = NOTIF_FAILURE;
392 plugin_dispatch_notification(&n);
393 if (n.meta)
394 plugin_notification_meta_free(n.meta);
395 n.meta = NULL;
396 }
397
398 return 0;
399 }
400
mcelog_submit(const mcelog_memory_rec_t * mr)401 static int mcelog_submit(const mcelog_memory_rec_t *mr) {
402
403 if (!mr) {
404 ERROR(MCELOG_PLUGIN ": %s: NULL pointer", __FUNCTION__);
405 return -1;
406 }
407
408 llentry_t *dimm = mcelog_dimm(mr, g_mcelog_config.dimms_list);
409 if (dimm == NULL) {
410 ERROR(MCELOG_PLUGIN
411 ": Error adding/getting dimm memory item to/from cache");
412 return -1;
413 }
414
415 value_list_t vl = {
416 .values_len = 1,
417 .values = &(value_t){.derive = (derive_t)mr->corrected_err_total},
418 .time = cdtime(),
419 .plugin = MCELOG_PLUGIN,
420 .type = "errors",
421 .type_instance = MCELOG_CORRECTED_ERR_TYPE_INS};
422
423 mcelog_update_dimm_stats(dimm, mr);
424
425 if (mr->dimm_name[0] != '\0')
426 snprintf(vl.plugin_instance, sizeof(vl.plugin_instance), "%s_%s",
427 mr->location, mr->dimm_name);
428 else
429 sstrncpy(vl.plugin_instance, mr->location, sizeof(vl.plugin_instance));
430
431 plugin_dispatch_values(&vl);
432
433 snprintf(vl.type_instance, sizeof(vl.type_instance),
434 "corrected_memory_errors_in_%s", mr->corrected_err_timed_period);
435 vl.values = &(value_t){.derive = (derive_t)mr->corrected_err_timed};
436 plugin_dispatch_values(&vl);
437
438 sstrncpy(vl.type_instance, MCELOG_UNCORRECTED_ERR_TYPE_INS,
439 sizeof(vl.type_instance));
440 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_total};
441 plugin_dispatch_values(&vl);
442
443 snprintf(vl.type_instance, sizeof(vl.type_instance),
444 "uncorrected_memory_errors_in_%s", mr->uncorrected_err_timed_period);
445 vl.values = &(value_t){.derive = (derive_t)mr->uncorrected_err_timed};
446 plugin_dispatch_values(&vl);
447
448 return 0;
449 }
450
/*
 * Parse one DIMM record from p_file into memory_record.
 *
 * Lines are read until a record terminator is seen: an empty line, or a line
 * that is a leading part of "done\n". Lines shorter than five characters are
 * skipped. Recognised lines:
 *   SOCKET ...                  stored as location, without the trailing
 *                               newline and with spaces replaced by '_'
 *   DMI_NAME "<name>"           text between the first pair of double quotes
 *                               is stored as dimm_name
 *   corrected memory errors     the next two lines are read as
 *                               "\t<n> total" and "\t<n> in <period>"
 *   uncorrected memory errors   same layout as above
 * Fields for which no matching line is seen are left untouched. All string
 * fields are truncated to their own size.
 *
 * Returns 1 when a record terminator was read and 0 when fgets() returns
 * NULL (end of input or read error).
 */
static int parse_memory_info(FILE *p_file, mcelog_memory_rec_t *memory_record) {
  char buf[DATA_MAX_NAME_LEN] = {0};
  /* Scratch buffer for "%s" conversions. A token scanned out of buf can never
   * be longer than buf itself, so scanning into it cannot overflow; the
   * result is then copied into the (smaller) record field with truncation. */
  char period[sizeof(buf)];

  while (fgets(buf, sizeof(buf), p_file)) {
    const size_t line_len = strlen(buf);

    /* Got empty line or "done" */
    if ((!strncmp("\n", buf, line_len)) || (!strncmp(buf, "done\n", line_len)))
      return 1;
    if (line_len < 5)
      continue;

    if (!strncmp(buf, MCELOG_SOCKET_STR, strlen(MCELOG_SOCKET_STR))) {
      /* Copy the line without its newline, bounded by the size of the
       * destination field rather than by the length of the input line. */
      size_t copy_len = line_len;
      if (buf[copy_len - 1] == '\n')
        copy_len--;
      if (copy_len >= sizeof(memory_record->location))
        copy_len = sizeof(memory_record->location) - 1;
      memcpy(memory_record->location, buf, copy_len);
      memory_record->location[copy_len] = '\0';

      /* replace spaces with '_' */
      for (char *c = memory_record->location; *c != '\0'; c++)
        if (*c == ' ')
          *c = '_';
      DEBUG(MCELOG_PLUGIN ": Got SOCKET INFO %s", memory_record->location);
    }

    if (!strncmp(buf, MCELOG_DIMM_NAME, strlen(MCELOG_DIMM_NAME))) {
      char *name = NULL;
      char *saveptr = NULL;
      /* The first token is the text before the opening quote, the second one
       * is the quoted name. */
      name = strtok_r(buf, "\"", &saveptr);
      if (name != NULL && saveptr != NULL) {
        name = strtok_r(NULL, "\"", &saveptr);
        if (name != NULL) {
          sstrncpy(memory_record->dimm_name, name,
                   sizeof(memory_record->dimm_name));
          DEBUG(MCELOG_PLUGIN ": Got DIMM NAME %s", memory_record->dimm_name);
        }
      }
    }

    if (!strncmp(buf, MCELOG_CORRECTED_ERR, strlen(MCELOG_CORRECTED_ERR))) {
      /* Get next line*/
      if (fgets(buf, sizeof(buf), p_file) != NULL) {
        sscanf(buf, "\t%d total", &(memory_record->corrected_err_total));
        DEBUG(MCELOG_PLUGIN ": Got corrected error total %d",
              memory_record->corrected_err_total);
      }
      if (fgets(buf, sizeof(buf), p_file) != NULL) {
        if (sscanf(buf, "\t%d in %s", &(memory_record->corrected_err_timed),
                   period) == 2)
          sstrncpy(memory_record->corrected_err_timed_period, period,
                   sizeof(memory_record->corrected_err_timed_period));
        DEBUG(MCELOG_PLUGIN ": Got timed corrected errors %d in %s",
              memory_record->corrected_err_timed,
              memory_record->corrected_err_timed_period);
      }
    }

    if (!strncmp(buf, MCELOG_UNCORRECTED_ERR, strlen(MCELOG_UNCORRECTED_ERR))) {
      if (fgets(buf, sizeof(buf), p_file) != NULL) {
        sscanf(buf, "\t%d total", &(memory_record->uncorrected_err_total));
        DEBUG(MCELOG_PLUGIN ": Got uncorrected error total %d",
              memory_record->uncorrected_err_total);
      }
      if (fgets(buf, sizeof(buf), p_file) != NULL) {
        if (sscanf(buf, "\t%d in %s", &(memory_record->uncorrected_err_timed),
                   period) == 2)
          sstrncpy(memory_record->uncorrected_err_timed_period, period,
                   sizeof(memory_record->uncorrected_err_timed_period));
        DEBUG(MCELOG_PLUGIN ": Got timed uncorrected errors %d in %s",
              memory_record->uncorrected_err_timed,
              memory_record->uncorrected_err_timed_period);
      }
    }

    memset(buf, 0, sizeof(buf));
  }
  /* parsing definitely finished */
  return 0;
}
515
poll_worker_cleanup(void * arg)516 static void poll_worker_cleanup(void *arg) {
517 mcelog_thread_running = 0;
518 FILE *p_file = *((FILE **)arg);
519 if (p_file != NULL)
520 fclose(p_file);
521 free(arg);
522 }
523
/*
 * Wait up to MCELOG_POLL_TIMEOUT milliseconds for data on the adapter's
 * socket and, if there is any, open a read stream on a duplicate of the
 * descriptor.
 *
 * pp_file: receives the stream on success; the caller owns it and must
 *          fclose() it. It is set to NULL when the stream cannot be opened.
 *
 * Returns a positive value when *pp_file was opened, 0 on timeout or when
 * there is nothing to read, and a negative value when poll() failed
 * (including EINTR, which is just not logged), the connection is broken or
 * the stream could not be created. A FAILURE notification is dispatched when
 * poll reports POLLERR or POLLHUP.
 */
static int socket_receive(socket_adapter_t *self, FILE **pp_file) {
  int res = -1;
  pthread_rwlock_rdlock(&self->lock);
  struct pollfd poll_fd = {
      .fd = self->sock_fd,
      .events = POLLIN | POLLPRI,
  };

  if ((res = poll(&poll_fd, 1, MCELOG_POLL_TIMEOUT)) <= 0) {
    if (res != 0 && errno != EINTR) {
      ERROR("mcelog: poll failed: %s", STRERRNO);
    }
    pthread_rwlock_unlock(&self->lock);
    return res;
  }

  if (poll_fd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
    /* connection is broken */
    ERROR(MCELOG_PLUGIN ": Connection to socket is broken");
    if (poll_fd.revents & (POLLERR | POLLHUP)) {
      mcelog_dispatch_notification(
          &(notification_t){.severity = NOTIF_FAILURE,
                            .time = cdtime(),
                            .message = "Connection to mcelog socket is broken.",
                            .plugin = MCELOG_PLUGIN,
                            .type_instance = "mcelog_status"});
    }
    pthread_rwlock_unlock(&self->lock);
    return -1;
  }

  if (!(poll_fd.revents & (POLLIN | POLLPRI))) {
    INFO(MCELOG_PLUGIN ": No data to read");
    pthread_rwlock_unlock(&self->lock);
    return 0;
  }

  /* The stream gets its own descriptor so that fclose() by the caller does
   * not close the adapter's socket. */
  int dup_fd = dup(self->sock_fd);
  if (dup_fd < 0) {
    ERROR(MCELOG_PLUGIN ": Failed to duplicate socket descriptor: %s",
          STRERRNO);
    *pp_file = NULL;
    res = -1;
  } else if ((*pp_file = fdopen(dup_fd, "r")) == NULL) {
    ERROR(MCELOG_PLUGIN ": Failed to open socket stream: %s", STRERRNO);
    /* fdopen() did not take ownership of the duplicate, release it here */
    close(dup_fd);
    res = -1;
  }

  pthread_rwlock_unlock(&self->lock);
  return res;
}
567
poll_worker(void * arg)568 static void *poll_worker(__attribute__((unused)) void *arg) {
569 mcelog_thread_running = 1;
570 FILE **pp_file = calloc(1, sizeof(*pp_file));
571 if (pp_file == NULL) {
572 ERROR("mcelog: memory allocation failed: %s", STRERRNO);
573 pthread_exit((void *)1);
574 }
575
576 pthread_cleanup_push(poll_worker_cleanup, pp_file);
577
578 while (1) {
579 /* blocking call */
580 int res = socket_adapter.receive(&socket_adapter, pp_file);
581 if (res < 0) {
582 socket_adapter.close(&socket_adapter);
583 while (socket_adapter.reinit(&socket_adapter) != 0) {
584 nanosleep(&CDTIME_T_TO_TIMESPEC(MS_TO_CDTIME_T(MCELOG_POLL_TIMEOUT)),
585 NULL);
586 }
587 continue;
588 }
589 /* timeout or no data to read */
590 else if (res == 0)
591 continue;
592
593 if (*pp_file == NULL)
594 continue;
595
596 mcelog_memory_rec_t memory_record = {0};
597 while (parse_memory_info(*pp_file, &memory_record)) {
598 /* Check if location was successfully parsed */
599 if (memory_record.location[0] == '\0') {
600 memset(&memory_record, 0, sizeof(memory_record));
601 continue;
602 }
603
604 if (mcelog_dispatch_mem_notifications(&memory_record) != 0)
605 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors notification");
606 if (mcelog_submit(&memory_record) != 0)
607 ERROR(MCELOG_PLUGIN ": Failed to submit memory errors");
608 memset(&memory_record, 0, sizeof(memory_record));
609 }
610
611 fclose(*pp_file);
612 *pp_file = NULL;
613 }
614
615 mcelog_thread_running = 0;
616 pthread_cleanup_pop(1);
617 return NULL;
618 }
619
mcelog_init(void)620 static int mcelog_init(void) {
621 if (mcelog_apply_defaults) {
622 INFO(MCELOG_PLUGIN
623 ": No configuration selected defaulting to memory errors.");
624 memset(g_mcelog_config.logfile, 0, sizeof(g_mcelog_config.logfile));
625 }
626 g_mcelog_config.dimms_list = llist_create();
627 int err = pthread_mutex_init(&g_mcelog_config.dimms_lock, NULL);
628 if (err < 0) {
629 ERROR(MCELOG_PLUGIN ": plugin: failed to initialize cache lock");
630 return -1;
631 }
632
633 if (socket_adapter.reinit(&socket_adapter) != 0) {
634 ERROR(MCELOG_PLUGIN ": Cannot connect to client socket");
635 return -1;
636 }
637
638 if (strlen(socket_adapter.unix_sock.sun_path)) {
639 if (plugin_thread_create(&g_mcelog_config.tid, poll_worker, NULL, NULL) !=
640 0) {
641 ERROR(MCELOG_PLUGIN ": Error creating poll thread.");
642 return -1;
643 }
644 }
645 return 0;
646 }
647
get_memory_machine_checks(void)648 static int get_memory_machine_checks(void) {
649 static const char dump[] = "dump all bios\n";
650 int ret = socket_adapter.write(&socket_adapter, dump, sizeof(dump));
651 if (ret != 0)
652 ERROR(MCELOG_PLUGIN ": SENT DUMP REQUEST FAILED");
653 else
654 DEBUG(MCELOG_PLUGIN ": SENT DUMP REQUEST OK");
655 return ret;
656 }
657
/* Read callback: requests a memory error dump from the mcelog server. A
 * failed request is logged; the callback itself always returns 0. */
static int mcelog_read(__attribute__((unused)) user_data_t *ud) {
  DEBUG(MCELOG_PLUGIN ": %s", __FUNCTION__);

  const int status = get_memory_machine_checks();
  if (status != 0)
    ERROR(MCELOG_PLUGIN ": MACHINE CHECK INFO NOT AVAILABLE");

  return 0;
}
666
mcelog_shutdown(void)667 static int mcelog_shutdown(void) {
668 int ret = 0;
669 if (mcelog_thread_running) {
670 pthread_cancel(g_mcelog_config.tid);
671 if (pthread_join(g_mcelog_config.tid, NULL) != 0) {
672 ERROR(MCELOG_PLUGIN ": Stopping thread failed.");
673 ret = -1;
674 }
675 }
676 pthread_mutex_lock(&g_mcelog_config.dimms_lock);
677 mcelog_free_dimms_list_records(g_mcelog_config.dimms_list);
678 llist_destroy(g_mcelog_config.dimms_list);
679 g_mcelog_config.dimms_list = NULL;
680 pthread_mutex_unlock(&g_mcelog_config.dimms_lock);
681 pthread_mutex_destroy(&g_mcelog_config.dimms_lock);
682 ret = socket_adapter.close(&socket_adapter) || ret;
683 pthread_rwlock_destroy(&(socket_adapter.lock));
684 return -ret;
685 }
686
/* Plugin entry point: registers the configuration, init, read and shutdown
 * callbacks of the mcelog plugin. */
void module_register(void) {
  plugin_register_complex_config(MCELOG_PLUGIN, mcelog_config);
  plugin_register_init(MCELOG_PLUGIN, mcelog_init);
  plugin_register_complex_read(NULL, MCELOG_PLUGIN, mcelog_read, 0, NULL);
  plugin_register_shutdown(MCELOG_PLUGIN, mcelog_shutdown);
}
693