1 /*****************************************************************************\
2 * $Id: ipmiseld.c,v 1.17 2010-02-08 22:02:30 chu11 Exp $
3 *****************************************************************************
4 * Copyright (C) 2012-2015 Lawrence Livermore National Security, LLC.
5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6 * Written by Albert Chu <chu11@llnl.gov>
7 * LLNL-CODE-559172
8 *
9 * This file is part of Ipmiseld, an IPMI SEL syslog logging daemon.
10 * For details, see http://www.llnl.gov/linux/.
11 *
12 * Ipmiseld is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by the
14 * Free Software Foundation; either version 3 of the License, or (at your
15 * option) any later version.
16 *
17 * Ipmiseld is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
20 * for more details.
21 *
22 * You should have received a copy of the GNU General Public License along
23 * with Ipmiseld. If not, see <http://www.gnu.org/licenses/>.
24 \*****************************************************************************/
25
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif /* HAVE_CONFIG_H */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdint.h>
33 #if STDC_HEADERS
34 #include <string.h>
35 #endif /* STDC_HEADERS */
36 #if TIME_WITH_SYS_TIME
37 #include <sys/time.h>
38 #include <time.h>
39 #else /* !TIME_WITH_SYS_TIME */
40 #if HAVE_SYS_TIME_H
41 #include <sys/time.h>
42 #else /* !HAVE_SYS_TIME_H */
43 #include <time.h>
44 #endif /* !HAVE_SYS_TIME_H */
45 #endif /* !TIME_WITH_SYS_TIME */
46 #include <syslog.h>
47 #include <pthread.h>
48 #include <assert.h>
49 #include <errno.h>
50
51 #include <freeipmi/freeipmi.h>
52
53 #include "ipmiseld.h"
54 #include "ipmiseld-argp.h"
55 #include "ipmiseld-cache.h"
56 #include "ipmiseld-common.h"
57 #include "ipmiseld-debug.h"
58 #include "ipmiseld-ipmi-communication.h"
59 #include "ipmiseld-threadpool.h"
60
61 #include "freeipmi-portability.h"
62 #include "error.h"
63 #include "fi_hostlist.h"
64 #include "heap.h"
65 #include "pstdout.h"
66 #include "tool-common.h"
67 #include "tool-daemon-common.h"
68 #include "tool-event-common.h"
69 #include "tool-util-common.h"
70
/* Path of the daemon's pid file, rooted at the configured local state
 * directory.
 */
71 #define IPMISELD_PIDFILE IPMISELD_LOCALSTATEDIR "/run/ipmiseld.pid"
72
/* Capacity (not counting the terminating NUL) of the buffer that holds
 * the expanded SEL format string; see _sel_log_output().
 */
73 #define IPMISELD_FORMAT_BUFLEN 4096
74
/* Capacity (not counting the terminating NUL) of the buffer that holds
 * one formatted SEL event line; see _sel_log_output().
 */
75 #define IPMISELD_EVENT_OUTPUT_BUFLEN 4096
76
/* NOTE(review): presumably the maximum number of retries for a failed
 * operation; the user is not visible in this part of the file - confirm.
 */
77 #define IPMISELD_RETRY_ATTEMPT_MAX 3
78
/* NOTE(review): heap of per-host data and the mutex that presumably
 * guards it; neither is used in this part of the file - confirm against
 * the polling code.
 */
79 static Heap host_data_heap = NULL;
80 static pthread_mutex_t host_data_heap_lock = PTHREAD_MUTEX_INITIALIZER;
81
/* NOTE(review): starts at 1; the name suggests a main-loop control flag,
 * but its readers/writers are not visible here - confirm its polarity
 * before relying on it.
 */
82 static int exit_flag = 1;
83
84 static int
ipmiseld_sel_info_get(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info)85 ipmiseld_sel_info_get (ipmiseld_host_data_t *host_data, ipmiseld_sel_info_t *sel_info)
86 {
87 fiid_obj_t obj_cmd_rs = NULL;
88 uint64_t val;
89 int rv = -1;
90
91 assert (host_data);
92 assert (host_data->host_poll);
93 assert (host_data->host_poll->ipmi_ctx);
94 assert (sel_info);
95
96 if (!(obj_cmd_rs = fiid_obj_create (tmpl_cmd_get_sel_info_rs)))
97 {
98 ipmiseld_err_output (host_data, "fiid_obj_create: %s", strerror (errno));
99 goto cleanup;
100 }
101
102 if (ipmi_cmd_get_sel_info (host_data->host_poll->ipmi_ctx, obj_cmd_rs) < 0)
103 {
104 ipmiseld_err_output (host_data, "ipmi_cmd_get_sel_info: %s",
105 ipmi_ctx_errormsg (host_data->host_poll->ipmi_ctx));
106 goto cleanup;
107 }
108
109 if (FIID_OBJ_GET (obj_cmd_rs, "entries", &val) < 0)
110 {
111 ipmiseld_err_output (host_data, "fiid_obj_get: 'entries': %s",
112 fiid_obj_errormsg (obj_cmd_rs));
113 goto cleanup;
114 }
115 sel_info->entries = val;
116
117 if (FIID_OBJ_GET (obj_cmd_rs, "free_space", &val) < 0)
118 {
119 ipmiseld_err_output (host_data, "fiid_obj_get: 'free_space': %s",
120 fiid_obj_errormsg (obj_cmd_rs));
121 goto cleanup;
122 }
123 sel_info->free_space = val;
124
125 if (FIID_OBJ_GET (obj_cmd_rs, "most_recent_addition_timestamp", &val) < 0)
126 {
127 ipmiseld_err_output (host_data, "fiid_obj_get: 'most_recent_addition_timestamp': %s",
128 fiid_obj_errormsg (obj_cmd_rs));
129 goto cleanup;
130 }
131 sel_info->most_recent_addition_timestamp = val;
132
133 if (FIID_OBJ_GET (obj_cmd_rs, "most_recent_erase_timestamp", &val) < 0)
134 {
135 ipmiseld_err_output (host_data, "fiid_obj_get: 'most_recent_erase_timestamp': %s",
136 fiid_obj_errormsg (obj_cmd_rs));
137 goto cleanup;
138 }
139 sel_info->most_recent_erase_timestamp = val;
140
141 if (FIID_OBJ_GET (obj_cmd_rs, "delete_sel_command_supported", &val) < 0)
142 {
143 ipmiseld_err_output (host_data, "fiid_obj_get: 'delete_sel_command_supported': %s",
144 fiid_obj_errormsg (obj_cmd_rs));
145 goto cleanup;
146 }
147 sel_info->delete_sel_command_supported = val;
148
149 if (FIID_OBJ_GET (obj_cmd_rs, "reserve_sel_command_supported", &val) < 0)
150 {
151 ipmiseld_err_output (host_data, "fiid_obj_get: 'reserve_sel_command_supported': %s",
152 fiid_obj_errormsg (obj_cmd_rs));
153 goto cleanup;
154 }
155 sel_info->reserve_sel_command_supported = val;
156
157 if (FIID_OBJ_GET (obj_cmd_rs, "overflow_flag", &val) < 0)
158 {
159 ipmiseld_err_output (host_data, "fiid_obj_get: 'overflow_flag': %s",
160 fiid_obj_errormsg (obj_cmd_rs));
161 goto cleanup;
162 }
163 sel_info->overflow_flag = val;
164
165 rv = 0;
166 cleanup:
167 fiid_obj_destroy (obj_cmd_rs);
168 return (rv);
169 }
170
171 static int
_sel_last_record_id_callback(ipmi_sel_ctx_t ctx,void * callback_data)172 _sel_last_record_id_callback (ipmi_sel_ctx_t ctx, void *callback_data)
173 {
174 ipmiseld_last_record_id_t *last_record_id;
175
176 assert (ctx);
177 assert (callback_data);
178
179 last_record_id = (ipmiseld_last_record_id_t *)callback_data;
180
181 if (ipmi_sel_parse_read_record_id (ctx,
182 NULL,
183 0,
184 &(last_record_id->record_id)) < 0)
185 {
186 err_output ("ipmi_sel_parse_read_record_id: %s",
187 ipmi_sel_ctx_errormsg (ctx));
188 return (-1);
189 }
190
191 last_record_id->loaded = 1;
192 return (0);
193 }
194
195 static int
ipmiseld_get_last_record_id(ipmiseld_host_data_t * host_data,ipmiseld_last_record_id_t * last_record_id)196 ipmiseld_get_last_record_id (ipmiseld_host_data_t *host_data,
197 ipmiseld_last_record_id_t *last_record_id)
198 {
199 assert (host_data);
200 assert (host_data->host_poll);
201 assert (host_data->host_poll->sel_ctx);
202 assert (last_record_id);
203
204 last_record_id->loaded = 0;
205
206 if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
207 IPMI_SEL_RECORD_ID_LAST,
208 IPMI_SEL_RECORD_ID_LAST,
209 _sel_last_record_id_callback,
210 last_record_id) < 0)
211 {
212 /* A general IPMI error (busy, timeout, etc.) is ok, it happens */
213 if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) != IPMI_SEL_ERR_IPMI_ERROR)
214 ipmiseld_err_output (host_data, "ipmi_sel_parse: %s", ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
215 return (-1);
216 }
217
218 return (0);
219 }
220
221 static unsigned int
ipmiseld_calc_percent_full(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info)222 ipmiseld_calc_percent_full (ipmiseld_host_data_t *host_data,
223 ipmiseld_sel_info_t *sel_info)
224 {
225 unsigned int used_bytes;
226 unsigned int total_bytes;
227 unsigned int percent;
228
229 assert (host_data);
230 assert (sel_info);
231
232 used_bytes = (sel_info->entries * IPMI_SEL_RECORD_MAX_RECORD_LENGTH);
233 total_bytes = used_bytes + sel_info->free_space;
234 percent = (int)(100 * (double)used_bytes/total_bytes);
235
236 if (percent > 100)
237 {
238 /* Some rounding errors could occur, we accept small ones */
239 if (percent > 105)
240 {
241 if (host_data->prog_data->args->verbose_count)
242 ipmiseld_syslog_host (host_data, "SEL percent calc error: %u", percent);
243 }
244 percent = 100;
245 }
246
247 return (percent);
248 }
249
250 static int
ipmiseld_host_state_init(ipmiseld_host_data_t * host_data)251 ipmiseld_host_state_init (ipmiseld_host_data_t *host_data)
252 {
253 unsigned int percent;
254 int rv = -1;
255
256 assert (host_data);
257
258 if (ipmiseld_get_last_record_id (host_data, &(host_data->last_host_state.last_record_id)) < 0)
259 goto cleanup;
260
261 /* possible SEL is empty */
262 if (!host_data->last_host_state.last_record_id.loaded)
263 {
264 host_data->last_host_state.last_record_id.record_id = 0;
265 host_data->last_host_state.last_record_id.loaded = 1;
266 }
267
268 if (ipmiseld_sel_info_get (host_data, &(host_data->last_host_state.sel_info)) < 0)
269 goto cleanup;
270
271 percent = ipmiseld_calc_percent_full (host_data, &(host_data->last_host_state.sel_info));
272 host_data->last_host_state.last_percent_full = percent;
273
274 host_data->last_host_state.initialized = 1;
275 rv = 0;
276 cleanup:
277 return (rv);
278 }
279
280 /* return (-1), real error */
281 static int
_sel_parse_err_handle(ipmiseld_host_data_t * host_data,char * func)282 _sel_parse_err_handle (ipmiseld_host_data_t *host_data, char *func)
283 {
284 assert (host_data);
285 assert (func);
286
287 if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_INVALID_SEL_ENTRY)
288 {
289 /* maybe a bad SEL entry returned from remote system, don't error out */
290 if (host_data->prog_data->args->verbose_count)
291 ipmiseld_syslog_host (host_data, "Invalid SEL entry read");
292 return (0);
293 }
294
295 ipmiseld_err_output (host_data, "%s: %s",
296 func,
297 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
298
299 return (-1);
300 }
301
/* Append formatted text to buf, starting at offset *wlen, never writing
 * past buflen bytes in total.
 *
 * On success *wlen is advanced by the number of characters written and
 * 0 is returned.  If the text did not fit (or vsnprintf() reported an
 * error) *wlen is set to buflen and 1 is returned.
 */
static int
_snprintf (char *buf,
           unsigned int buflen,
           unsigned int *wlen,
           const char *fmt,
           ...)
{
  unsigned int remaining;
  va_list args;
  int written;

  assert (buf);
  assert (buflen);
  assert (wlen);
  assert (fmt);

  remaining = buflen - *wlen;

  va_start (args, fmt);
  written = vsnprintf (buf + *wlen, remaining, fmt, args);
  va_end (args);

  /* A negative result compares as a huge unsigned value, so it has
   * always been handled like a truncation; spell that out.
   */
  if (written < 0 || (unsigned int)written >= remaining)
    {
      *wlen = buflen;
      return (1);
    }

  *wlen += (unsigned int)written;
  return (0);
}
329
330 static int
_sel_log_format(ipmiseld_host_data_t * host_data,const char * fmt_str,char * fmtbuf,unsigned int fmtbuf_len)331 _sel_log_format (ipmiseld_host_data_t *host_data,
332 const char *fmt_str,
333 char *fmtbuf,
334 unsigned int fmtbuf_len)
335 {
336 unsigned int wlen = 0;
337 int percent_flag = 0;
338 char *ptr;
339
340 assert (host_data);
341 assert (fmt_str);
342
343 ptr = (char *)fmt_str;
344 while (*ptr)
345 {
346 if (*ptr == '%')
347 {
348 if (percent_flag)
349 {
350 if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%%"))
351 return (0);
352 percent_flag = 0;
353 }
354 else
355 percent_flag = 1;
356 goto end_loop;
357 }
358 else if (percent_flag && *ptr == 'h')
359 {
360 if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%s",
361 host_data->hostname ? host_data->hostname : "localhost"))
362 return (0);
363 percent_flag = 0;
364 }
365 else
366 {
367 if (percent_flag)
368 {
369 if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%%%c", *ptr))
370 return (0);
371 percent_flag = 0;
372 }
373 else
374 {
375 if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%c", *ptr))
376 return (0);
377 }
378 }
379
380 end_loop:
381 ptr++;
382 }
383
384 return (0);
385 }
386
387 static int
_sel_log_output(ipmiseld_host_data_t * host_data,uint8_t record_type)388 _sel_log_output (ipmiseld_host_data_t *host_data, uint8_t record_type)
389 {
390 char fmtbuf[IPMISELD_FORMAT_BUFLEN + 1];
391 char outbuf[IPMISELD_EVENT_OUTPUT_BUFLEN + 1];
392 int outbuf_len;
393 unsigned int flags;
394 int record_type_class;
395 char *format_str;
396 uint16_t record_id;
397
398 assert (host_data);
399
400 memset (fmtbuf, '\0', IPMISELD_FORMAT_BUFLEN + 1);
401 memset (outbuf, '\0', IPMISELD_EVENT_OUTPUT_BUFLEN + 1);
402
403 if (ipmi_sel_parse_read_record_id (host_data->host_poll->sel_ctx,
404 NULL,
405 0,
406 &record_id) < 0)
407 {
408 ipmiseld_err_output (host_data, "ipmi_sel_parse_read_record_id: %s",
409 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
410 return (-1);
411 }
412
413 if (host_data->prog_data->args->foreground
414 && host_data->prog_data->args->common_args.debug)
415 IPMISELD_HOST_DEBUG (("SEL Record parsed: Record ID = %u", record_id));
416
417 /* achu:
418 *
419 * Algorithmically we can "find" the next entry to log several ways,
420 * but there are two reasonable ways.
421 *
422 * 1) Whatever the last record id is, add 1 to it and iterate until
423 * you reach the next valid SEL record id.
424 *
425 * 2) Read the last record id, and use that to get the next record
426 * id to log.
427 *
428 * While '1' will be faster on most systems, there are a number of
429 * systems were vendors jump semi-big chunks of record ids on new
430 * events (I have no idea why, it makes no sense). We will
431 * implement '2' as the most reasonable average solution. So when
432 * we hit this callback with the already logged last record id, we
433 * need to not log it. '2' is also the safer implementation, in the
434 * event there is a bug in the firmware, and we could loop endlessly
435 * looking for the next entry to log when there is none.
436 */
437 if (host_data->now_host_state.last_record_id.record_id == record_id)
438 return (0);
439
440 flags = IPMI_SEL_STRING_FLAGS_IGNORE_UNAVAILABLE_FIELD;
441 flags |= IPMI_SEL_STRING_FLAGS_OUTPUT_NOT_AVAILABLE;
442 flags |= IPMI_SEL_STRING_FLAGS_DATE_MONTH_STRING;
443 if (host_data->prog_data->args->verbose_count)
444 flags |= IPMI_SEL_STRING_FLAGS_VERBOSE;
445 if (host_data->prog_data->args->entity_sensor_names)
446 flags |= IPMI_SEL_STRING_FLAGS_ENTITY_SENSOR_NAMES;
447 if (host_data->prog_data->args->non_abbreviated_units)
448 flags |= IPMI_SEL_STRING_FLAGS_NON_ABBREVIATED_UNITS;
449 if (host_data->prog_data->args->interpret_oem_data)
450 flags |= IPMI_SEL_STRING_FLAGS_INTERPRET_OEM_DATA;
451
452 record_type_class = ipmi_sel_record_type_class (record_type);
453 if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD)
454 {
455 if (host_data->prog_data->args->system_event_format_str)
456 format_str = host_data->prog_data->args->system_event_format_str;
457 else
458 {
459 if (host_data->hostname)
460 format_str = IPMISELD_SYSTEM_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
461 else
462 format_str = IPMISELD_SYSTEM_EVENT_FORMAT_STR_DEFAULT;
463 }
464 }
465 else if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD)
466 {
467 if (host_data->prog_data->args->oem_timestamped_event_format_str)
468 format_str = host_data->prog_data->args->oem_timestamped_event_format_str;
469 else
470 {
471 if (host_data->hostname)
472 format_str = IPMISELD_OEM_TIMESTAMPED_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
473 else
474 format_str = IPMISELD_OEM_TIMESTAMPED_EVENT_FORMAT_STR_DEFAULT;
475 }
476 }
477 else if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD)
478 {
479 if (host_data->prog_data->args->oem_non_timestamped_event_format_str)
480 format_str = host_data->prog_data->args->oem_non_timestamped_event_format_str;
481 else
482 {
483 if (host_data->hostname)
484 format_str = IPMISELD_OEM_NON_TIMESTAMPED_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
485 else
486 format_str = IPMISELD_OEM_NON_TIMESTAMPED_EVENT_FORMAT_STR_DEFAULT;
487 }
488 }
489 else
490 {
491 if (host_data->prog_data->args->verbose_count)
492 ipmiseld_syslog_host (host_data,
493 "SEL Event: Unknown SEL Record Type: %Xh",
494 record_type);
495 return (0);
496 }
497
498 if (_sel_log_format (host_data,
499 format_str,
500 fmtbuf,
501 IPMISELD_FORMAT_BUFLEN) < 0)
502 return (-1);
503
504 if ((outbuf_len = ipmi_sel_parse_read_record_string (host_data->host_poll->sel_ctx,
505 fmtbuf,
506 NULL,
507 0,
508 outbuf,
509 IPMISELD_EVENT_OUTPUT_BUFLEN,
510 flags)) < 0)
511 {
512 if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_string") < 0)
513 return (-1);
514 return (0);
515 }
516
517 if (outbuf_len)
518 ipmiseld_syslog (host_data, "%s", outbuf);
519
520 host_data->now_host_state.last_record_id.record_id = record_id;
521
522 return (0);
523 }
524
525 static int
_sel_parse_callback(ipmi_sel_ctx_t ctx,void * callback_data)526 _sel_parse_callback (ipmi_sel_ctx_t ctx, void *callback_data)
527 {
528 ipmiseld_host_data_t *host_data;
529 uint8_t record_type;
530 int record_type_class;
531 int rv = -1;
532
533 assert (ctx);
534 assert (callback_data);
535
536 host_data = (ipmiseld_host_data_t *)callback_data;
537
538 if (host_data->prog_data->args->sensor_types_length
539 || host_data->prog_data->args->exclude_sensor_types_length)
540 {
541 uint8_t sensor_type;
542 int flag;
543
544 if (ipmi_sel_parse_read_sensor_type (host_data->host_poll->sel_ctx,
545 NULL,
546 0,
547 &sensor_type) < 0)
548 {
549 if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
550 goto cleanup;
551 goto out;
552 }
553
554 if (host_data->prog_data->args->sensor_types_length)
555 {
556 if ((flag = sensor_type_listed (NULL,
557 sensor_type,
558 host_data->prog_data->args->sensor_types,
559 host_data->prog_data->args->sensor_types_length)) < 0)
560 goto cleanup;
561
562 if (!flag)
563 goto out;
564 }
565
566 if (host_data->prog_data->args->exclude_sensor_types_length)
567 {
568 if ((flag = sensor_type_listed (NULL,
569 sensor_type,
570 host_data->prog_data->args->exclude_sensor_types,
571 host_data->prog_data->args->exclude_sensor_types_length)) < 0)
572 goto cleanup;
573
574 if (flag)
575 goto out;
576 }
577 }
578
579 if (ipmi_sel_parse_read_record_type (host_data->host_poll->sel_ctx,
580 NULL,
581 0,
582 &record_type) < 0)
583 {
584 if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
585 goto cleanup;
586 goto out;
587 }
588
589 /* IPMI Workaround
590 *
591 * HP DL 380 G5
592 * Intel S2600JF/Appro 512X
593 *
594 * Motherboard is reporting invalid SEL Records types (0x00 on HP DL
595 * 380 G5, 0x03 on Intel S2600JF/Appro 512X)
596 */
597 if (host_data->prog_data->args->common_args.section_specific_workaround_flags & IPMI_PARSE_SECTION_SPECIFIC_WORKAROUND_FLAGS_ASSUME_SYSTEM_EVENT
598 && (!IPMI_SEL_RECORD_TYPE_VALID (record_type)))
599 record_type = IPMI_SEL_RECORD_TYPE_SYSTEM_EVENT_RECORD;
600
601 record_type_class = ipmi_sel_record_type_class (record_type);
602
603 if (host_data->prog_data->args->system_event_only
604 && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD)
605 goto out;
606
607 if (host_data->prog_data->args->oem_event_only
608 && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD
609 && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD)
610 goto out;
611
612 if (host_data->prog_data->event_state_filter_mask)
613 {
614 char sel_record[IPMI_SEL_RECORD_MAX_RECORD_LENGTH];
615 int sel_record_len;
616 unsigned int event_state = 0;
617
618 if ((sel_record_len = ipmi_sel_parse_read_record (host_data->host_poll->sel_ctx,
619 sel_record,
620 IPMI_SEL_RECORD_MAX_RECORD_LENGTH)) < 0)
621 {
622 if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
623 goto cleanup;
624 goto out;
625 }
626
627 if (ipmi_interpret_sel (host_data->host_poll->interpret_ctx,
628 sel_record,
629 sel_record_len,
630 &event_state) < 0)
631 {
632 ipmiseld_err_output (host_data, "ipmi_interpret_sel: %s",
633 ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
634 goto cleanup;
635 }
636
637 if ((host_data->prog_data->event_state_filter_mask & IPMISELD_NOMINAL_FILTER)
638 && event_state == IPMI_INTERPRET_STATE_NOMINAL)
639 goto out;
640
641 if ((host_data->prog_data->event_state_filter_mask & IPMISELD_WARNING_FILTER)
642 && event_state == IPMI_INTERPRET_STATE_WARNING)
643 goto out;
644
645 if ((host_data->prog_data->event_state_filter_mask & IPMISELD_CRITICAL_FILTER)
646 && event_state == IPMI_INTERPRET_STATE_CRITICAL)
647 goto out;
648
649 if ((host_data->prog_data->event_state_filter_mask & IPMISELD_NA_FILTER)
650 && event_state == IPMI_INTERPRET_STATE_UNKNOWN)
651 goto out;
652 }
653
654 if (_sel_log_output (host_data, record_type) < 0)
655 goto cleanup;
656
657 out:
658 rv = 0;
659 cleanup:
660 return (rv);
661 }
662
663 static int
ipmiseld_sel_parse_test_run(ipmiseld_host_data_t * host_data)664 ipmiseld_sel_parse_test_run (ipmiseld_host_data_t *host_data)
665 {
666 assert (host_data);
667 assert (host_data->host_poll);
668 assert (host_data->host_poll->sel_ctx);
669
670 if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
671 IPMI_SEL_RECORD_ID_FIRST,
672 IPMI_SEL_RECORD_ID_LAST,
673 _sel_parse_callback,
674 host_data) < 0)
675 {
676 /* A general IPMI error (busy, timeout, etc.) is ok, it happens */
677 if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) != IPMI_SEL_ERR_IPMI_ERROR)
678 ipmiseld_err_output (host_data, "ipmi_sel_parse: %s",
679 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
680 return (-1);
681 }
682
683 return (0);
684 }
685
686 static void
_dump_sel_info(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info,const char * prefix)687 _dump_sel_info (ipmiseld_host_data_t *host_data,
688 ipmiseld_sel_info_t *sel_info,
689 const char *prefix)
690 {
691 assert (host_data);
692 assert (host_data->prog_data->args->foreground);
693 assert (host_data->prog_data->args->common_args.debug);
694 assert (sel_info);
695 assert (prefix);
696
697 IPMISELD_HOST_DEBUG (("%s: Entries = %u", prefix, sel_info->entries));
698 IPMISELD_HOST_DEBUG (("%s: Free Space = %u", prefix, sel_info->free_space));
699 IPMISELD_HOST_DEBUG (("%s: Most Recent Addition Timestamp = %u", prefix, sel_info->most_recent_addition_timestamp));
700 IPMISELD_HOST_DEBUG (("%s: Most Recent Erase Timestamp = %u", prefix, sel_info->most_recent_erase_timestamp));
701 IPMISELD_HOST_DEBUG (("%s: Delete Sel Command Supported = %u", prefix, sel_info->delete_sel_command_supported));
702 IPMISELD_HOST_DEBUG (("%s: Reserve Sel Command Supported = %u", prefix, sel_info->reserve_sel_command_supported));
703 IPMISELD_HOST_DEBUG (("%s: Overflow Flag = %u", prefix, sel_info->overflow_flag));
704 }
705
706 static void
_dump_host_state(ipmiseld_host_data_t * host_data,ipmiseld_host_state_t * host_state,const char * prefix)707 _dump_host_state (ipmiseld_host_data_t *host_data,
708 ipmiseld_host_state_t *host_state,
709 const char *prefix)
710 {
711 assert (host_data);
712 assert (host_data->prog_data->args->foreground);
713 assert (host_data->prog_data->args->common_args.debug);
714 assert (host_state);
715 assert (prefix);
716
717 IPMISELD_HOST_DEBUG (("%s: Last Record ID = %u", prefix, host_state->last_record_id.record_id));
718 IPMISELD_HOST_DEBUG (("%s: Last Percent Full = %u", prefix, host_state->last_percent_full));
719 _dump_sel_info (host_data, &(host_state->sel_info), prefix);
720 }
721
722 /* returns 1 to log events, 0 if not, -1 on error */
723 static int
ipmiseld_check_sel_info(ipmiseld_host_data_t * host_data,uint16_t * record_id_start)724 ipmiseld_check_sel_info (ipmiseld_host_data_t *host_data, uint16_t *record_id_start)
725 {
726 int log_entries_flag = 0;
727 int rv = -1;
728
729 assert (host_data);
730 assert (record_id_start);
731
732 if (host_data->now_host_state.sel_info.most_recent_addition_timestamp < host_data->last_host_state.sel_info.most_recent_addition_timestamp
733 || host_data->now_host_state.sel_info.most_recent_erase_timestamp < host_data->last_host_state.sel_info.most_recent_erase_timestamp)
734 {
735 /* This shouldn't be possible under normal circumstances, but
736 * could occur if the user changes the SEL timestamp or clock.
737 * Or perhaps a vendor firmware update or similar action
738 * modified the clock.
739 *
740 * Under this circumstance, we will treat the timestamps as
741 * having changed (note that all checks below are for "not equal
742 * to" and not "greater than" or "less than"). We just log to
743 * note this.
744 */
745 if (host_data->prog_data->args->verbose_count)
746 ipmiseld_syslog_host (host_data, "SEL timestamps modified to earlier time");
747 }
748
749 if (host_data->now_host_state.sel_info.entries == host_data->last_host_state.sel_info.entries)
750 {
751 /* Small chance entry count is the same after an
752 * out-of-daemon clear. Need to do some checks to handle
753 * for this
754 */
755
756 /* Timestamps unchanged - this is the most common/normal case, no new log entries to log. */
757 if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp
758 && host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
759 {
760 /* nothing to do except this single copy/save */
761 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
762 }
763 /* If erase timestamp changed but addition timestamp has
764 * not. An out-of-daemon delete/clear occurred, but
765 * there are no new entries to log.
766 */
767 else if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp
768 && host_data->now_host_state.sel_info.most_recent_erase_timestamp != host_data->last_host_state.sel_info.most_recent_erase_timestamp)
769 {
770 if (host_data->now_host_state.sel_info.delete_sel_command_supported)
771 {
772 /* We don't know if the erase was for some old entries or if it was a clear.
773 * We will look at the last_record_id to take a guess
774 */
775 ipmiseld_last_record_id_t last_record_id;
776
777 if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
778 goto cleanup;
779
780 /* If new last_record_id has changed or there are no
781 * records, we assume the erase was a clear
782 */
783 if (!last_record_id.loaded
784 || last_record_id.record_id != host_data->last_host_state.last_record_id.record_id)
785 host_data->now_host_state.last_record_id.record_id = 0;
786 else
787 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
788 }
789 else
790 {
791 /* If delete not supported, the erase must have been a clear.
792 * Reset last_record_id to zero.
793 */
794 host_data->now_host_state.last_record_id.record_id = 0;
795 }
796 }
797 /* An erase and addition occurred, must determine the type of action that occurred */
798 else if (host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp
799 && host_data->now_host_state.sel_info.most_recent_erase_timestamp != host_data->last_host_state.sel_info.most_recent_erase_timestamp)
800 {
801 if (host_data->now_host_state.sel_info.delete_sel_command_supported)
802 {
803 /* We don't know if the erase was for some old entries or if it was a clear.
804 * We will look at the last_record_id to take a guess
805 */
806 ipmiseld_last_record_id_t last_record_id;
807
808 if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
809 goto cleanup;
810
811 /* If new last_record_id is greater, we assume it's some additional entries
812 * and the erase was only deleting some old entries.
813 */
814 if (last_record_id.loaded
815 && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
816 {
817 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
818 if (host_data->last_host_state.last_record_id.record_id)
819 (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
820 else
821 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
822 log_entries_flag++;
823 }
824 else
825 {
826 /* We assume a clear occurred so start from the beginning */
827 host_data->now_host_state.last_record_id.record_id = 0;
828 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
829 log_entries_flag++;
830 }
831 }
832 else
833 {
834 /* If delete not supported, the erase must have been a clear
835 * So log all the new entries if some are available and
836 * reset last_record_id to zero.
837 */
838 host_data->now_host_state.last_record_id.record_id = 0;
839 if (host_data->now_host_state.sel_info.entries)
840 {
841 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
842 log_entries_flag++;
843 }
844 }
845 }
846 else /* host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp
847 && host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp */
848 {
849 /* This shouldn't be possible and is likely a bug in the
850 * IPMI firmware (user erased entries but timestamp didn't
851 * update, SEL added entries and updated timestamp but
852 * didn't update entry count, etc.) we'll only save off the
853 * host state for later.
854 */
855 if (host_data->prog_data->args->verbose_count)
856 ipmiseld_syslog_host (host_data, "SEL illegal timestamp situation");
857 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
858 }
859 }
860 else if (host_data->now_host_state.sel_info.entries > host_data->last_host_state.sel_info.entries)
861 {
862 ipmiseld_last_record_id_t last_record_id;
863
864 if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp)
865 {
866 /* This shouldn't be possible and is likely a bug in the
867 * IPMI firmware. Log this, but for rest of this chunk of
868 * code, we assume the addition timestamp must have changed.
869 */
870 if (host_data->prog_data->args->verbose_count)
871 ipmiseld_syslog_host (host_data, "SEL timestamp error, more entries without addition");
872 }
873
874 if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
875 goto cleanup;
876
877 /* There is a small race chance that the last time we got sel
878 * info, a new SEL event occurred after it, but before the call
879 * to ipmi_sel_parse(). So we check what the last record id to
880 * see if that happened. If the last record id is the same,
881 * then we already logged it. So no new logging needs to
882 * happen.
883 */
884 if (last_record_id.loaded
885 && host_data->last_host_state.last_record_id.record_id == last_record_id.record_id)
886 {
887 /* nothing to do except this single copy/save */
888 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
889 }
890 else if (host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
891 {
892 /* This is the most normal case we should expect, there
893 * are more entries in the SEL than last time we checked
894 * and must log them.
895 */
896 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
897 if (host_data->last_host_state.last_record_id.record_id)
898 (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
899 else
900 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
901 log_entries_flag++;
902 }
903 else
904 {
905 if (host_data->now_host_state.sel_info.delete_sel_command_supported)
906 {
907 /* If new last_record_id is greater, we assume it's some additional entries
908 * and the erase was only deleting some old entries.
909 */
910 if (last_record_id.loaded
911 && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
912 {
913 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
914 if (host_data->last_host_state.last_record_id.record_id)
915 (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
916 else
917 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
918 log_entries_flag++;
919 }
920 else
921 {
922 /* We assume a clear occurred so start from the beginning */
923 host_data->now_host_state.last_record_id.record_id = 0;
924 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
925 log_entries_flag++;
926 }
927 }
928 else
929 {
930 /* If delete not supported, the erase must have been a clear
931 * So log all the new entries if some are available and
932 * reset last_record_id to zero.
933 */
934 host_data->now_host_state.last_record_id.record_id = 0;
935 if (host_data->now_host_state.sel_info.entries)
936 {
937 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
938 log_entries_flag++;
939 }
940 }
941 }
942 }
943 else /* host_data->now_host_state.sel_info.entries < host_data->host_state.sel_info.entries) */
944 {
945 if (host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
946 {
947 /* This shouldn't be possible and is likely a bug in the
948 * IPMI firmware. Log this, but for rest of this chunk of
949 * code, we assume the erase timestamp must have changed.
950 */
951 if (host_data->prog_data->args->verbose_count)
952 ipmiseld_syslog_host (host_data, "SEL timestamp error, fewer entries without erase");
953 }
954
955 /* if no additional entries, nothing to log */
956 if (host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp)
957 {
958 if (host_data->now_host_state.sel_info.delete_sel_command_supported)
959 {
960 /* We don't know if the erase was for some old entries or if it was a clear.
961 * We will look at the last_record_id to take a guess
962 */
963 ipmiseld_last_record_id_t last_record_id;
964
965 if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
966 goto cleanup;
967
968 /* If new last_record_id is greater, we assume it's some additional entries
969 * and the erase was only deleting some old entries.
970 */
971 if (last_record_id.loaded
972 && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
973 {
974 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
975 if (host_data->last_host_state.last_record_id.record_id)
976 (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
977 else
978 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
979 log_entries_flag++;
980 }
981 else
982 {
983 /* We assume a clear occurred so start from the beginning */
984 host_data->now_host_state.last_record_id.record_id = 0;
985 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
986 log_entries_flag++;
987 }
988 }
989 else
990 {
991 /* If delete not supported, the erase must have been a clear
992 * So log all the new entries if some are available and
993 * reset last_record_id to zero.
994 */
995 host_data->now_host_state.last_record_id.record_id = 0;
996 if (host_data->now_host_state.sel_info.entries)
997 {
998 (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
999 log_entries_flag++;
1000 }
1001 }
1002 }
1003 else
1004 {
1005 if (!host_data->now_host_state.sel_info.entries)
1006 host_data->now_host_state.last_record_id.record_id = 0;
1007 else
1008 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
1009 }
1010 }
1011
1012 if (log_entries_flag)
1013 rv = 1;
1014 else
1015 rv = 0;
1016
1017 cleanup:
1018 return (rv);
1019 }
1020
1021 /* returns 1 if clear should occur, 0 if not, -1 on error */
1022 static int
ipmiseld_check_thresholds(ipmiseld_host_data_t * host_data)1023 ipmiseld_check_thresholds (ipmiseld_host_data_t *host_data)
1024 {
1025 int do_clear_flag = 0;
1026 unsigned int percent;
1027 int rv = -1;
1028
1029 assert (host_data);
1030
1031 percent = ipmiseld_calc_percent_full (host_data, &(host_data->now_host_state.sel_info));
1032
1033 if (host_data->prog_data->args->warning_threshold)
1034 {
1035 if (percent > host_data->prog_data->args->warning_threshold)
1036 {
1037 if (percent > host_data->last_host_state.last_percent_full)
1038 ipmiseld_syslog_host (host_data, "SEL is %d%% full", percent);
1039 }
1040 }
1041
1042 if (!host_data->last_host_state.sel_info.overflow_flag
1043 && host_data->now_host_state.sel_info.overflow_flag)
1044 ipmiseld_syslog_host (host_data, "SEL Overflow, events have been dropped due to lack of space in the SEL");
1045
1046 if (host_data->prog_data->args->clear_threshold)
1047 {
1048 if (percent > host_data->prog_data->args->clear_threshold)
1049 do_clear_flag = 1;
1050 }
1051
1052 host_data->now_host_state.last_percent_full = percent;
1053
1054 if (do_clear_flag)
1055 rv = 1;
1056 else
1057 rv = 0;
1058
1059 return (rv);
1060 }
1061
1062 /* returns 1 if reserve successful, 0 if not, -1 on error */
1063 static int
ipmiseld_sel_reserve(ipmiseld_host_data_t * host_data)1064 ipmiseld_sel_reserve (ipmiseld_host_data_t *host_data)
1065 {
1066 assert (host_data);
1067 assert (host_data->host_poll);
1068 assert (host_data->host_poll->sel_ctx);
1069
1070 if (host_data->now_host_state.sel_info.reserve_sel_command_supported)
1071 {
1072 if (ipmi_sel_ctx_register_reservation_id (host_data->host_poll->sel_ctx, NULL) < 0)
1073 {
1074 /* If an IPMI error, we assume just can't do reservation, no biggie */
1075 if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_IPMI_ERROR)
1076 return (0);
1077
1078 ipmiseld_err_output (host_data, "ipmi_sel_ctx_register_reservation_id: %s",
1079 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1080 return (-1);
1081 }
1082
1083 return (1);
1084 }
1085
1086 return (0);
1087 }
1088
1089 static int
ipmiseld_sel_log_entries(ipmiseld_host_data_t * host_data,uint16_t record_id_start)1090 ipmiseld_sel_log_entries (ipmiseld_host_data_t *host_data,
1091 uint16_t record_id_start)
1092 {
1093 assert (host_data);
1094 assert (host_data->host_poll);
1095 assert (host_data->host_poll->sel_ctx);
1096
1097 if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
1098 record_id_start,
1099 IPMI_SEL_RECORD_ID_LAST,
1100 _sel_parse_callback,
1101 host_data) < 0)
1102 {
1103 ipmiseld_err_output (host_data, "ipmi_sel_parse: %s", ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1104 return (-1);
1105 }
1106
1107 return (0);
1108 }
1109
1110 static int
ipmiseld_save_state(ipmiseld_host_data_t * host_data)1111 ipmiseld_save_state (ipmiseld_host_data_t *host_data)
1112 {
1113 assert (host_data);
1114
1115 memcpy (&(host_data->last_host_state),
1116 &(host_data->now_host_state),
1117 sizeof (ipmiseld_host_state_t));
1118
1119 /* ignore error, continue on even if it fails */
1120 ipmiseld_data_cache_store (host_data);
1121
1122 return (0);
1123 }
1124
1125 /* return 1 - retry immediately, return 0 general success, -1 error */
1126 static int
ipmiseld_sel_parse_log(ipmiseld_host_data_t * host_data)1127 ipmiseld_sel_parse_log (ipmiseld_host_data_t *host_data)
1128 {
1129 uint16_t record_id_start = 0;
1130 int log_entries_flag = 0;
1131 int do_clear_flag = 0;
1132 int reserve_flag = 0;
1133 int retry_flag = 0;
1134 int rv = -1;
1135 int ret;
1136
1137 assert (host_data);
1138
1139 if (host_data->prog_data->args->clear_sel
1140 && !host_data->clear_sel_done)
1141 {
1142 if (ipmi_sel_clear_sel (host_data->host_poll->sel_ctx) < 0)
1143 {
1144 ipmiseld_err_output (host_data, "ipmi_sel_clear_sel: %s",
1145 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1146 goto cleanup;
1147 }
1148 host_data->clear_sel_done = 1;
1149 }
1150
1151 if (!host_data->last_host_state.initialized)
1152 {
1153 if ((ret = ipmiseld_data_cache_load (host_data)) < 0)
1154 {
1155 if (host_data->prog_data->args->verbose_count)
1156 ipmiseld_syslog_host (host_data, "Failed to load cached previous state, some SEL entries maybe missed");
1157 }
1158
1159 if (ret <= 0)
1160 {
1161 if (ipmiseld_host_state_init (host_data) < 0)
1162 goto cleanup;
1163
1164 if (host_data->prog_data->args->foreground
1165 && host_data->prog_data->args->common_args.debug)
1166 _dump_host_state (host_data,
1167 &(host_data->last_host_state),
1168 "Initial State");
1169
1170 goto out;
1171 }
1172 else
1173 {
1174 if (host_data->prog_data->args->foreground
1175 && host_data->prog_data->args->common_args.debug)
1176 _dump_host_state (host_data,
1177 &(host_data->last_host_state),
1178 "Loaded State");
1179 }
1180 }
1181
1182 if (ipmiseld_sel_info_get (host_data, &(host_data->now_host_state.sel_info)) < 0)
1183 goto cleanup;
1184
1185 if (host_data->prog_data->args->foreground
1186 && host_data->prog_data->args->common_args.debug)
1187 {
1188 _dump_host_state (host_data, &(host_data->last_host_state), "Last State");
1189 _dump_sel_info (host_data, &host_data->now_host_state.sel_info, "Current State");
1190 }
1191
1192 if ((do_clear_flag = ipmiseld_check_thresholds (host_data)) < 0)
1193 goto cleanup;
1194
1195 if ((log_entries_flag = ipmiseld_check_sel_info (host_data, &record_id_start)) < 0)
1196 goto cleanup;
1197
1198 if (do_clear_flag)
1199 {
1200 if ((reserve_flag = ipmiseld_sel_reserve (host_data)) < 0)
1201 goto cleanup;
1202 }
1203
1204 if (log_entries_flag)
1205 {
1206 if (ipmiseld_sel_log_entries (host_data, record_id_start) < 0)
1207 goto cleanup;
1208 }
1209
1210 if (do_clear_flag)
1211 {
1212 ipmiseld_sel_info_t tmp_sel_info;
1213
1214 if ((ret = ipmi_sel_clear_sel (host_data->host_poll->sel_ctx)) < 0)
1215 {
1216 if (reserve_flag
1217 && ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_RESERVATION_CANCELED)
1218 retry_flag++;
1219 else
1220 {
1221 ipmiseld_err_output (host_data, "ipmi_sel_clear_sel: %s",
1222 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1223 goto save_state_out;
1224 }
1225 }
1226
1227 ipmiseld_syslog_host (host_data, "SEL cleared");
1228
1229 if (ipmiseld_sel_info_get (host_data, &tmp_sel_info) < 0)
1230 goto save_state_out;
1231
1232 memcpy (&(host_data->now_host_state.sel_info), &tmp_sel_info, sizeof (ipmiseld_sel_info_t));
1233 host_data->now_host_state.last_record_id.record_id = 0;
1234 }
1235
1236 save_state_out:
1237
1238 host_data->now_host_state.initialized = 1;
1239
1240 if (ipmiseld_save_state (host_data) < 0)
1241 goto cleanup;
1242
1243 out:
1244
1245 if (retry_flag)
1246 rv = 1;
1247 else
1248 rv = 0;
1249
1250 cleanup:
1251 return (rv);
1252 }
1253
1254 static int
ipmiseld_sel_parse(ipmiseld_host_data_t * host_data)1255 ipmiseld_sel_parse (ipmiseld_host_data_t *host_data)
1256 {
1257 unsigned int retry_count = 0;
1258 int rv;
1259
1260 assert (host_data);
1261
1262 if (host_data->prog_data->args->test_run)
1263 return (ipmiseld_sel_parse_test_run (host_data));
1264
1265 while (retry_count < IPMISELD_RETRY_ATTEMPT_MAX)
1266 {
1267 if ((rv = ipmiseld_sel_parse_log (host_data)) < 0)
1268 break;
1269
1270 if (!rv)
1271 break;
1272
1273 retry_count++;
1274 }
1275
1276 return (rv);
1277 }
1278
1279 static int
_ipmiseld_poll(void * arg)1280 _ipmiseld_poll (void *arg)
1281 {
1282 ipmiseld_host_data_t *host_data;
1283 ipmiseld_host_poll_t host_poll;
1284 unsigned int sel_flags = 0;
1285 unsigned int interpret_flags = 0;
1286 int exit_code = EXIT_FAILURE;
1287
1288 assert (arg);
1289
1290 host_data = (ipmiseld_host_data_t *)arg;
1291
1292 assert (!host_data->host_poll);
1293
1294 if (host_data->prog_data->args->foreground
1295 && host_data->prog_data->args->common_args.debug)
1296 IPMISELD_DEBUG (("Poll %s", host_data->hostname ? host_data->hostname : "localhost"));
1297
1298 memset (&host_poll, '\0', sizeof (ipmiseld_host_poll_t));
1299 host_data->host_poll = &host_poll;
1300
1301 if (ipmiseld_ipmi_setup (host_data) < 0)
1302 goto cleanup;
1303
1304 if (!host_data->prog_data->args->ignore_sdr)
1305 {
1306 if (ipmiseld_sdr_cache_create_and_load (host_data) < 0)
1307 goto cleanup;
1308 }
1309 else
1310 host_data->host_poll->sdr_ctx = NULL;
1311
1312 if (!(host_data->host_poll->sel_ctx = ipmi_sel_ctx_create (host_data->host_poll->ipmi_ctx, host_data->host_poll->sdr_ctx)))
1313 {
1314 ipmiseld_err_output (host_data, "ipmi_sel_ctx_create: %s", strerror (errno));
1315 goto cleanup;
1316 }
1317
1318 if (host_data->prog_data->args->foreground
1319 && host_data->prog_data->args->common_args.debug > 1)
1320 sel_flags |= IPMI_SEL_FLAGS_DEBUG_DUMP;
1321
1322 if (host_data->prog_data->args->common_args.section_specific_workaround_flags & IPMI_PARSE_SECTION_SPECIFIC_WORKAROUND_FLAGS_ASSUME_SYSTEM_EVENT)
1323 sel_flags |= IPMI_SEL_FLAGS_ASSUME_SYTEM_EVENT_RECORDS;
1324
1325 if (sel_flags)
1326 {
1327 /* Don't error out, if this fails we can still continue */
1328 if (ipmi_sel_ctx_set_flags (host_data->host_poll->sel_ctx, sel_flags) < 0)
1329 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_flags: %s",
1330 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1331 }
1332
1333 if (host_data->prog_data->args->foreground
1334 && host_data->prog_data->args->common_args.debug > 1
1335 && host_data->hostname)
1336 {
1337 if (ipmi_sel_ctx_set_debug_prefix (host_data->host_poll->sel_ctx, host_data->hostname) < 0)
1338 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_debug_prefix: %s",
1339 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1340 }
1341
1342 if (!(host_data->host_poll->interpret_ctx = ipmi_interpret_ctx_create ()))
1343 {
1344 ipmiseld_err_output (host_data, "ipmi_interpret_ctx_create: %s", strerror (errno));
1345 goto cleanup;
1346 }
1347
1348 if (ipmi_interpret_load_sel_config (host_data->host_poll->interpret_ctx,
1349 host_data->prog_data->args->event_state_config_file) < 0)
1350 {
1351 /* if default file is missing its ok */
1352 if (!(!host_data->prog_data->args->event_state_config_file
1353 && ipmi_interpret_ctx_errnum (host_data->host_poll->interpret_ctx) == IPMI_INTERPRET_ERR_SEL_CONFIG_FILE_DOES_NOT_EXIST))
1354 {
1355 ipmiseld_err_output (host_data, "ipmi_interpret_load_sel_config: %s", ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1356 goto cleanup;
1357 }
1358 }
1359
1360 if (host_data->prog_data->args->interpret_oem_data)
1361 interpret_flags |= IPMI_INTERPRET_FLAGS_INTERPRET_OEM_DATA;
1362
1363 if (interpret_flags)
1364 {
1365 if (ipmi_interpret_ctx_set_flags (host_data->host_poll->interpret_ctx, interpret_flags) < 0)
1366 {
1367 ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_flags: %s",
1368 ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1369 goto cleanup;
1370 }
1371 }
1372
1373 if (ipmi_sel_ctx_set_parameter (host_data->host_poll->sel_ctx,
1374 IPMI_SEL_PARAMETER_INTERPRET_CONTEXT,
1375 &(host_data->host_poll->interpret_ctx)) < 0)
1376 {
1377 err_output("ipmi_sel_ctx_set_interpret: %s",
1378 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1379 goto cleanup;
1380 }
1381
1382 if (ipmi_sel_ctx_set_separator (host_data->host_poll->sel_ctx, EVENT_OUTPUT_SEPARATOR) < 0)
1383 {
1384 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_separator: %s",
1385 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1386 return (-1);
1387 }
1388
1389 if (host_data->prog_data->args->interpret_oem_data
1390 || host_data->prog_data->args->output_oem_event_strings)
1391 {
1392 if (ipmi_get_oem_data (NULL,
1393 host_data->host_poll->ipmi_ctx,
1394 &host_data->host_poll->oem_data) < 0)
1395 return (-1);
1396
1397 if (ipmi_sel_ctx_set_manufacturer_id (host_data->host_poll->sel_ctx,
1398 host_data->host_poll->oem_data.manufacturer_id) < 0)
1399 {
1400 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_manufacturer_id: %s",
1401 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1402 return (-1);
1403 }
1404
1405 if (ipmi_sel_ctx_set_product_id (host_data->host_poll->sel_ctx,
1406 host_data->host_poll->oem_data.product_id) < 0)
1407 {
1408 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_product_id: %s",
1409 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1410 return (-1);
1411 }
1412
1413 if (ipmi_sel_ctx_set_ipmi_version (host_data->host_poll->sel_ctx,
1414 host_data->host_poll->oem_data.ipmi_version_major,
1415 host_data->host_poll->oem_data.ipmi_version_minor) < 0)
1416 {
1417 ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_ipmi_version: %s",
1418 ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1419 return (-1);
1420 }
1421
1422 if (host_data->prog_data->args->interpret_oem_data)
1423 {
1424 if (ipmi_interpret_ctx_set_manufacturer_id (host_data->host_poll->interpret_ctx,
1425 host_data->host_poll->oem_data.manufacturer_id) < 0)
1426 {
1427 ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_manufacturer_id: %s",
1428 ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1429 return (-1);
1430 }
1431
1432 if (ipmi_interpret_ctx_set_product_id (host_data->host_poll->interpret_ctx,
1433 host_data->host_poll->oem_data.product_id) < 0)
1434 {
1435 ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_product_id: %s",
1436 ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1437 return (-1);
1438 }
1439 }
1440 }
1441
1442 if (ipmiseld_sel_parse (host_data) < 0)
1443 goto cleanup;
1444
1445 exit_code = EXIT_SUCCESS;
1446 cleanup:
1447 ipmi_interpret_ctx_destroy (host_data->host_poll->interpret_ctx);
1448 ipmi_sel_ctx_destroy (host_data->host_poll->sel_ctx);
1449 ipmi_sdr_ctx_destroy (host_data->host_poll->sdr_ctx);
1450 ipmi_ctx_close (host_data->host_poll->ipmi_ctx);
1451 ipmi_ctx_destroy (host_data->host_poll->ipmi_ctx);
1452 host_data->host_poll = NULL;
1453 return (exit_code);
1454 }
1455
1456 static int
_ipmiseld_poll_postprocess(void * arg)1457 _ipmiseld_poll_postprocess (void *arg)
1458 {
1459 ipmiseld_host_data_t *host_data;
1460 struct timeval tv;
1461 int rv = -1;
1462
1463 assert (arg);
1464
1465 host_data = (ipmiseld_host_data_t *)arg;
1466
1467 assert (!host_data->host_poll);
1468
1469 gettimeofday (&tv, NULL);
1470 host_data->next_poll_time = tv.tv_sec + host_data->prog_data->args->poll_interval;
1471
1472 pthread_mutex_lock (&host_data_heap_lock);
1473
1474 if (!heap_insert (host_data_heap, host_data))
1475 {
1476 pthread_mutex_unlock (&host_data_heap_lock);
1477 ipmiseld_err_output (host_data, "heap_insert: %s", strerror (errno));
1478 goto cleanup;
1479 }
1480
1481 pthread_mutex_unlock (&host_data_heap_lock);
1482 rv = 0;
1483 cleanup:
1484 return (rv);
1485 }
1486
1487 static void
_signal_handler_callback(int sig)1488 _signal_handler_callback (int sig)
1489 {
1490 exit_flag = 0;
1491 }
1492
1493 static void
_free_host_data(void * x)1494 _free_host_data (void *x)
1495 {
1496 ipmiseld_host_data_t *host_data;;
1497
1498 assert (x);
1499
1500 host_data = (ipmiseld_host_data_t *)x;
1501 free (host_data->hostname);
1502 free (host_data);
1503 }
1504
1505 static ipmiseld_host_data_t *
_alloc_host_data(ipmiseld_prog_data_t * prog_data,const char * hostname)1506 _alloc_host_data (ipmiseld_prog_data_t *prog_data, const char *hostname)
1507 {
1508 ipmiseld_host_data_t *host_data;
1509
1510 assert (prog_data);
1511
1512 if (!(host_data = (ipmiseld_host_data_t *) malloc (sizeof (ipmiseld_host_data_t))))
1513 {
1514 err_output ("malloc: %s", strerror (errno));
1515 return (NULL);
1516 }
1517
1518 memset (host_data, '\0', sizeof (ipmiseld_host_data_t));
1519 host_data->prog_data = prog_data;
1520 if (hostname)
1521 {
1522 if (!(host_data->hostname = strdup (hostname)))
1523 {
1524 err_output ("strdup: %s", strerror (errno));
1525 free (host_data);
1526 return (NULL);
1527 }
1528 }
1529 else
1530 host_data->hostname = NULL;
1531 host_data->host_poll = NULL;
1532 host_data->re_download_sdr_done = 0;
1533 host_data->clear_sel_done = 0;
1534 host_data->next_poll_time = 0; /* 0 will first immediate check first time through */
1535 host_data->last_ipmi_errnum = 0;
1536 host_data->last_ipmi_errnum_count = 0;
1537
1538 return (host_data);
1539 }
1540
1541 static int
hostdata_timecmp(void * x,void * y)1542 hostdata_timecmp (void *x, void *y)
1543 {
1544 ipmiseld_host_data_t *hd1, *hd2;
1545
1546 assert (x);
1547 assert (y);
1548
1549 hd1 = (ipmiseld_host_data_t *)x;
1550 hd2 = (ipmiseld_host_data_t *)y;
1551
1552 if (hd1->next_poll_time < hd2->next_poll_time)
1553 return (1);
1554 else if (hd1->next_poll_time > hd2->next_poll_time)
1555 return (-1);
1556 return (0);
1557 }
1558
1559 static int
_ipmiseld(ipmiseld_prog_data_t * prog_data)1560 _ipmiseld (ipmiseld_prog_data_t *prog_data)
1561 {
1562 int hosts_count = 0;
1563 fi_hostlist_t hlist = NULL;
1564 fi_hostlist_iterator_t hitr = NULL;
1565 ipmiseld_host_data_t *host_data;
1566 char *host = NULL;
1567 int rv = -1;
1568 int ret;
1569
1570 assert (prog_data);
1571 assert (!host_data_heap);
1572
1573 if (prog_data->args->common_args.hostname)
1574 {
1575 if ((hosts_count = pstdout_hostnames_count (prog_data->args->common_args.hostname)) < 0)
1576 {
1577 err_output ("pstdout_hostnames_count: %s", pstdout_strerror (pstdout_errnum));
1578 goto cleanup;
1579 }
1580
1581 if (!hosts_count)
1582 {
1583 err_output ("invalid number of hosts specified");
1584 goto cleanup;
1585 }
1586 }
1587 else /* inband communication, hosts_count = 1 */
1588 hosts_count = 1;
1589
1590 /* don't need more threads than hosts */
1591 if (hosts_count < prog_data->args->threadpool_count)
1592 prog_data->args->threadpool_count = hosts_count;
1593
1594 if (!(host_data_heap = heap_create (hosts_count,
1595 (HeapCmpF)hostdata_timecmp,
1596 (HeapDelF)_free_host_data)))
1597 {
1598 err_output ("heap_create: %s", strerror (errno));
1599 goto cleanup;
1600 }
1601
1602 if ((ret = pthread_mutex_init (&host_data_heap_lock, NULL)))
1603 {
1604 err_output ("pthread_mutex_init: %s", strerror (ret));
1605 goto cleanup;
1606 }
1607
1608 if (hosts_count == 1)
1609 {
1610 if (!(host_data = _alloc_host_data (prog_data, prog_data->args->common_args.hostname)))
1611 goto cleanup;
1612
1613 if (!heap_insert (host_data_heap, host_data))
1614 {
1615 err_output ("heap_insert: %s", strerror (errno));
1616 goto cleanup;
1617 }
1618 }
1619 else
1620 {
1621 if (!(hlist = fi_hostlist_create (prog_data->args->common_args.hostname)))
1622 {
1623 err_output ("fi_hostlist_create: %s", strerror (errno));
1624 goto cleanup;
1625 }
1626
1627 if (!(hitr = fi_hostlist_iterator_create (hlist)))
1628 {
1629 err_output ("fi_hostlist_iterator_create: %s", strerror (errno));
1630 goto cleanup;
1631 }
1632
1633 while ((host = fi_hostlist_next (hitr)))
1634 {
1635 if (!(host_data = _alloc_host_data (prog_data, host)))
1636 goto cleanup;
1637
1638 if (!heap_insert (host_data_heap, host_data))
1639 {
1640 err_output ("heap_insert: %s", strerror (errno));
1641 goto cleanup;
1642 }
1643
1644 free(host);
1645 }
1646 host = NULL;
1647 }
1648
1649 if (ipmiseld_threadpool_init (prog_data,
1650 _ipmiseld_poll,
1651 _ipmiseld_poll_postprocess) < 0)
1652 goto cleanup;
1653
1654 if (prog_data->args->test_run)
1655 {
1656 while (!heap_is_empty (host_data_heap))
1657 {
1658 if (!(host_data = heap_pop (host_data_heap)))
1659 {
1660 err_output ("heap_pop: %s", strerror (errno));
1661 goto cleanup;
1662 }
1663
1664 _ipmiseld_poll (host_data);
1665 }
1666 }
1667 else
1668 {
1669 while (exit_flag)
1670 {
1671 pthread_mutex_lock (&host_data_heap_lock);
1672
1673 host_data = heap_pop (host_data_heap);
1674
1675 pthread_mutex_unlock (&host_data_heap_lock);
1676
1677 /* empty heap, small chance of this happening, but
1678 * everything is processing and previous sleeps didn't sleep
1679 * long enough. So we just need to wait until something
1680 * else finishes.
1681 *
1682 * There's no way to know the exact right amount of time, so
1683 * we're going to make an estimate. What we'll do is
1684 * estimate 1/5th the time of a IPMI session timeout. So in
1685 * the event the previous poll fully timed out, we will
1686 * interrupt and go through this loop only 5 times.
1687 */
1688 if (!host_data)
1689 {
1690 unsigned int waittime;
1691
1692 if (prog_data->args->common_args.session_timeout)
1693 waittime = prog_data->args->common_args.session_timeout;
1694 else
1695 waittime = IPMI_SESSION_TIMEOUT_DEFAULT;
1696
1697 /* session timeout is in milliseconds */
1698 waittime /= 1000;
1699
1700 /* now take a 5th of it */
1701 waittime /= 5;
1702
1703 if (!waittime)
1704 waittime = 1;
1705
1706 daemon_sleep (waittime);
1707 continue;
1708 }
1709
1710 if (ipmiseld_threadpool_queue (host_data) < 0)
1711 {
1712 pthread_mutex_lock (&host_data_heap_lock);
1713
1714 if (!heap_insert (host_data_heap, host_data))
1715 ipmiseld_err_output (host_data, "heap_insert: %s", strerror (errno));
1716
1717 pthread_mutex_unlock (&host_data_heap_lock);
1718 }
1719
1720 pthread_mutex_lock (&host_data_heap_lock);
1721
1722 host_data = heap_peek (host_data_heap);
1723
1724 pthread_mutex_unlock (&host_data_heap_lock);
1725
1726 /* empty heap, everything must be processing, so we'll sleep
1727 * for the poll interval, b/c no one should be scheduled
1728 * until after this time has passed anyways.
1729 */
1730 if (!host_data)
1731 daemon_sleep (prog_data->args->poll_interval + 1);
1732 else
1733 {
1734 struct timeval tv;
1735
1736 gettimeofday (&tv, NULL);
1737
1738 /* If next_poll_time == 0, no sleep, its the first time through */
1739 if (host_data->next_poll_time
1740 && (host_data->next_poll_time > tv.tv_sec))
1741 daemon_sleep ((host_data->next_poll_time - tv.tv_sec) + 1);
1742 }
1743 }
1744 }
1745
1746 rv = 0;
1747 cleanup:
1748 ipmiseld_threadpool_destroy ();
1749 heap_destroy (host_data_heap);
1750 fi_hostlist_iterator_destroy (hitr);
1751 fi_hostlist_destroy (hlist);
1752 free (host);
1753 return (rv);
1754 }
1755
1756 int
main(int argc,char ** argv)1757 main (int argc, char **argv)
1758 {
1759 ipmiseld_prog_data_t prog_data;
1760 struct ipmiseld_arguments cmd_args;
1761
1762 err_init (argv[0]);
1763 err_set_flags (ERROR_STDERR);
1764
1765 ipmi_disable_coredump ();
1766
1767 prog_data.progname = argv[0];
1768 ipmiseld_argp_parse (argc, argv, &cmd_args);
1769 prog_data.args = &cmd_args;
1770
1771 if (prog_data.args->event_state_filter_str)
1772 prog_data.event_state_filter_mask = ipmiseld_event_state_filter_parse (prog_data.args->event_state_filter_str);
1773 else
1774 prog_data.event_state_filter_mask = 0;
1775
1776 if (prog_data.args->log_facility_str)
1777 prog_data.log_facility = ipmiseld_log_facility_parse (prog_data.args->log_facility_str);
1778 else
1779 prog_data.log_facility = LOG_DAEMON;
1780
1781 if (prog_data.args->log_priority_str)
1782 prog_data.log_priority = ipmiseld_log_priority_parse (prog_data.args->log_priority_str);
1783 else
1784 prog_data.log_priority = LOG_ERR;
1785
1786 if (!cmd_args.test_run)
1787 {
1788 if (!cmd_args.foreground)
1789 {
1790 daemonize_common (IPMISELD_PIDFILE);
1791 err_set_flags (ERROR_SYSLOG);
1792 }
1793 else
1794 err_set_flags (ERROR_STDERR);
1795
1796 daemon_signal_handler_setup (_signal_handler_callback);
1797
1798 /* Call after daemonization, since daemonization closes currently
1799 * open fds
1800 */
1801 if (argv[0][0] == '/')
1802 argv[0] = strrchr(argv[0], '/') + 1;
1803 openlog (argv[0], LOG_ODELAY | LOG_PID, prog_data.log_facility);
1804 }
1805
1806 return (_ipmiseld (&prog_data));
1807 }
1808