1 /*****************************************************************************\
2  *  $Id: ipmiseld.c,v 1.17 2010-02-08 22:02:30 chu11 Exp $
3  *****************************************************************************
4  *  Copyright (C) 2012-2015 Lawrence Livermore National Security, LLC.
5  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
6  *  Written by Albert Chu <chu11@llnl.gov>
7  *  LLNL-CODE-559172
8  *
9  *  This file is part of Ipmiseld, an IPMI SEL syslog logging daemon.
10  *  For details, see http://www.llnl.gov/linux/.
11  *
12  *  Ipmiseld is free software; you can redistribute it and/or modify
13  *  it under the terms of the GNU General Public License as published by the
14  *  Free Software Foundation; either version 3 of the License, or (at your
15  *  option) any later version.
16  *
17  *  Ipmiseld is distributed in the hope that it will be useful, but
18  *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
19  *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
20  *  for more details.
21  *
22  *  You should have received a copy of the GNU General Public License along
23  *  with Ipmiseld.  If not, see <http://www.gnu.org/licenses/>.
24 \*****************************************************************************/
25 
26 #ifdef HAVE_CONFIG_H
27 #include "config.h"
28 #endif /* HAVE_CONFIG_H */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <stdint.h>
33 #if STDC_HEADERS
34 #include <string.h>
35 #endif /* STDC_HEADERS */
36 #if TIME_WITH_SYS_TIME
37 #include <sys/time.h>
38 #include <time.h>
39 #else /* !TIME_WITH_SYS_TIME */
40 #if HAVE_SYS_TIME_H
41 #include <sys/time.h>
42 #else /* !HAVE_SYS_TIME_H */
43 #include <time.h>
44 #endif /* !HAVE_SYS_TIME_H */
45 #endif /* !TIME_WITH_SYS_TIME */
46 #include <syslog.h>
47 #include <pthread.h>
48 #include <assert.h>
49 #include <errno.h>
50 
51 #include <freeipmi/freeipmi.h>
52 
53 #include "ipmiseld.h"
54 #include "ipmiseld-argp.h"
55 #include "ipmiseld-cache.h"
56 #include "ipmiseld-common.h"
57 #include "ipmiseld-debug.h"
58 #include "ipmiseld-ipmi-communication.h"
59 #include "ipmiseld-threadpool.h"
60 
61 #include "freeipmi-portability.h"
62 #include "error.h"
63 #include "fi_hostlist.h"
64 #include "heap.h"
65 #include "pstdout.h"
66 #include "tool-common.h"
67 #include "tool-daemon-common.h"
68 #include "tool-event-common.h"
69 #include "tool-util-common.h"
70 
71 #define IPMISELD_PIDFILE                IPMISELD_LOCALSTATEDIR "/run/ipmiseld.pid"
72 
73 #define IPMISELD_FORMAT_BUFLEN          4096
74 
75 #define IPMISELD_EVENT_OUTPUT_BUFLEN    4096
76 
77 #define IPMISELD_RETRY_ATTEMPT_MAX      3
78 
79 static Heap host_data_heap = NULL;
80 static pthread_mutex_t host_data_heap_lock = PTHREAD_MUTEX_INITIALIZER;
81 
82 static int exit_flag = 1;
83 
84 static int
ipmiseld_sel_info_get(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info)85 ipmiseld_sel_info_get (ipmiseld_host_data_t *host_data, ipmiseld_sel_info_t *sel_info)
86 {
87   fiid_obj_t obj_cmd_rs = NULL;
88   uint64_t val;
89   int rv = -1;
90 
91   assert (host_data);
92   assert (host_data->host_poll);
93   assert (host_data->host_poll->ipmi_ctx);
94   assert (sel_info);
95 
96   if (!(obj_cmd_rs = fiid_obj_create (tmpl_cmd_get_sel_info_rs)))
97     {
98       ipmiseld_err_output (host_data, "fiid_obj_create: %s", strerror (errno));
99       goto cleanup;
100     }
101 
102   if (ipmi_cmd_get_sel_info (host_data->host_poll->ipmi_ctx, obj_cmd_rs) < 0)
103     {
104       ipmiseld_err_output (host_data, "ipmi_cmd_get_sel_info: %s",
105                   ipmi_ctx_errormsg (host_data->host_poll->ipmi_ctx));
106       goto cleanup;
107     }
108 
109   if (FIID_OBJ_GET (obj_cmd_rs, "entries", &val) < 0)
110     {
111       ipmiseld_err_output (host_data, "fiid_obj_get: 'entries': %s",
112                   fiid_obj_errormsg (obj_cmd_rs));
113       goto cleanup;
114     }
115   sel_info->entries = val;
116 
117   if (FIID_OBJ_GET (obj_cmd_rs, "free_space", &val) < 0)
118     {
119       ipmiseld_err_output (host_data, "fiid_obj_get: 'free_space': %s",
120                   fiid_obj_errormsg (obj_cmd_rs));
121       goto cleanup;
122     }
123   sel_info->free_space = val;
124 
125   if (FIID_OBJ_GET (obj_cmd_rs, "most_recent_addition_timestamp", &val) < 0)
126     {
127       ipmiseld_err_output (host_data, "fiid_obj_get: 'most_recent_addition_timestamp': %s",
128                   fiid_obj_errormsg (obj_cmd_rs));
129       goto cleanup;
130     }
131   sel_info->most_recent_addition_timestamp = val;
132 
133   if (FIID_OBJ_GET (obj_cmd_rs, "most_recent_erase_timestamp", &val) < 0)
134     {
135       ipmiseld_err_output (host_data, "fiid_obj_get: 'most_recent_erase_timestamp': %s",
136                   fiid_obj_errormsg (obj_cmd_rs));
137       goto cleanup;
138     }
139   sel_info->most_recent_erase_timestamp = val;
140 
141   if (FIID_OBJ_GET (obj_cmd_rs, "delete_sel_command_supported", &val) < 0)
142     {
143       ipmiseld_err_output (host_data, "fiid_obj_get: 'delete_sel_command_supported': %s",
144                   fiid_obj_errormsg (obj_cmd_rs));
145       goto cleanup;
146     }
147  sel_info->delete_sel_command_supported = val;
148 
149   if (FIID_OBJ_GET (obj_cmd_rs, "reserve_sel_command_supported", &val) < 0)
150     {
151       ipmiseld_err_output (host_data, "fiid_obj_get: 'reserve_sel_command_supported': %s",
152                   fiid_obj_errormsg (obj_cmd_rs));
153       goto cleanup;
154     }
155   sel_info->reserve_sel_command_supported = val;
156 
157   if (FIID_OBJ_GET (obj_cmd_rs, "overflow_flag", &val) < 0)
158     {
159       ipmiseld_err_output (host_data, "fiid_obj_get: 'overflow_flag': %s",
160                   fiid_obj_errormsg (obj_cmd_rs));
161       goto cleanup;
162     }
163   sel_info->overflow_flag = val;
164 
165   rv = 0;
166  cleanup:
167   fiid_obj_destroy (obj_cmd_rs);
168   return (rv);
169 }
170 
171 static int
_sel_last_record_id_callback(ipmi_sel_ctx_t ctx,void * callback_data)172 _sel_last_record_id_callback (ipmi_sel_ctx_t ctx, void *callback_data)
173 {
174   ipmiseld_last_record_id_t *last_record_id;
175 
176   assert (ctx);
177   assert (callback_data);
178 
179   last_record_id = (ipmiseld_last_record_id_t *)callback_data;
180 
181   if (ipmi_sel_parse_read_record_id (ctx,
182                                      NULL,
183                                      0,
184                                      &(last_record_id->record_id)) < 0)
185     {
186       err_output ("ipmi_sel_parse_read_record_id: %s",
187                   ipmi_sel_ctx_errormsg (ctx));
188       return (-1);
189     }
190 
191   last_record_id->loaded = 1;
192   return (0);
193 }
194 
195 static int
ipmiseld_get_last_record_id(ipmiseld_host_data_t * host_data,ipmiseld_last_record_id_t * last_record_id)196 ipmiseld_get_last_record_id (ipmiseld_host_data_t *host_data,
197                              ipmiseld_last_record_id_t *last_record_id)
198 {
199   assert (host_data);
200   assert (host_data->host_poll);
201   assert (host_data->host_poll->sel_ctx);
202   assert (last_record_id);
203 
204   last_record_id->loaded = 0;
205 
206   if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
207                       IPMI_SEL_RECORD_ID_LAST,
208                       IPMI_SEL_RECORD_ID_LAST,
209                       _sel_last_record_id_callback,
210                       last_record_id) < 0)
211     {
212       /* A general IPMI error (busy, timeout, etc.) is ok, it happens */
213       if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) != IPMI_SEL_ERR_IPMI_ERROR)
214         ipmiseld_err_output (host_data, "ipmi_sel_parse: %s", ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
215       return (-1);
216     }
217 
218   return (0);
219 }
220 
221 static unsigned int
ipmiseld_calc_percent_full(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info)222 ipmiseld_calc_percent_full (ipmiseld_host_data_t *host_data,
223                             ipmiseld_sel_info_t *sel_info)
224 {
225   unsigned int used_bytes;
226   unsigned int total_bytes;
227   unsigned int percent;
228 
229   assert (host_data);
230   assert (sel_info);
231 
232   used_bytes = (sel_info->entries * IPMI_SEL_RECORD_MAX_RECORD_LENGTH);
233   total_bytes = used_bytes + sel_info->free_space;
234   percent = (int)(100 * (double)used_bytes/total_bytes);
235 
236   if (percent > 100)
237     {
238       /* Some rounding errors could occur, we accept small ones */
239       if (percent > 105)
240         {
241           if (host_data->prog_data->args->verbose_count)
242             ipmiseld_syslog_host (host_data, "SEL percent calc error: %u", percent);
243         }
244       percent = 100;
245     }
246 
247   return (percent);
248 }
249 
250 static int
ipmiseld_host_state_init(ipmiseld_host_data_t * host_data)251 ipmiseld_host_state_init (ipmiseld_host_data_t *host_data)
252 {
253   unsigned int percent;
254   int rv = -1;
255 
256   assert (host_data);
257 
258   if (ipmiseld_get_last_record_id (host_data, &(host_data->last_host_state.last_record_id)) < 0)
259     goto cleanup;
260 
261   /* possible SEL is empty */
262   if (!host_data->last_host_state.last_record_id.loaded)
263     {
264       host_data->last_host_state.last_record_id.record_id = 0;
265       host_data->last_host_state.last_record_id.loaded = 1;
266     }
267 
268   if (ipmiseld_sel_info_get (host_data, &(host_data->last_host_state.sel_info)) < 0)
269     goto cleanup;
270 
271   percent = ipmiseld_calc_percent_full (host_data, &(host_data->last_host_state.sel_info));
272   host_data->last_host_state.last_percent_full = percent;
273 
274   host_data->last_host_state.initialized = 1;
275   rv = 0;
276  cleanup:
277   return (rv);
278 }
279 
280 /* return (-1), real error */
281 static int
_sel_parse_err_handle(ipmiseld_host_data_t * host_data,char * func)282 _sel_parse_err_handle (ipmiseld_host_data_t *host_data, char *func)
283 {
284   assert (host_data);
285   assert (func);
286 
287   if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_INVALID_SEL_ENTRY)
288     {
289       /* maybe a bad SEL entry returned from remote system, don't error out */
290       if (host_data->prog_data->args->verbose_count)
291         ipmiseld_syslog_host (host_data, "Invalid SEL entry read");
292       return (0);
293     }
294 
295   ipmiseld_err_output (host_data, "%s: %s",
296               func,
297               ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
298 
299   return (-1);
300 }
301 
/*
 * Append formatted text to 'buf' at offset '*wlen'.
 *
 * buf    - destination buffer
 * buflen - total size of 'buf' in bytes
 * wlen   - in/out: number of bytes already used in 'buf'
 * fmt    - printf-style format, followed by its arguments
 *
 * Returns 0 on success, with '*wlen' advanced by the number of
 * characters written.  Returns 1 if the output did not fit (including
 * when the buffer was already full) or if formatting failed; '*wlen' is
 * then set to 'buflen' so that later appends also report truncation.
 */
static int
_snprintf (char *buf,
           unsigned int buflen,
           unsigned int *wlen,
           const char *fmt,
           ...)
{
  va_list ap;
  unsigned int remaining;
  int ret;

  assert (buf);
  assert (buflen);
  assert (wlen);
  assert (fmt);

  /* Never let the unsigned subtraction below wrap around: a full (or
   * inconsistent) offset means there is no room left at all.
   */
  if (*wlen >= buflen)
    {
      (*wlen) = buflen;
      return (1);
    }
  remaining = buflen - *wlen;

  va_start (ap, fmt);
  ret = vsnprintf (buf + *wlen, remaining, fmt, ap);
  va_end (ap);

  /* A negative return is a formatting error; handle it explicitly
   * instead of relying on signed-to-unsigned conversion.
   */
  if (ret < 0 || (unsigned int) ret >= remaining)
    {
      (*wlen) = buflen;
      return (1);
    }

  (*wlen) += (unsigned int) ret;
  return (0);
}
329 
330 static int
_sel_log_format(ipmiseld_host_data_t * host_data,const char * fmt_str,char * fmtbuf,unsigned int fmtbuf_len)331 _sel_log_format (ipmiseld_host_data_t *host_data,
332                  const char *fmt_str,
333                  char *fmtbuf,
334                  unsigned int fmtbuf_len)
335 {
336   unsigned int wlen = 0;
337   int percent_flag = 0;
338   char *ptr;
339 
340   assert (host_data);
341   assert (fmt_str);
342 
343   ptr = (char *)fmt_str;
344   while (*ptr)
345     {
346       if (*ptr == '%')
347         {
348           if (percent_flag)
349             {
350               if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%%"))
351                 return (0);
352               percent_flag = 0;
353             }
354           else
355             percent_flag = 1;
356           goto end_loop;
357         }
358       else if (percent_flag && *ptr == 'h')
359         {
360           if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%s",
361                          host_data->hostname ? host_data->hostname : "localhost"))
362             return (0);
363           percent_flag = 0;
364         }
365       else
366         {
367           if (percent_flag)
368             {
369               if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%%%c", *ptr))
370                 return (0);
371               percent_flag = 0;
372             }
373           else
374             {
375               if (_snprintf (fmtbuf, fmtbuf_len, &wlen, "%c", *ptr))
376                 return (0);
377             }
378         }
379 
380     end_loop:
381       ptr++;
382     }
383 
384   return (0);
385 }
386 
387 static int
_sel_log_output(ipmiseld_host_data_t * host_data,uint8_t record_type)388 _sel_log_output (ipmiseld_host_data_t *host_data, uint8_t record_type)
389 {
390   char fmtbuf[IPMISELD_FORMAT_BUFLEN + 1];
391   char outbuf[IPMISELD_EVENT_OUTPUT_BUFLEN + 1];
392   int outbuf_len;
393   unsigned int flags;
394   int record_type_class;
395   char *format_str;
396   uint16_t record_id;
397 
398   assert (host_data);
399 
400   memset (fmtbuf, '\0', IPMISELD_FORMAT_BUFLEN + 1);
401   memset (outbuf, '\0', IPMISELD_EVENT_OUTPUT_BUFLEN + 1);
402 
403   if (ipmi_sel_parse_read_record_id (host_data->host_poll->sel_ctx,
404                                      NULL,
405                                      0,
406                                      &record_id) < 0)
407     {
408       ipmiseld_err_output (host_data, "ipmi_sel_parse_read_record_id: %s",
409                   ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
410       return (-1);
411     }
412 
413   if (host_data->prog_data->args->foreground
414       && host_data->prog_data->args->common_args.debug)
415     IPMISELD_HOST_DEBUG (("SEL Record parsed: Record ID = %u", record_id));
416 
417   /* achu:
418    *
419    * Algorithmically we can "find" the next entry to log several ways,
420    * but there are two reasonable ways.
421    *
422    * 1) Whatever the last record id is, add 1 to it and iterate until
423    * you reach the next valid SEL record id.
424    *
425    * 2) Read the last record id, and use that to get the next record
426    * id to log.
427    *
428    * While '1' will be faster on most systems, there are a number of
429    * systems were vendors jump semi-big chunks of record ids on new
430    * events (I have no idea why, it makes no sense).  We will
431    * implement '2' as the most reasonable average solution.  So when
432    * we hit this callback with the already logged last record id, we
433    * need to not log it.  '2' is also the safer implementation, in the
434    * event there is a bug in the firmware, and we could loop endlessly
435    * looking for the next entry to log when there is none.
436    */
437   if (host_data->now_host_state.last_record_id.record_id == record_id)
438     return (0);
439 
440   flags = IPMI_SEL_STRING_FLAGS_IGNORE_UNAVAILABLE_FIELD;
441   flags |= IPMI_SEL_STRING_FLAGS_OUTPUT_NOT_AVAILABLE;
442   flags |= IPMI_SEL_STRING_FLAGS_DATE_MONTH_STRING;
443   if (host_data->prog_data->args->verbose_count)
444     flags |= IPMI_SEL_STRING_FLAGS_VERBOSE;
445   if (host_data->prog_data->args->entity_sensor_names)
446     flags |= IPMI_SEL_STRING_FLAGS_ENTITY_SENSOR_NAMES;
447   if (host_data->prog_data->args->non_abbreviated_units)
448     flags |= IPMI_SEL_STRING_FLAGS_NON_ABBREVIATED_UNITS;
449   if (host_data->prog_data->args->interpret_oem_data)
450     flags |= IPMI_SEL_STRING_FLAGS_INTERPRET_OEM_DATA;
451 
452   record_type_class = ipmi_sel_record_type_class (record_type);
453   if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD)
454     {
455       if (host_data->prog_data->args->system_event_format_str)
456         format_str = host_data->prog_data->args->system_event_format_str;
457       else
458         {
459           if (host_data->hostname)
460             format_str = IPMISELD_SYSTEM_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
461           else
462             format_str = IPMISELD_SYSTEM_EVENT_FORMAT_STR_DEFAULT;
463         }
464     }
465   else if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD)
466     {
467       if (host_data->prog_data->args->oem_timestamped_event_format_str)
468         format_str = host_data->prog_data->args->oem_timestamped_event_format_str;
469       else
470         {
471           if (host_data->hostname)
472             format_str = IPMISELD_OEM_TIMESTAMPED_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
473           else
474             format_str = IPMISELD_OEM_TIMESTAMPED_EVENT_FORMAT_STR_DEFAULT;
475         }
476     }
477   else if (record_type_class == IPMI_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD)
478     {
479       if (host_data->prog_data->args->oem_non_timestamped_event_format_str)
480         format_str = host_data->prog_data->args->oem_non_timestamped_event_format_str;
481       else
482         {
483           if (host_data->hostname)
484             format_str = IPMISELD_OEM_NON_TIMESTAMPED_EVENT_FORMAT_OUTOFBAND_STR_DEFAULT;
485           else
486             format_str = IPMISELD_OEM_NON_TIMESTAMPED_EVENT_FORMAT_STR_DEFAULT;
487         }
488     }
489   else
490     {
491       if (host_data->prog_data->args->verbose_count)
492         ipmiseld_syslog_host (host_data,
493                               "SEL Event: Unknown SEL Record Type: %Xh",
494                               record_type);
495       return (0);
496     }
497 
498   if (_sel_log_format (host_data,
499                        format_str,
500                        fmtbuf,
501                        IPMISELD_FORMAT_BUFLEN) < 0)
502     return (-1);
503 
504   if ((outbuf_len = ipmi_sel_parse_read_record_string (host_data->host_poll->sel_ctx,
505                                                        fmtbuf,
506                                                        NULL,
507                                                        0,
508                                                        outbuf,
509                                                        IPMISELD_EVENT_OUTPUT_BUFLEN,
510                                                        flags)) < 0)
511     {
512       if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_string") < 0)
513         return (-1);
514       return (0);
515     }
516 
517   if (outbuf_len)
518     ipmiseld_syslog (host_data, "%s", outbuf);
519 
520   host_data->now_host_state.last_record_id.record_id = record_id;
521 
522   return (0);
523 }
524 
525 static int
_sel_parse_callback(ipmi_sel_ctx_t ctx,void * callback_data)526 _sel_parse_callback (ipmi_sel_ctx_t ctx, void *callback_data)
527 {
528   ipmiseld_host_data_t *host_data;
529   uint8_t record_type;
530   int record_type_class;
531   int rv = -1;
532 
533   assert (ctx);
534   assert (callback_data);
535 
536   host_data = (ipmiseld_host_data_t *)callback_data;
537 
538   if (host_data->prog_data->args->sensor_types_length
539       || host_data->prog_data->args->exclude_sensor_types_length)
540     {
541       uint8_t sensor_type;
542       int flag;
543 
544       if (ipmi_sel_parse_read_sensor_type (host_data->host_poll->sel_ctx,
545                                            NULL,
546                                            0,
547                                            &sensor_type) < 0)
548         {
549           if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
550             goto cleanup;
551           goto out;
552         }
553 
554       if (host_data->prog_data->args->sensor_types_length)
555         {
556           if ((flag = sensor_type_listed (NULL,
557                                           sensor_type,
558                                           host_data->prog_data->args->sensor_types,
559                                           host_data->prog_data->args->sensor_types_length)) < 0)
560             goto cleanup;
561 
562           if (!flag)
563             goto out;
564         }
565 
566       if (host_data->prog_data->args->exclude_sensor_types_length)
567         {
568           if ((flag = sensor_type_listed (NULL,
569                                           sensor_type,
570                                           host_data->prog_data->args->exclude_sensor_types,
571                                           host_data->prog_data->args->exclude_sensor_types_length)) < 0)
572             goto cleanup;
573 
574           if (flag)
575             goto out;
576         }
577     }
578 
579   if (ipmi_sel_parse_read_record_type (host_data->host_poll->sel_ctx,
580                                        NULL,
581                                        0,
582                                        &record_type) < 0)
583     {
584       if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
585         goto cleanup;
586       goto out;
587     }
588 
589   /* IPMI Workaround
590    *
591    * HP DL 380 G5
592    * Intel S2600JF/Appro 512X
593    *
594    * Motherboard is reporting invalid SEL Records types (0x00 on HP DL
595    * 380 G5, 0x03 on Intel S2600JF/Appro 512X)
596    */
597   if (host_data->prog_data->args->common_args.section_specific_workaround_flags & IPMI_PARSE_SECTION_SPECIFIC_WORKAROUND_FLAGS_ASSUME_SYSTEM_EVENT
598       && (!IPMI_SEL_RECORD_TYPE_VALID (record_type)))
599     record_type = IPMI_SEL_RECORD_TYPE_SYSTEM_EVENT_RECORD;
600 
601   record_type_class = ipmi_sel_record_type_class (record_type);
602 
603   if (host_data->prog_data->args->system_event_only
604       && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_SYSTEM_EVENT_RECORD)
605     goto out;
606 
607   if (host_data->prog_data->args->oem_event_only
608       && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_TIMESTAMPED_OEM_RECORD
609       && record_type_class != IPMI_SEL_RECORD_TYPE_CLASS_NON_TIMESTAMPED_OEM_RECORD)
610     goto out;
611 
612   if (host_data->prog_data->event_state_filter_mask)
613     {
614       char sel_record[IPMI_SEL_RECORD_MAX_RECORD_LENGTH];
615       int sel_record_len;
616       unsigned int event_state = 0;
617 
618       if ((sel_record_len = ipmi_sel_parse_read_record (host_data->host_poll->sel_ctx,
619                                                         sel_record,
620                                                         IPMI_SEL_RECORD_MAX_RECORD_LENGTH)) < 0)
621         {
622           if (_sel_parse_err_handle (host_data, "ipmi_sel_parse_read_record_type") < 0)
623             goto cleanup;
624           goto out;
625         }
626 
627       if (ipmi_interpret_sel (host_data->host_poll->interpret_ctx,
628                               sel_record,
629                               sel_record_len,
630                               &event_state) < 0)
631         {
632           ipmiseld_err_output (host_data, "ipmi_interpret_sel: %s",
633                       ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
634           goto cleanup;
635         }
636 
637       if ((host_data->prog_data->event_state_filter_mask & IPMISELD_NOMINAL_FILTER)
638           && event_state == IPMI_INTERPRET_STATE_NOMINAL)
639         goto out;
640 
641       if ((host_data->prog_data->event_state_filter_mask & IPMISELD_WARNING_FILTER)
642           && event_state == IPMI_INTERPRET_STATE_WARNING)
643         goto out;
644 
645       if ((host_data->prog_data->event_state_filter_mask & IPMISELD_CRITICAL_FILTER)
646           && event_state == IPMI_INTERPRET_STATE_CRITICAL)
647         goto out;
648 
649       if ((host_data->prog_data->event_state_filter_mask & IPMISELD_NA_FILTER)
650           && event_state == IPMI_INTERPRET_STATE_UNKNOWN)
651         goto out;
652     }
653 
654   if (_sel_log_output (host_data, record_type) < 0)
655     goto cleanup;
656 
657  out:
658   rv = 0;
659  cleanup:
660   return (rv);
661 }
662 
663 static int
ipmiseld_sel_parse_test_run(ipmiseld_host_data_t * host_data)664 ipmiseld_sel_parse_test_run (ipmiseld_host_data_t *host_data)
665 {
666   assert (host_data);
667   assert (host_data->host_poll);
668   assert (host_data->host_poll->sel_ctx);
669 
670   if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
671                       IPMI_SEL_RECORD_ID_FIRST,
672                       IPMI_SEL_RECORD_ID_LAST,
673                       _sel_parse_callback,
674                       host_data) < 0)
675     {
676       /* A general IPMI error (busy, timeout, etc.) is ok, it happens */
677       if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) != IPMI_SEL_ERR_IPMI_ERROR)
678         ipmiseld_err_output (host_data, "ipmi_sel_parse: %s",
679                     ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
680       return (-1);
681     }
682 
683   return (0);
684 }
685 
686 static void
_dump_sel_info(ipmiseld_host_data_t * host_data,ipmiseld_sel_info_t * sel_info,const char * prefix)687 _dump_sel_info (ipmiseld_host_data_t *host_data,
688                 ipmiseld_sel_info_t *sel_info,
689                 const char *prefix)
690 {
691   assert (host_data);
692   assert (host_data->prog_data->args->foreground);
693   assert (host_data->prog_data->args->common_args.debug);
694   assert (sel_info);
695   assert (prefix);
696 
697   IPMISELD_HOST_DEBUG (("%s: Entries = %u", prefix, sel_info->entries));
698   IPMISELD_HOST_DEBUG (("%s: Free Space = %u", prefix, sel_info->free_space));
699   IPMISELD_HOST_DEBUG (("%s: Most Recent Addition Timestamp = %u", prefix, sel_info->most_recent_addition_timestamp));
700   IPMISELD_HOST_DEBUG (("%s: Most Recent Erase Timestamp = %u", prefix, sel_info->most_recent_erase_timestamp));
701   IPMISELD_HOST_DEBUG (("%s: Delete Sel Command Supported = %u", prefix, sel_info->delete_sel_command_supported));
702   IPMISELD_HOST_DEBUG (("%s: Reserve Sel Command Supported = %u", prefix, sel_info->reserve_sel_command_supported));
703   IPMISELD_HOST_DEBUG (("%s: Overflow Flag = %u", prefix, sel_info->overflow_flag));
704 }
705 
706 static void
_dump_host_state(ipmiseld_host_data_t * host_data,ipmiseld_host_state_t * host_state,const char * prefix)707 _dump_host_state (ipmiseld_host_data_t *host_data,
708                   ipmiseld_host_state_t *host_state,
709                   const char *prefix)
710 {
711   assert (host_data);
712   assert (host_data->prog_data->args->foreground);
713   assert (host_data->prog_data->args->common_args.debug);
714   assert (host_state);
715   assert (prefix);
716 
717   IPMISELD_HOST_DEBUG (("%s: Last Record ID = %u", prefix, host_state->last_record_id.record_id));
718   IPMISELD_HOST_DEBUG (("%s: Last Percent Full = %u", prefix, host_state->last_percent_full));
719   _dump_sel_info (host_data, &(host_state->sel_info), prefix);
720 }
721 
722 /* returns 1 to log events, 0 if not, -1 on error */
723 static int
ipmiseld_check_sel_info(ipmiseld_host_data_t * host_data,uint16_t * record_id_start)724 ipmiseld_check_sel_info (ipmiseld_host_data_t *host_data, uint16_t *record_id_start)
725 {
726   int log_entries_flag = 0;
727   int rv = -1;
728 
729   assert (host_data);
730   assert (record_id_start);
731 
732   if (host_data->now_host_state.sel_info.most_recent_addition_timestamp < host_data->last_host_state.sel_info.most_recent_addition_timestamp
733       || host_data->now_host_state.sel_info.most_recent_erase_timestamp < host_data->last_host_state.sel_info.most_recent_erase_timestamp)
734     {
735       /* This shouldn't be possible under normal circumstances, but
736        * could occur if the user changes the SEL timestamp or clock.
737        * Or perhaps a vendor firmware update or similar action
738        * modified the clock.
739        *
740        * Under this circumstance, we will treat the timestamps has
741        * having changed (note that all checks below are for "not equal
742        * to" and not "greater than" or "less than".  We just log to
743        * note this.
744        */
745       if (host_data->prog_data->args->verbose_count)
746         ipmiseld_syslog_host (host_data, "SEL timestamps modified to earlier time");
747     }
748 
749   if (host_data->now_host_state.sel_info.entries == host_data->last_host_state.sel_info.entries)
750     {
751       /* Small chance entry count is the same after a
752        * out-of-daemon clear.  Need to do some checks to handle
753        * for this
754        */
755 
756       /* Timestamps unchanged - this is the most common/normal case, no new log entries to log. */
757       if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp
758           && host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
759         {
760           /* nothing to do except this single copy/save */
761           host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
762         }
763       /* If erase timestamp changed but addition timestamp has
764        * not.  An out-of-daemon delete/clear occurred, but
765        * there are no new entries to log.
766        */
767       else if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp
768                && host_data->now_host_state.sel_info.most_recent_erase_timestamp != host_data->last_host_state.sel_info.most_recent_erase_timestamp)
769         {
770           if (host_data->now_host_state.sel_info.delete_sel_command_supported)
771             {
772               /* We don't know if the erase was for some old entries or if it was a clear.
773                * We will look at the last_record_id to take a guess
774                */
775               ipmiseld_last_record_id_t last_record_id;
776 
777               if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
778                 goto cleanup;
779 
780               /* If new last_record_id has changed or there are no
781                * records, we assume the erase was a clear
782                */
783               if (!last_record_id.loaded
784                   || last_record_id.record_id != host_data->last_host_state.last_record_id.record_id)
785                 host_data->now_host_state.last_record_id.record_id = 0;
786               else
787                 host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
788             }
789           else
790             {
791               /* If delete not supported, the erase must have been a clear.
792                * Reset last_record_id to zero.
793                */
794               host_data->now_host_state.last_record_id.record_id = 0;
795             }
796         }
797       /* An erase and addition occured, must determine the type of action that occurred */
798       else if (host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp
799                && host_data->now_host_state.sel_info.most_recent_erase_timestamp != host_data->last_host_state.sel_info.most_recent_erase_timestamp)
800         {
801           if (host_data->now_host_state.sel_info.delete_sel_command_supported)
802             {
803               /* We don't know if the erase was for some old entries or if it was a clear.
804                * We will look at the last_record_id to take a guess
805                */
806               ipmiseld_last_record_id_t last_record_id;
807 
808               if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
809                 goto cleanup;
810 
811               /* If new last_record_id is greater, we assume it's some additional entries
812                * and the erase was only deleting some old entries.
813                */
814               if (last_record_id.loaded
815                   && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
816                 {
817                   host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
818                   if (host_data->last_host_state.last_record_id.record_id)
819                     (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
820                   else
821                     (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
822                   log_entries_flag++;
823                 }
824               else
825                 {
826                   /* We assume a clear occurred so start from the beginning */
827                   host_data->now_host_state.last_record_id.record_id = 0;
828                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
829                   log_entries_flag++;
830                 }
831             }
832           else
833             {
834               /* If delete not supported, the erase must have been a clear
835                * So log all the new entries if some are available and
836                * reset last_record_id to zero.
837                */
838               host_data->now_host_state.last_record_id.record_id = 0;
839               if (host_data->now_host_state.sel_info.entries)
840                 {
841                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
842                   log_entries_flag++;
843                 }
844             }
845         }
846       else /* host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp
847               && host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp */
848         {
849           /* This shouldn't be possible and is likely a bug in the
850            * IPMI firmware (user erased entries but timestamp didn't
851            * update, SEL added entries and updated timestamp but
852            * didn't update entry count, etc.) we'll only save off the
853            * host state for later.
854            */
855           if (host_data->prog_data->args->verbose_count)
856             ipmiseld_syslog_host (host_data, "SEL illegal timestamp situation");
857           host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
858         }
859     }
860   else if (host_data->now_host_state.sel_info.entries > host_data->last_host_state.sel_info.entries)
861     {
862       ipmiseld_last_record_id_t last_record_id;
863 
864       if (host_data->now_host_state.sel_info.most_recent_addition_timestamp == host_data->last_host_state.sel_info.most_recent_addition_timestamp)
865         {
866           /* This shouldn't be possible and is likely a bug in the
867            * IPMI firmware.  Log this, but for rest of this chunk of
868            * code, we assume the addition timestamp must have changed.
869            */
870           if (host_data->prog_data->args->verbose_count)
871             ipmiseld_syslog_host (host_data, "SEL timestamp error, more entries without addition");
872         }
873 
874       if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
875         goto cleanup;
876 
877       /* There is a small race chance that the last time we got sel
878        * info, a new SEL event occurred after it, but before the call
879        * to ipmi_sel_parse().  So we check what the last record id to
880        * see if that happened.  If the last record id is the same,
881        * then we already logged it.  So no new logging needs to
882        * happen.
883        */
884       if (last_record_id.loaded
885           && host_data->last_host_state.last_record_id.record_id == last_record_id.record_id)
886         {
887           /* nothing to do except this single copy/save */
888           host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
889         }
890       else if (host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
891         {
892           /* This is the most normal case we should expect, there
893            * are more entries in the SEL than last time we checked
894            * and must log them.
895            */
896           host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
897           if (host_data->last_host_state.last_record_id.record_id)
898             (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
899           else
900             (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
901           log_entries_flag++;
902         }
903       else
904         {
905           if (host_data->now_host_state.sel_info.delete_sel_command_supported)
906             {
907               /* If new last_record_id is greater, we assume it's some additional entries
908                * and the erase was only deleting some old entries.
909                */
910               if (last_record_id.loaded
911                   && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
912                 {
913                   host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
914                   if (host_data->last_host_state.last_record_id.record_id)
915                     (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
916                   else
917                     (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
918                   log_entries_flag++;
919                 }
920               else
921                 {
922                   /* We assume a clear occurred so start from the beginning */
923                   host_data->now_host_state.last_record_id.record_id = 0;
924                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
925                   log_entries_flag++;
926                 }
927             }
928           else
929             {
930               /* If delete not supported, the erase must have been a clear
931                * So log all the new entries if some are available and
932                * reset last_record_id to zero.
933                */
934               host_data->now_host_state.last_record_id.record_id = 0;
935               if (host_data->now_host_state.sel_info.entries)
936                 {
937                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
938                   log_entries_flag++;
939                 }
940             }
941         }
942     }
943   else /* host_data->now_host_state.sel_info.entries < host_data->host_state.sel_info.entries) */
944     {
945       if (host_data->now_host_state.sel_info.most_recent_erase_timestamp == host_data->last_host_state.sel_info.most_recent_erase_timestamp)
946         {
947           /* This shouldn't be possible and is likely a bug in the
948            * IPMI firmware.  Log this, but for rest of this chunk of
949            * code, we assume the erase timestamp must have changed.
950            */
951           if (host_data->prog_data->args->verbose_count)
952             ipmiseld_syslog_host (host_data, "SEL timestamp error, fewer entries without erase");
953         }
954 
955       /* if no additional entries, nothing to log */
956       if (host_data->now_host_state.sel_info.most_recent_addition_timestamp != host_data->last_host_state.sel_info.most_recent_addition_timestamp)
957         {
958           if (host_data->now_host_state.sel_info.delete_sel_command_supported)
959             {
960               /* We don't know if the erase was for some old entries or if it was a clear.
961                * We will look at the last_record_id to take a guess
962                */
963               ipmiseld_last_record_id_t last_record_id;
964 
965               if (ipmiseld_get_last_record_id (host_data, &last_record_id) < 0)
966                 goto cleanup;
967 
968               /* If new last_record_id is greater, we assume it's some additional entries
969                * and the erase was only deleting some old entries.
970                */
971               if (last_record_id.loaded
972                   && last_record_id.record_id > host_data->last_host_state.last_record_id.record_id)
973                 {
974                   host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
975                   if (host_data->last_host_state.last_record_id.record_id)
976                     (*record_id_start) = host_data->last_host_state.last_record_id.record_id;
977                   else
978                     (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
979                   log_entries_flag++;
980                 }
981               else
982                 {
983                   /* We assume a clear occurred so start from the beginning */
984                   host_data->now_host_state.last_record_id.record_id = 0;
985                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
986                   log_entries_flag++;
987                 }
988             }
989           else
990             {
991               /* If delete not supported, the erase must have been a clear
992                * So log all the new entries if some are available and
993                * reset last_record_id to zero.
994                */
995               host_data->now_host_state.last_record_id.record_id = 0;
996               if (host_data->now_host_state.sel_info.entries)
997                 {
998                   (*record_id_start) = IPMI_SEL_RECORD_ID_FIRST;
999                   log_entries_flag++;
1000                 }
1001             }
1002         }
1003       else
1004         {
1005           if (!host_data->now_host_state.sel_info.entries)
1006             host_data->now_host_state.last_record_id.record_id = 0;
1007           else
1008             host_data->now_host_state.last_record_id.record_id = host_data->last_host_state.last_record_id.record_id;
1009         }
1010     }
1011 
1012   if (log_entries_flag)
1013     rv = 1;
1014   else
1015     rv = 0;
1016 
1017  cleanup:
1018   return (rv);
1019 }
1020 
1021 /* returns 1 if clear should occur, 0 if not, -1 on error */
1022 static int
ipmiseld_check_thresholds(ipmiseld_host_data_t * host_data)1023 ipmiseld_check_thresholds (ipmiseld_host_data_t *host_data)
1024 {
1025   int do_clear_flag = 0;
1026   unsigned int percent;
1027   int rv = -1;
1028 
1029   assert (host_data);
1030 
1031   percent = ipmiseld_calc_percent_full (host_data, &(host_data->now_host_state.sel_info));
1032 
1033   if (host_data->prog_data->args->warning_threshold)
1034     {
1035       if (percent > host_data->prog_data->args->warning_threshold)
1036         {
1037           if (percent > host_data->last_host_state.last_percent_full)
1038             ipmiseld_syslog_host (host_data, "SEL is %d%% full", percent);
1039         }
1040     }
1041 
1042   if (!host_data->last_host_state.sel_info.overflow_flag
1043       && host_data->now_host_state.sel_info.overflow_flag)
1044     ipmiseld_syslog_host (host_data, "SEL Overflow, events have been dropped due to lack of space in the SEL");
1045 
1046   if (host_data->prog_data->args->clear_threshold)
1047     {
1048       if (percent > host_data->prog_data->args->clear_threshold)
1049         do_clear_flag = 1;
1050     }
1051 
1052   host_data->now_host_state.last_percent_full = percent;
1053 
1054   if (do_clear_flag)
1055     rv = 1;
1056   else
1057     rv = 0;
1058 
1059   return (rv);
1060 }
1061 
1062 /* returns 1 if reserve successful, 0 if not, -1 on error */
1063 static int
ipmiseld_sel_reserve(ipmiseld_host_data_t * host_data)1064 ipmiseld_sel_reserve (ipmiseld_host_data_t *host_data)
1065 {
1066   assert (host_data);
1067   assert (host_data->host_poll);
1068   assert (host_data->host_poll->sel_ctx);
1069 
1070   if (host_data->now_host_state.sel_info.reserve_sel_command_supported)
1071     {
1072       if (ipmi_sel_ctx_register_reservation_id (host_data->host_poll->sel_ctx, NULL) < 0)
1073         {
1074           /* If an IPMI error, we assume just can't do reservation, no biggie */
1075           if (ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_IPMI_ERROR)
1076             return (0);
1077 
1078           ipmiseld_err_output (host_data, "ipmi_sel_ctx_register_reservation_id: %s",
1079                       ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1080           return (-1);
1081         }
1082 
1083       return (1);
1084     }
1085 
1086   return (0);
1087 }
1088 
1089 static int
ipmiseld_sel_log_entries(ipmiseld_host_data_t * host_data,uint16_t record_id_start)1090 ipmiseld_sel_log_entries (ipmiseld_host_data_t *host_data,
1091                           uint16_t record_id_start)
1092 {
1093   assert (host_data);
1094   assert (host_data->host_poll);
1095   assert (host_data->host_poll->sel_ctx);
1096 
1097   if (ipmi_sel_parse (host_data->host_poll->sel_ctx,
1098                       record_id_start,
1099                       IPMI_SEL_RECORD_ID_LAST,
1100                       _sel_parse_callback,
1101                       host_data) < 0)
1102     {
1103       ipmiseld_err_output (host_data, "ipmi_sel_parse: %s", ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1104       return (-1);
1105     }
1106 
1107   return (0);
1108 }
1109 
1110 static int
ipmiseld_save_state(ipmiseld_host_data_t * host_data)1111 ipmiseld_save_state (ipmiseld_host_data_t *host_data)
1112 {
1113   assert (host_data);
1114 
1115   memcpy (&(host_data->last_host_state),
1116           &(host_data->now_host_state),
1117           sizeof (ipmiseld_host_state_t));
1118 
1119   /* ignore error, continue on even if it fails */
1120   ipmiseld_data_cache_store (host_data);
1121 
1122   return (0);
1123 }
1124 
1125 /* return 1 - retry immediately, return 0 general success, -1 error */
1126 static int
ipmiseld_sel_parse_log(ipmiseld_host_data_t * host_data)1127 ipmiseld_sel_parse_log (ipmiseld_host_data_t *host_data)
1128 {
1129   uint16_t record_id_start = 0;
1130   int log_entries_flag = 0;
1131   int do_clear_flag = 0;
1132   int reserve_flag = 0;
1133   int retry_flag = 0;
1134   int rv = -1;
1135   int ret;
1136 
1137   assert (host_data);
1138 
1139   if (host_data->prog_data->args->clear_sel
1140       && !host_data->clear_sel_done)
1141     {
1142       if (ipmi_sel_clear_sel (host_data->host_poll->sel_ctx) < 0)
1143         {
1144           ipmiseld_err_output (host_data, "ipmi_sel_clear_sel: %s",
1145                       ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1146           goto cleanup;
1147         }
1148       host_data->clear_sel_done = 1;
1149     }
1150 
1151   if (!host_data->last_host_state.initialized)
1152     {
1153       if ((ret = ipmiseld_data_cache_load (host_data)) < 0)
1154         {
1155           if (host_data->prog_data->args->verbose_count)
1156             ipmiseld_syslog_host (host_data, "Failed to load cached previous state, some SEL entries maybe missed");
1157         }
1158 
1159       if (ret <= 0)
1160         {
1161           if (ipmiseld_host_state_init (host_data) < 0)
1162             goto cleanup;
1163 
1164           if (host_data->prog_data->args->foreground
1165               && host_data->prog_data->args->common_args.debug)
1166             _dump_host_state (host_data,
1167                               &(host_data->last_host_state),
1168                               "Initial State");
1169 
1170           goto out;
1171         }
1172       else
1173         {
1174           if (host_data->prog_data->args->foreground
1175               && host_data->prog_data->args->common_args.debug)
1176             _dump_host_state (host_data,
1177                               &(host_data->last_host_state),
1178                               "Loaded State");
1179         }
1180     }
1181 
1182   if (ipmiseld_sel_info_get (host_data, &(host_data->now_host_state.sel_info)) < 0)
1183     goto cleanup;
1184 
1185   if (host_data->prog_data->args->foreground
1186       && host_data->prog_data->args->common_args.debug)
1187     {
1188       _dump_host_state (host_data, &(host_data->last_host_state), "Last State");
1189       _dump_sel_info (host_data, &host_data->now_host_state.sel_info, "Current State");
1190     }
1191 
1192   if ((do_clear_flag = ipmiseld_check_thresholds (host_data)) < 0)
1193     goto cleanup;
1194 
1195   if ((log_entries_flag = ipmiseld_check_sel_info (host_data, &record_id_start)) < 0)
1196     goto cleanup;
1197 
1198   if (do_clear_flag)
1199     {
1200       if ((reserve_flag = ipmiseld_sel_reserve (host_data)) < 0)
1201         goto cleanup;
1202     }
1203 
1204   if (log_entries_flag)
1205     {
1206       if (ipmiseld_sel_log_entries (host_data, record_id_start) < 0)
1207         goto cleanup;
1208     }
1209 
1210   if (do_clear_flag)
1211     {
1212       ipmiseld_sel_info_t tmp_sel_info;
1213 
1214       if ((ret = ipmi_sel_clear_sel (host_data->host_poll->sel_ctx)) < 0)
1215         {
1216           if (reserve_flag
1217               && ipmi_sel_ctx_errnum (host_data->host_poll->sel_ctx) == IPMI_SEL_ERR_RESERVATION_CANCELED)
1218             retry_flag++;
1219           else
1220             {
1221               ipmiseld_err_output (host_data, "ipmi_sel_clear_sel: %s",
1222                           ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1223               goto save_state_out;
1224             }
1225         }
1226 
1227       ipmiseld_syslog_host (host_data, "SEL cleared");
1228 
1229       if (ipmiseld_sel_info_get (host_data, &tmp_sel_info) < 0)
1230         goto save_state_out;
1231 
1232       memcpy (&(host_data->now_host_state.sel_info), &tmp_sel_info, sizeof (ipmiseld_sel_info_t));
1233       host_data->now_host_state.last_record_id.record_id = 0;
1234     }
1235 
1236  save_state_out:
1237 
1238   host_data->now_host_state.initialized = 1;
1239 
1240   if (ipmiseld_save_state (host_data) < 0)
1241     goto cleanup;
1242 
1243  out:
1244 
1245   if (retry_flag)
1246     rv = 1;
1247   else
1248     rv = 0;
1249 
1250  cleanup:
1251   return (rv);
1252 }
1253 
1254 static int
ipmiseld_sel_parse(ipmiseld_host_data_t * host_data)1255 ipmiseld_sel_parse (ipmiseld_host_data_t *host_data)
1256 {
1257   unsigned int retry_count = 0;
1258   int rv;
1259 
1260   assert (host_data);
1261 
1262   if (host_data->prog_data->args->test_run)
1263     return (ipmiseld_sel_parse_test_run (host_data));
1264 
1265   while (retry_count < IPMISELD_RETRY_ATTEMPT_MAX)
1266     {
1267       if ((rv = ipmiseld_sel_parse_log (host_data)) < 0)
1268         break;
1269 
1270       if (!rv)
1271         break;
1272 
1273       retry_count++;
1274     }
1275 
1276   return (rv);
1277 }
1278 
1279 static int
_ipmiseld_poll(void * arg)1280 _ipmiseld_poll (void *arg)
1281 {
1282   ipmiseld_host_data_t *host_data;
1283   ipmiseld_host_poll_t host_poll;
1284   unsigned int sel_flags = 0;
1285   unsigned int interpret_flags = 0;
1286   int exit_code = EXIT_FAILURE;
1287 
1288   assert (arg);
1289 
1290   host_data = (ipmiseld_host_data_t *)arg;
1291 
1292   assert (!host_data->host_poll);
1293 
1294   if (host_data->prog_data->args->foreground
1295       && host_data->prog_data->args->common_args.debug)
1296     IPMISELD_DEBUG (("Poll %s", host_data->hostname ? host_data->hostname : "localhost"));
1297 
1298   memset (&host_poll, '\0', sizeof (ipmiseld_host_poll_t));
1299   host_data->host_poll = &host_poll;
1300 
1301   if (ipmiseld_ipmi_setup (host_data) < 0)
1302     goto cleanup;
1303 
1304   if (!host_data->prog_data->args->ignore_sdr)
1305     {
1306       if (ipmiseld_sdr_cache_create_and_load (host_data) < 0)
1307         goto cleanup;
1308     }
1309   else
1310     host_data->host_poll->sdr_ctx = NULL;
1311 
1312   if (!(host_data->host_poll->sel_ctx = ipmi_sel_ctx_create (host_data->host_poll->ipmi_ctx, host_data->host_poll->sdr_ctx)))
1313     {
1314       ipmiseld_err_output (host_data, "ipmi_sel_ctx_create: %s", strerror (errno));
1315       goto cleanup;
1316     }
1317 
1318   if (host_data->prog_data->args->foreground
1319       && host_data->prog_data->args->common_args.debug > 1)
1320     sel_flags |= IPMI_SEL_FLAGS_DEBUG_DUMP;
1321 
1322   if (host_data->prog_data->args->common_args.section_specific_workaround_flags & IPMI_PARSE_SECTION_SPECIFIC_WORKAROUND_FLAGS_ASSUME_SYSTEM_EVENT)
1323     sel_flags |= IPMI_SEL_FLAGS_ASSUME_SYTEM_EVENT_RECORDS;
1324 
1325   if (sel_flags)
1326     {
1327       /* Don't error out, if this fails we can still continue */
1328       if (ipmi_sel_ctx_set_flags (host_data->host_poll->sel_ctx, sel_flags) < 0)
1329         ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_flags: %s",
1330                     ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1331     }
1332 
1333   if (host_data->prog_data->args->foreground
1334       && host_data->prog_data->args->common_args.debug > 1
1335       && host_data->hostname)
1336     {
1337       if (ipmi_sel_ctx_set_debug_prefix (host_data->host_poll->sel_ctx, host_data->hostname) < 0)
1338         ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_debug_prefix: %s",
1339                     ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1340     }
1341 
1342   if (!(host_data->host_poll->interpret_ctx = ipmi_interpret_ctx_create ()))
1343     {
1344       ipmiseld_err_output (host_data, "ipmi_interpret_ctx_create: %s", strerror (errno));
1345       goto cleanup;
1346     }
1347 
1348   if (ipmi_interpret_load_sel_config (host_data->host_poll->interpret_ctx,
1349                                       host_data->prog_data->args->event_state_config_file) < 0)
1350     {
1351       /* if default file is missing its ok */
1352       if (!(!host_data->prog_data->args->event_state_config_file
1353             && ipmi_interpret_ctx_errnum (host_data->host_poll->interpret_ctx) == IPMI_INTERPRET_ERR_SEL_CONFIG_FILE_DOES_NOT_EXIST))
1354         {
1355           ipmiseld_err_output (host_data, "ipmi_interpret_load_sel_config: %s", ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1356           goto cleanup;
1357         }
1358     }
1359 
1360   if (host_data->prog_data->args->interpret_oem_data)
1361     interpret_flags |= IPMI_INTERPRET_FLAGS_INTERPRET_OEM_DATA;
1362 
1363   if (interpret_flags)
1364     {
1365       if (ipmi_interpret_ctx_set_flags (host_data->host_poll->interpret_ctx, interpret_flags) < 0)
1366         {
1367           ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_flags: %s",
1368                       ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1369           goto cleanup;
1370         }
1371     }
1372 
1373   if (ipmi_sel_ctx_set_parameter (host_data->host_poll->sel_ctx,
1374                                   IPMI_SEL_PARAMETER_INTERPRET_CONTEXT,
1375                                   &(host_data->host_poll->interpret_ctx)) < 0)
1376     {
1377       err_output("ipmi_sel_ctx_set_interpret: %s",
1378                  ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1379       goto cleanup;
1380     }
1381 
1382   if (ipmi_sel_ctx_set_separator (host_data->host_poll->sel_ctx, EVENT_OUTPUT_SEPARATOR) < 0)
1383     {
1384       ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_separator: %s",
1385                   ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1386       return (-1);
1387     }
1388 
1389   if (host_data->prog_data->args->interpret_oem_data
1390       || host_data->prog_data->args->output_oem_event_strings)
1391     {
1392       if (ipmi_get_oem_data (NULL,
1393                              host_data->host_poll->ipmi_ctx,
1394                              &host_data->host_poll->oem_data) < 0)
1395         return (-1);
1396 
1397       if (ipmi_sel_ctx_set_manufacturer_id (host_data->host_poll->sel_ctx,
1398                                             host_data->host_poll->oem_data.manufacturer_id) < 0)
1399         {
1400           ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_manufacturer_id: %s",
1401                       ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1402           return (-1);
1403         }
1404 
1405       if (ipmi_sel_ctx_set_product_id (host_data->host_poll->sel_ctx,
1406                                        host_data->host_poll->oem_data.product_id) < 0)
1407         {
1408           ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_product_id: %s",
1409                       ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1410           return (-1);
1411         }
1412 
1413       if (ipmi_sel_ctx_set_ipmi_version (host_data->host_poll->sel_ctx,
1414                                          host_data->host_poll->oem_data.ipmi_version_major,
1415                                          host_data->host_poll->oem_data.ipmi_version_minor) < 0)
1416         {
1417           ipmiseld_err_output (host_data, "ipmi_sel_ctx_set_ipmi_version: %s",
1418                       ipmi_sel_ctx_errormsg (host_data->host_poll->sel_ctx));
1419           return (-1);
1420         }
1421 
1422       if (host_data->prog_data->args->interpret_oem_data)
1423         {
1424           if (ipmi_interpret_ctx_set_manufacturer_id (host_data->host_poll->interpret_ctx,
1425                                                       host_data->host_poll->oem_data.manufacturer_id) < 0)
1426             {
1427               ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_manufacturer_id: %s",
1428                           ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1429               return (-1);
1430             }
1431 
1432           if (ipmi_interpret_ctx_set_product_id (host_data->host_poll->interpret_ctx,
1433                                                  host_data->host_poll->oem_data.product_id) < 0)
1434             {
1435               ipmiseld_err_output (host_data, "ipmi_interpret_ctx_set_product_id: %s",
1436                           ipmi_interpret_ctx_errormsg (host_data->host_poll->interpret_ctx));
1437               return (-1);
1438             }
1439         }
1440     }
1441 
1442   if (ipmiseld_sel_parse (host_data) < 0)
1443     goto cleanup;
1444 
1445   exit_code = EXIT_SUCCESS;
1446  cleanup:
1447   ipmi_interpret_ctx_destroy (host_data->host_poll->interpret_ctx);
1448   ipmi_sel_ctx_destroy (host_data->host_poll->sel_ctx);
1449   ipmi_sdr_ctx_destroy (host_data->host_poll->sdr_ctx);
1450   ipmi_ctx_close (host_data->host_poll->ipmi_ctx);
1451   ipmi_ctx_destroy (host_data->host_poll->ipmi_ctx);
1452   host_data->host_poll = NULL;
1453   return (exit_code);
1454 }
1455 
1456 static int
_ipmiseld_poll_postprocess(void * arg)1457 _ipmiseld_poll_postprocess (void *arg)
1458 {
1459   ipmiseld_host_data_t *host_data;
1460   struct timeval tv;
1461   int rv = -1;
1462 
1463   assert (arg);
1464 
1465   host_data = (ipmiseld_host_data_t *)arg;
1466 
1467   assert (!host_data->host_poll);
1468 
1469   gettimeofday (&tv, NULL);
1470   host_data->next_poll_time = tv.tv_sec + host_data->prog_data->args->poll_interval;
1471 
1472   pthread_mutex_lock (&host_data_heap_lock);
1473 
1474   if (!heap_insert (host_data_heap, host_data))
1475     {
1476       pthread_mutex_unlock (&host_data_heap_lock);
1477       ipmiseld_err_output (host_data, "heap_insert: %s", strerror (errno));
1478       goto cleanup;
1479     }
1480 
1481   pthread_mutex_unlock (&host_data_heap_lock);
1482   rv = 0;
1483  cleanup:
1484   return (rv);
1485 }
1486 
1487 static void
_signal_handler_callback(int sig)1488 _signal_handler_callback (int sig)
1489 {
1490   exit_flag = 0;
1491 }
1492 
1493 static void
_free_host_data(void * x)1494 _free_host_data (void *x)
1495 {
1496   ipmiseld_host_data_t *host_data;;
1497 
1498   assert (x);
1499 
1500   host_data = (ipmiseld_host_data_t *)x;
1501   free (host_data->hostname);
1502   free (host_data);
1503 }
1504 
1505 static ipmiseld_host_data_t *
_alloc_host_data(ipmiseld_prog_data_t * prog_data,const char * hostname)1506 _alloc_host_data (ipmiseld_prog_data_t *prog_data, const char *hostname)
1507 {
1508   ipmiseld_host_data_t *host_data;
1509 
1510   assert (prog_data);
1511 
1512   if (!(host_data = (ipmiseld_host_data_t *) malloc (sizeof (ipmiseld_host_data_t))))
1513     {
1514       err_output ("malloc: %s", strerror (errno));
1515       return (NULL);
1516     }
1517 
1518   memset (host_data, '\0', sizeof (ipmiseld_host_data_t));
1519   host_data->prog_data = prog_data;
1520   if (hostname)
1521     {
1522       if (!(host_data->hostname = strdup (hostname)))
1523         {
1524           err_output ("strdup: %s", strerror (errno));
1525           free (host_data);
1526           return (NULL);
1527         }
1528     }
1529   else
1530     host_data->hostname = NULL;
1531   host_data->host_poll = NULL;
1532   host_data->re_download_sdr_done = 0;
1533   host_data->clear_sel_done = 0;
1534   host_data->next_poll_time = 0; /* 0 will first immediate check first time through */
1535   host_data->last_ipmi_errnum = 0;
1536   host_data->last_ipmi_errnum_count = 0;
1537 
1538   return (host_data);
1539 }
1540 
1541 static int
hostdata_timecmp(void * x,void * y)1542 hostdata_timecmp (void *x, void *y)
1543 {
1544   ipmiseld_host_data_t *hd1, *hd2;
1545 
1546   assert (x);
1547   assert (y);
1548 
1549   hd1 = (ipmiseld_host_data_t *)x;
1550   hd2 = (ipmiseld_host_data_t *)y;
1551 
1552   if (hd1->next_poll_time < hd2->next_poll_time)
1553     return (1);
1554   else if (hd1->next_poll_time > hd2->next_poll_time)
1555     return (-1);
1556   return (0);
1557 }
1558 
1559 static int
_ipmiseld(ipmiseld_prog_data_t * prog_data)1560 _ipmiseld (ipmiseld_prog_data_t *prog_data)
1561 {
1562   int hosts_count = 0;
1563   fi_hostlist_t hlist = NULL;
1564   fi_hostlist_iterator_t hitr = NULL;
1565   ipmiseld_host_data_t *host_data;
1566   char *host = NULL;
1567   int rv = -1;
1568   int ret;
1569 
1570   assert (prog_data);
1571   assert (!host_data_heap);
1572 
1573   if (prog_data->args->common_args.hostname)
1574     {
1575       if ((hosts_count = pstdout_hostnames_count (prog_data->args->common_args.hostname)) < 0)
1576         {
1577           err_output ("pstdout_hostnames_count: %s", pstdout_strerror (pstdout_errnum));
1578           goto cleanup;
1579         }
1580 
1581       if (!hosts_count)
1582         {
1583           err_output ("invalid number of hosts specified");
1584           goto cleanup;
1585         }
1586     }
1587   else /* inband communication, hosts_count = 1 */
1588     hosts_count = 1;
1589 
1590   /* don't need more threads than hosts */
1591   if (hosts_count < prog_data->args->threadpool_count)
1592     prog_data->args->threadpool_count = hosts_count;
1593 
1594   if (!(host_data_heap = heap_create (hosts_count,
1595                                       (HeapCmpF)hostdata_timecmp,
1596                                       (HeapDelF)_free_host_data)))
1597     {
1598       err_output ("heap_create: %s", strerror (errno));
1599       goto cleanup;
1600     }
1601 
1602   if ((ret = pthread_mutex_init (&host_data_heap_lock, NULL)))
1603     {
1604       err_output ("pthread_mutex_init: %s", strerror (ret));
1605       goto cleanup;
1606     }
1607 
1608   if (hosts_count == 1)
1609     {
1610       if (!(host_data = _alloc_host_data (prog_data, prog_data->args->common_args.hostname)))
1611         goto cleanup;
1612 
1613       if (!heap_insert (host_data_heap, host_data))
1614         {
1615           err_output ("heap_insert: %s", strerror (errno));
1616           goto cleanup;
1617         }
1618     }
1619   else
1620     {
1621       if (!(hlist = fi_hostlist_create (prog_data->args->common_args.hostname)))
1622         {
1623           err_output ("fi_hostlist_create: %s", strerror (errno));
1624           goto cleanup;
1625         }
1626 
1627       if (!(hitr = fi_hostlist_iterator_create (hlist)))
1628         {
1629           err_output ("fi_hostlist_iterator_create: %s", strerror (errno));
1630           goto cleanup;
1631         }
1632 
1633       while ((host = fi_hostlist_next (hitr)))
1634         {
1635           if (!(host_data = _alloc_host_data (prog_data, host)))
1636             goto cleanup;
1637 
1638           if (!heap_insert (host_data_heap, host_data))
1639             {
1640               err_output ("heap_insert: %s", strerror (errno));
1641               goto cleanup;
1642             }
1643 
1644           free(host);
1645         }
1646       host = NULL;
1647     }
1648 
1649   if (ipmiseld_threadpool_init (prog_data,
1650                                 _ipmiseld_poll,
1651                                 _ipmiseld_poll_postprocess) < 0)
1652     goto cleanup;
1653 
1654   if (prog_data->args->test_run)
1655     {
1656       while (!heap_is_empty (host_data_heap))
1657         {
1658           if (!(host_data = heap_pop (host_data_heap)))
1659             {
1660               err_output ("heap_pop: %s", strerror (errno));
1661               goto cleanup;
1662             }
1663 
1664           _ipmiseld_poll (host_data);
1665         }
1666     }
1667   else
1668     {
1669       while (exit_flag)
1670         {
1671           pthread_mutex_lock (&host_data_heap_lock);
1672 
1673           host_data = heap_pop (host_data_heap);
1674 
1675           pthread_mutex_unlock (&host_data_heap_lock);
1676 
1677           /* empty heap, small chance of this happening, but
1678            * everything is processing and previous sleeps didn't sleep
1679            * long enough.  So we just need to wait until something
1680            * else finishes.
1681            *
1682            * There's no way to know the exact right amount of time, so
1683            * we're going to make an estimate.  What we'll do is
1684            * estimate 1/5th the time of a IPMI session timeout.  So in
1685            * the event the previous poll fully timed out, we will
1686            * interrupt and go through this loop only 5 times.
1687            */
1688           if (!host_data)
1689             {
1690               unsigned int waittime;
1691 
1692               if (prog_data->args->common_args.session_timeout)
1693                 waittime = prog_data->args->common_args.session_timeout;
1694               else
1695                 waittime = IPMI_SESSION_TIMEOUT_DEFAULT;
1696 
1697               /* session timeout is in milliseconds */
1698               waittime /= 1000;
1699 
1700               /* now take a 5th of it  */
1701               waittime /= 5;
1702 
1703               if (!waittime)
1704                 waittime = 1;
1705 
1706               daemon_sleep (waittime);
1707               continue;
1708             }
1709 
1710           if (ipmiseld_threadpool_queue (host_data) < 0)
1711             {
1712               pthread_mutex_lock (&host_data_heap_lock);
1713 
1714               if (!heap_insert (host_data_heap, host_data))
1715                 ipmiseld_err_output (host_data, "heap_insert: %s", strerror (errno));
1716 
1717               pthread_mutex_unlock (&host_data_heap_lock);
1718             }
1719 
1720           pthread_mutex_lock (&host_data_heap_lock);
1721 
1722           host_data = heap_peek (host_data_heap);
1723 
1724           pthread_mutex_unlock (&host_data_heap_lock);
1725 
1726           /* empty heap, everything must be processing, so we'll sleep
1727            * for the poll interval, b/c no one should be scheduled
1728            * until after this time has passed anyways.
1729            */
1730           if (!host_data)
1731             daemon_sleep (prog_data->args->poll_interval + 1);
1732           else
1733             {
1734               struct timeval tv;
1735 
1736               gettimeofday (&tv, NULL);
1737 
1738               /* If next_poll_time == 0, no sleep, its the first time through */
1739               if (host_data->next_poll_time
1740                   && (host_data->next_poll_time > tv.tv_sec))
1741                 daemon_sleep ((host_data->next_poll_time - tv.tv_sec) + 1);
1742             }
1743         }
1744     }
1745 
1746   rv = 0;
1747  cleanup:
1748   ipmiseld_threadpool_destroy ();
1749   heap_destroy (host_data_heap);
1750   fi_hostlist_iterator_destroy (hitr);
1751   fi_hostlist_destroy (hlist);
1752   free (host);
1753   return (rv);
1754 }
1755 
1756 int
main(int argc,char ** argv)1757 main (int argc, char **argv)
1758 {
1759   ipmiseld_prog_data_t prog_data;
1760   struct ipmiseld_arguments cmd_args;
1761 
1762   err_init (argv[0]);
1763   err_set_flags (ERROR_STDERR);
1764 
1765   ipmi_disable_coredump ();
1766 
1767   prog_data.progname = argv[0];
1768   ipmiseld_argp_parse (argc, argv, &cmd_args);
1769   prog_data.args = &cmd_args;
1770 
1771   if (prog_data.args->event_state_filter_str)
1772     prog_data.event_state_filter_mask = ipmiseld_event_state_filter_parse (prog_data.args->event_state_filter_str);
1773   else
1774     prog_data.event_state_filter_mask = 0;
1775 
1776   if (prog_data.args->log_facility_str)
1777     prog_data.log_facility = ipmiseld_log_facility_parse (prog_data.args->log_facility_str);
1778   else
1779     prog_data.log_facility = LOG_DAEMON;
1780 
1781   if (prog_data.args->log_priority_str)
1782     prog_data.log_priority = ipmiseld_log_priority_parse (prog_data.args->log_priority_str);
1783   else
1784     prog_data.log_priority = LOG_ERR;
1785 
1786   if (!cmd_args.test_run)
1787     {
1788       if (!cmd_args.foreground)
1789         {
1790           daemonize_common (IPMISELD_PIDFILE);
1791           err_set_flags (ERROR_SYSLOG);
1792         }
1793       else
1794         err_set_flags (ERROR_STDERR);
1795 
1796       daemon_signal_handler_setup (_signal_handler_callback);
1797 
1798       /* Call after daemonization, since daemonization closes currently
1799        * open fds
1800        */
1801       if (argv[0][0] == '/')
1802         argv[0] = strrchr(argv[0], '/') + 1;
1803       openlog (argv[0], LOG_ODELAY | LOG_PID, prog_data.log_facility);
1804     }
1805 
1806   return (_ipmiseld (&prog_data));
1807 }
1808