1 /*
2 ** portmon.c -- Main procedure for the portmon daemon
3 ** Copyright (C) 2002 Nik Reiman <nik@aboleo.net>
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA
18 */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <signal.h>
26 #include <ctype.h>
27 #include <time.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 
31 #include "portmon.h"
32 #include "config.h"
33 
34 #ifdef HAVE_GETOPT_H
35 #include <getopt.h>
36 #endif
37 
// Set to 1 by the -r option; when set, main prints a report and returns
// instead of entering the monitoring loop.
38 int report_flag;
// Set to 1 by the -d option; when set, main forks and the child carries on
// as the monitor while the parent exits.
39 int daemonize_flag;
40 
// Array of monitored hosts, indexed 0..num_hosts-1 by main's loop.  It is not
// allocated anywhere in this file -- presumably read_config() fills it in;
// TODO confirm.
41 struct host_struct *hosts;
42 
// Shared message buffer of STRLARGE bytes, allocated by main; messages are
// formatted into it with snprintf and then handed to log_write.
43 char *err_msg;
// Path of the log file that log_write appends to; main defaults it to
// "/dev/null" and -l overrides it.
44 char logfile[STRLARGE];
// Connect timeout taken from -t (default 3).  It is not read in this file --
// presumably the ping code elsewhere consumes it; TODO confirm.
45 int timeout;
// Non-zero after -v; enables the progress printf in main's probe loop.
46 int verbose;
47 
48 
49 
version(void)50 void version(void)
51 {
52  printf("This is %s, version %s\n", PACKAGE, VERSION);
53  printf("By Nik Reiman, nik@aboleo.net\n");
54 }
55 
portmon_exit(int signal)56 void portmon_exit(int signal)
57 {
58  snprintf(err_msg, STRLARGE, "Exiting on signal %d\n", signal);
59  log_write(err_msg);
60  exit(0);
61 }
62 
log_write(char * msg)63 int log_write(char *msg)
64 {
65  FILE *log_fp;
66 
67  if((log_fp = fopen(logfile, "a")) == NULL) {
68   perror("fopen");
69   return (1);
70  }
71  fprintf(log_fp, "(%s) - %s", get_time(time(NULL)), err_msg);
72  fclose(log_fp);
73  return (0);
74 }
75 
76 // return the current time in pretty form
get_time(time_t cur_time)77 char *get_time(time_t cur_time)
78 {
79  char *fmt_time = (char *)malloc(STRSMALL * sizeof(char));
80 
81  strncpy(fmt_time, ctime(&cur_time), STRSMALL);
82  // kill the dumb newline ctime makes
83  fmt_time[strlen(fmt_time) - 1] = '\0';
84  return (fmt_time);
85 }
86 
// Print the version banner followed by the usage summary to stdout.
//
// Builds with HAVE_GETOPT_LONG list both the short and the long spelling of
// every option; other builds list the short options only.
void help(void)
{
 static const char *const usage_lines[] = {
  "Usage: portmon [options]\n",
  "Where options include:\n",
#ifdef HAVE_GETOPT_LONG
  " -n, --sleep=<time>\tMinutes between checks (1 hour)\n",
  " -c, --config=<file>\tHost file (./hosts)\n",
  " -l, --logfile=<file>\tlogfile location (/dev/null)\n",
  " -s, --severe=<number>\tHow many times to wait before severe notification (12)\n",
  " -e, --error=<command>\tCommand to run in case of error (built-in)\n",
  " -t, --timeout=<time>\tNumber of seconds for connect timeout (3)\n",
  " -g, --lag=<time>\tNumber of milliseconds for lagout notification\n",
  " -p, --probes=<number>\tNumber of probes to send out (3)\n",
  " -r, --report=[format]\tRun in report mode, with optional format\n",
  " -v, --verbose\t\tVerbose mode\n",
  " -d, --daemon\t\tDaemonize portmon\n",
  " -V, --version\t\tVersion\n",
  " -h, --help\t\tThis screen\n",
#else
  " -n <time>\tMinutes between checks (1 hour)\n",
  " -c <file>\tHost file (./hosts)\n",
  " -l <file>\tlogfile location (/dev/null)\n",
  " -s <number>\tHow many times to wait before severe notification (12)\n",
  " -e <command>\tCommand to run in case of error (built-in)\n",
  " -t <time>\tNumber of seconds for connect timeout (3)\n",
  " -g <time>\tNumber of milliseconds for lagout notification\n",
  " -p <number>\tNumber of probes to send out (3)\n",
  " -r [format]\tRun in report mode\n",
  " -v\t\tVerbose mode\n",
  " -d\t\tDaemonize portmon\n",
  " -V\t\tVersion\n",
  " -h\t\tThis screen\n",
#endif
 };
 size_t idx;

 version();
 // none of the lines contains a conversion, so they are written verbatim
 for(idx = 0; idx < sizeof(usage_lines) / sizeof(usage_lines[0]); idx++) {
  fputs(usage_lines[idx], stdout);
 }
}
126 
main(int argc,char * argv[])127 int main(int argc, char *argv[])
128 {
129  // 128 bytes for the name of the filename containing our hosts
130  char *host_file = malloc(STRMED * sizeof(char));
131  // default sleep time is 1 hour
132  int sleep_time = 60 * 60;
133  // severe being when to send another notification
134  int severe = 12;
135  // where we go when we die
136  void portmon_exit(int);
137  // options to parse for on command line
138 #ifdef HAVE_GETOPT_LONG
139  int option_index;
140  static struct option long_options[] = {
141   {"sleep", 1, 0, 'n'},
142   {"error", 1, 0, 'e'},
143   {"config", 1, 0, 'c'},
144   {"severe", 1, 0, 's'},
145   {"logfile", 1, 0, 'l'},
146   {"timeout", 1, 0, 't'},
147   {"probes", 1, 0, 'p'},
148   {"lag", 1, 0, 'g'},
149   {"daemon", 0, 0, 'd'},
150   {"report", 2, 0, 'r'},
151   {"verbose", 0, 0, 'v'},
152   {"version", 0, 0, 'V'},
153   {"help", 0, 0, 'h'},
154   {0, 0, 0, 0}
155  };
156 #endif
157 
158  int i, j, k, num_hosts = 0, total_milli, num_good_probes;
159  int host_down, ret = 0, num_probes = 3, lag_timeout = MAXINT, avg_time;
160  pid_t pid = getpid();
161  time_t tmp_time;
162  char *host_msg = (char *)malloc(STRLARGE * sizeof(char));
163  int days, hrs, mins;
164  char run_script[STRLARGE];
165  char *output_fmt = NULL, opt;
166 
167  err_msg = (char *)malloc(STRLARGE * sizeof(char));
168  // default file for our hosts
169  strncpy(host_file, "hosts", STRMED);
170  // default place to log. ;)
171  strncpy(logfile, "/dev/null", STRLARGE);
172  // default timeout
173  timeout = 3;
174  run_script[0] = '\0';
175  verbose = 0;
176  daemonize_flag = 0;
177  report_flag = 0;
178 
179  // read args off of the command line
180  // getopt_long seems to be a linux extension, so all those bsd lamers
181  // get to miss out on the fun!
182 #ifdef HAVE_GETOPT_LONG
183  while((opt =
184         getopt_long(argc, argv, "hvVdre:c:n:s:l:t:p:0", long_options,
185                     &option_index)) != -1) {
186 #else
187  while((opt = getopt(argc, argv, "hvVdre:c:n:s:l:t:p:")) != -1) {
188 #endif
189   switch (opt) {
190    case 'n':
191     // -n <number> ... will be the new number of minutes to sleep for
192     sleep_time = atoi(optarg) * 60;
193     break;
194    case 'e':
195     // -e <file> ... run file as an error handler instead of err_action
196     strncpy(run_script, optarg, 256);
197     break;
198    case 'c':
199     // -c <file> ... will be an alternate host file
200     strncpy(host_file, optarg, 128);
201     break;
202    case 's':
203     // -s <time> ... severe warning (optional)
204     severe = atoi(optarg);
205     break;
206    case 'l':
207     // -l <logfile> ... where to log to
208     strncpy(logfile, optarg, 256);
209     break;
210    case 't':
211     // -t <seconds> ... set the connect timeout
212     timeout = atoi(optarg);
213     break;
214    case 'p':
215     // -p <number> ... number of probes to send out
216     num_probes = atoi(optarg);
217     break;
218    case 'g':
219     // -g <milliseconds> ... number of milliseconds to wait before lagging out
220     lag_timeout = atoi(optarg);
221     break;
222    case 'd':
223     // daemonize
224     daemonize_flag = 1;
225     break;
226    case 'r':
227     // report mode
228     report_flag = 1;
229     if(optarg) {
230      output_fmt = (char *)malloc(STRMED * sizeof(char));
231      strcpy(output_fmt, optarg);
232     }
233     break;
234    case 'v':
235     verbose = 1;
236     break;
237    case 'V':
238     version();
239     return (0);
240     break;
241    case 'h':
242     // help me!
243     help();
244     return (-1);
245     break;
246    case '?':
247     help();
248     return (-1);
249     break;
250    default:
251     printf("Unknown error parsing command line...exiting.\n");
252     return (-1);
253     break;
254   }
255  }
256 
257  // trap some signals to quit on
258  if(signal(SIGQUIT, portmon_exit) == SIG_ERR) {
259   perror("signal");
260   exit(1);
261  }
262  if(signal(SIGINT, portmon_exit) == SIG_ERR) {
263   perror("signal");
264   exit(1);
265  }
266  if(signal(SIGTERM, portmon_exit) == SIG_ERR) {
267   perror("signal");
268   exit(1);
269  }
270 
271  snprintf(err_msg, STRLARGE, "Portmon started by user %s\n", getenv("USER"));
272  log_write(err_msg);
273 
274  // read in the configuration file
275  if((num_hosts = read_config(host_file)) == -1) {
276   printf("Failed reading config file %s\n", host_file);
277   return (1);
278  }
279 
280  if(report_flag) {
281   report(num_hosts, num_probes, lag_timeout, output_fmt);
282   return (0);
283  }
284 
285  // fork into the background if we are told to daemonize
286  if(daemonize_flag) {
287   pid = fork();
288   if(pid < 0) {
289    perror("fork");
290    return (1);
291   }
292  }
293 
294  // either go here if being run in the foreground, or as the child proc
295  if(pid == 0 || daemonize_flag == 0) {
296   // steps needed for proper daemonization
297   if(daemonize_flag) {
298    setsid();
299    chdir("/");
300    umask(0);
301   }
302   // main loop
303   while(1) {
304    total_milli = 0;
305    num_good_probes = 0;
306    // go through the array of hosts, and try to connect to each one
307    for(i = 0; i < num_hosts; i++) {
308     host_down = 0;
309     host_msg[0] = '\0';
310     for(j = 0; j < hosts[i].num_ports; j++) {
311      num_good_probes = 0;
312      avg_time = 0;
313      if(hosts[i].ports[j].port == 0) {
314       for(k = 0; k < num_probes; k++) {
315        ret = icmp_ping(hosts[i].ports[j].addr);
316        if(ret >= 0) {
317         total_milli += ret;
318         num_good_probes++;
319        }
320       }
321      }
322      else {
323       for(k = 0; k < num_probes; k++) {
324        if(verbose) {
325         printf("Attempting to contact %s:%d -> ", hosts[i].name, hosts[i].ports[j].port);
326        }
327        ret = tcp_ping(hosts[i].ports[j].addr, hosts[i].ports[j].port);
328        if(ret >= 0) {
329         total_milli += ret;
330         num_good_probes++;
331        }
332       }
333      }
334 
335      if(ret < 0) {
336       // host is first found to be down
337       if((hosts[i].ports[j].is_down == 0
338           && hosts[i].ports[j].downtime == 0) || severe < 1) {
339        strcat(host_msg, err_msg);
340        host_down = 1;
341        hosts[i].ports[j].is_down = 1;
342        hosts[i].ports[j].downtime = time(NULL);
343       }
344       else {
345        hosts[i].ports[j].is_down++;
346        // severe notification.  Only applies when the host
347        // has been down for <severe> passes, and severe is
348        // set to a number greater than 0
349        if((hosts[i].ports[j].is_down % severe == 0) && severe >= 1) {
350         tmp_time = time(NULL);
351         tmp_time -= hosts[i].ports[j].downtime;
352 
353         days = (int)(tmp_time / (60 * 60 * 24));
354 	tmp_time -= days * (60 * 60 * 24);
355 
356         hrs = (int)(tmp_time / (60 * 60));
357 	tmp_time -= hrs * (60 * 60);
358 
359         mins = (int)(tmp_time / 60);
360         snprintf(err_msg, STRLARGE,
361                  "%s:%d is STILL down (down: %dd,%dh,%dm)\n",
362                  hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
363         log_write(err_msg);
364         strcat(host_msg, err_msg);
365         host_down = 1;
366         hosts[i].ports[j].is_down = 0;
367        }
368       }
369      }
370      // the host is up
371      else {
372       // calculate average lag
373       avg_time = total_milli / num_good_probes;
374       // see if the host was down last time, and send notification
375       // that it is back up
376       if(hosts[i].ports[j].is_down) {
377        tmp_time = time(NULL);
378        tmp_time -= hosts[i].ports[j].downtime;
379        days = (int)(tmp_time / (60 * 60 * 24));
380        tmp_time -= days * (60 * 60 * 24);
381 
382        hrs = (int)(tmp_time / (60 * 60));
383        tmp_time -= hrs * (60 * 60);
384 
385        mins = (int)(tmp_time / 60);
386        if(avg_time > lag_timeout) {
387         snprintf(err_msg, STRLARGE,
388                  "%s:%d appears to be back up, but is above the lag threshold (down: %dd:%dh:%dm, %dms lag)\n",
389                  hosts[i].name, hosts[i].ports[j].port, days, hrs, mins,
390                  avg_time);
391        }
392        else {
393         snprintf(err_msg, STRLARGE,
394                  "%s:%d appears to be back up (down: %dd:%dh:%dm)\n",
395                  hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
396        }
397        log_write(err_msg);
398        strncat(host_msg, err_msg, STRLARGE);
399        host_down = 1;
400        hosts[i].ports[j].is_down = 0;
401        hosts[i].ports[j].downtime = 0;
402       }
403       else if(avg_time > lag_timeout) {
404        // since lag_timeout is defined to be INT_MAX, this will only
405        // be entered if it was explicitly defined on the command line.
406        snprintf(err_msg, STRLARGE,
407                 "%s:%d is up, but is above the lag threshold (%dms lag)\n",
408                 hosts[i].name, hosts[i].ports[j].port, avg_time);
409        log_write(err_msg);
410        strncat(host_msg, err_msg, STRLARGE);
411        host_down = 1;
412       }
413      }
414     }
415     if(host_down == 1) {
416      // run a script?
417      if(strlen(run_script)) {
418       exec_proc(run_script, time(NULL), hosts[i].name, host_msg);
419      }
420      // run the compiled in module
421      else
422       err_action(hosts[i].name, host_msg);
423     }
424    }
425    // precious sleep
426    sleep(sleep_time);
427   }
428  }
429  // parent exits
430  else if(daemonize_flag) {
431   exit(0);
432  }
433  else {
434  }
435 
436  return (0);
437 }
438