1 /*
2 ** portmon.c -- Main procedure for the portmon daemon
3 ** Copyright (C) 2002 Nik Reiman <nik@aboleo.net>
4 **
5 ** This program is free software; you can redistribute it and/or modify
6 ** it under the terms of the GNU General Public License as published by
7 ** the Free Software Foundation; either version 2 of the License, or
8 ** (at your option) any later version.
9 **
10 ** This program is distributed in the hope that it will be useful,
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 ** GNU General Public License for more details.
14 **
15 ** You should have received a copy of the GNU General Public License
16 ** along with this program; if not, write to the Free Software
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <errno.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <signal.h>
26 #include <ctype.h>
27 #include <time.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30
31 #include "portmon.h"
32 #include "config.h"
33
34 #ifdef HAVE_GETOPT_H
35 #include <getopt.h>
36 #endif
37
38 int report_flag;
39 int daemonize_flag;
40
41 struct host_struct *hosts;
42
43 char *err_msg;
44 char logfile[STRLARGE];
45 int timeout;
46 int verbose;
47
48
49
version(void)50 void version(void)
51 {
52 printf("This is %s, version %s\n", PACKAGE, VERSION);
53 printf("By Nik Reiman, nik@aboleo.net\n");
54 }
55
portmon_exit(int signal)56 void portmon_exit(int signal)
57 {
58 snprintf(err_msg, STRLARGE, "Exiting on signal %d\n", signal);
59 log_write(err_msg);
60 exit(0);
61 }
62
log_write(char * msg)63 int log_write(char *msg)
64 {
65 FILE *log_fp;
66
67 if((log_fp = fopen(logfile, "a")) == NULL) {
68 perror("fopen");
69 return (1);
70 }
71 fprintf(log_fp, "(%s) - %s", get_time(time(NULL)), err_msg);
72 fclose(log_fp);
73 return (0);
74 }
75
// Format a timestamp in ctime() style without the trailing newline,
// e.g. "Thu Jan  1 00:00:00 1970".
//
//   cur_time  the time to format
//
// Returns a newly allocated, NUL-terminated string that the caller must
// free(), or NULL if the time cannot be converted or memory is exhausted.
char *get_time(time_t cur_time)
{
    const char *raw = ctime(&cur_time);
    char *fmt_time;
    size_t len;

    if(raw == NULL) {
        return (NULL);
    }

    // copy everything up to (not including) the newline ctime() appends
    len = strcspn(raw, "\n");
    fmt_time = malloc(len + 1);
    if(fmt_time == NULL) {
        return (NULL);
    }
    memcpy(fmt_time, raw, len);
    fmt_time[len] = '\0';
    return (fmt_time);
}
86
// Print the version banner followed by the command-line usage summary.
// The long-option spellings are shown only when getopt_long() is
// available (HAVE_GETOPT_LONG); otherwise just the short options.
void help(void)
{
    static const char *const usage_lines[] = {
        "Usage: portmon [options]\n",
        "Where options include:\n",
#ifdef HAVE_GETOPT_LONG
        " -n, --sleep=<time>\tMinutes between checks (1 hour)\n",
        " -c, --config=<file>\tHost file (./hosts)\n",
        " -l, --logfile=<file>\tlogfile location (/dev/null)\n",
        " -s, --severe=<number>\tHow many times to wait before severe notification (12)\n",
        " -e, --error=<command>\tCommand to run in case of error (built-in)\n",
        " -t, --timeout=<time>\tNumber of seconds for connect timeout (3)\n",
        " -g, --lag=<time>\tNumber of milliseconds for lagout notification\n",
        " -p, --probes=<number>\tNumber of probes to send out (3)\n",
        " -r, --report=[format]\tRun in report mode, with optional format\n",
        " -v, --verbose\t\tVerbose mode\n",
        " -d, --daemon\t\tDaemonize portmon\n",
        " -V, --version\t\tVersion\n",
        " -h, --help\t\tThis screen\n",
#else
        " -n <time>\tMinutes between checks (1 hour)\n",
        " -c <file>\tHost file (./hosts)\n",
        " -l <file>\tlogfile location (/dev/null)\n",
        " -s <number>\tHow many times to wait before severe notification (12)\n",
        " -e <command>\tCommand to run in case of error (built-in)\n",
        " -t <time>\tNumber of seconds for connect timeout (3)\n",
        " -g <time>\tNumber of milliseconds for lagout notification\n",
        " -p <number>\tNumber of probes to send out (3)\n",
        " -r [format]\tRun in report mode\n",
        " -v\t\tVerbose mode\n",
        " -d\t\tDaemonize portmon\n",
        " -V\t\tVersion\n",
        " -h\t\tThis screen\n",
#endif
    };
    const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];
    size_t idx;

    version();
    for(idx = 0; idx < line_count; idx++) {
        fputs(usage_lines[idx], stdout);
    }
}
126
main(int argc,char * argv[])127 int main(int argc, char *argv[])
128 {
129 // 128 bytes for the name of the filename containing our hosts
130 char *host_file = malloc(STRMED * sizeof(char));
131 // default sleep time is 1 hour
132 int sleep_time = 60 * 60;
133 // severe being when to send another notification
134 int severe = 12;
135 // where we go when we die
136 void portmon_exit(int);
137 // options to parse for on command line
138 #ifdef HAVE_GETOPT_LONG
139 int option_index;
140 static struct option long_options[] = {
141 {"sleep", 1, 0, 'n'},
142 {"error", 1, 0, 'e'},
143 {"config", 1, 0, 'c'},
144 {"severe", 1, 0, 's'},
145 {"logfile", 1, 0, 'l'},
146 {"timeout", 1, 0, 't'},
147 {"probes", 1, 0, 'p'},
148 {"lag", 1, 0, 'g'},
149 {"daemon", 0, 0, 'd'},
150 {"report", 2, 0, 'r'},
151 {"verbose", 0, 0, 'v'},
152 {"version", 0, 0, 'V'},
153 {"help", 0, 0, 'h'},
154 {0, 0, 0, 0}
155 };
156 #endif
157
158 int i, j, k, num_hosts = 0, total_milli, num_good_probes;
159 int host_down, ret = 0, num_probes = 3, lag_timeout = MAXINT, avg_time;
160 pid_t pid = getpid();
161 time_t tmp_time;
162 char *host_msg = (char *)malloc(STRLARGE * sizeof(char));
163 int days, hrs, mins;
164 char run_script[STRLARGE];
165 char *output_fmt = NULL, opt;
166
167 err_msg = (char *)malloc(STRLARGE * sizeof(char));
168 // default file for our hosts
169 strncpy(host_file, "hosts", STRMED);
170 // default place to log. ;)
171 strncpy(logfile, "/dev/null", STRLARGE);
172 // default timeout
173 timeout = 3;
174 run_script[0] = '\0';
175 verbose = 0;
176 daemonize_flag = 0;
177 report_flag = 0;
178
179 // read args off of the command line
180 // getopt_long seems to be a linux extension, so all those bsd lamers
181 // get to miss out on the fun!
182 #ifdef HAVE_GETOPT_LONG
183 while((opt =
184 getopt_long(argc, argv, "hvVdre:c:n:s:l:t:p:0", long_options,
185 &option_index)) != -1) {
186 #else
187 while((opt = getopt(argc, argv, "hvVdre:c:n:s:l:t:p:")) != -1) {
188 #endif
189 switch (opt) {
190 case 'n':
191 // -n <number> ... will be the new number of minutes to sleep for
192 sleep_time = atoi(optarg) * 60;
193 break;
194 case 'e':
195 // -e <file> ... run file as an error handler instead of err_action
196 strncpy(run_script, optarg, 256);
197 break;
198 case 'c':
199 // -c <file> ... will be an alternate host file
200 strncpy(host_file, optarg, 128);
201 break;
202 case 's':
203 // -s <time> ... severe warning (optional)
204 severe = atoi(optarg);
205 break;
206 case 'l':
207 // -l <logfile> ... where to log to
208 strncpy(logfile, optarg, 256);
209 break;
210 case 't':
211 // -t <seconds> ... set the connect timeout
212 timeout = atoi(optarg);
213 break;
214 case 'p':
215 // -p <number> ... number of probes to send out
216 num_probes = atoi(optarg);
217 break;
218 case 'g':
219 // -g <milliseconds> ... number of milliseconds to wait before lagging out
220 lag_timeout = atoi(optarg);
221 break;
222 case 'd':
223 // daemonize
224 daemonize_flag = 1;
225 break;
226 case 'r':
227 // report mode
228 report_flag = 1;
229 if(optarg) {
230 output_fmt = (char *)malloc(STRMED * sizeof(char));
231 strcpy(output_fmt, optarg);
232 }
233 break;
234 case 'v':
235 verbose = 1;
236 break;
237 case 'V':
238 version();
239 return (0);
240 break;
241 case 'h':
242 // help me!
243 help();
244 return (-1);
245 break;
246 case '?':
247 help();
248 return (-1);
249 break;
250 default:
251 printf("Unknown error parsing command line...exiting.\n");
252 return (-1);
253 break;
254 }
255 }
256
257 // trap some signals to quit on
258 if(signal(SIGQUIT, portmon_exit) == SIG_ERR) {
259 perror("signal");
260 exit(1);
261 }
262 if(signal(SIGINT, portmon_exit) == SIG_ERR) {
263 perror("signal");
264 exit(1);
265 }
266 if(signal(SIGTERM, portmon_exit) == SIG_ERR) {
267 perror("signal");
268 exit(1);
269 }
270
271 snprintf(err_msg, STRLARGE, "Portmon started by user %s\n", getenv("USER"));
272 log_write(err_msg);
273
274 // read in the configuration file
275 if((num_hosts = read_config(host_file)) == -1) {
276 printf("Failed reading config file %s\n", host_file);
277 return (1);
278 }
279
280 if(report_flag) {
281 report(num_hosts, num_probes, lag_timeout, output_fmt);
282 return (0);
283 }
284
285 // fork into the background if we are told to daemonize
286 if(daemonize_flag) {
287 pid = fork();
288 if(pid < 0) {
289 perror("fork");
290 return (1);
291 }
292 }
293
294 // either go here if being run in the foreground, or as the child proc
295 if(pid == 0 || daemonize_flag == 0) {
296 // steps needed for proper daemonization
297 if(daemonize_flag) {
298 setsid();
299 chdir("/");
300 umask(0);
301 }
302 // main loop
303 while(1) {
304 total_milli = 0;
305 num_good_probes = 0;
306 // go through the array of hosts, and try to connect to each one
307 for(i = 0; i < num_hosts; i++) {
308 host_down = 0;
309 host_msg[0] = '\0';
310 for(j = 0; j < hosts[i].num_ports; j++) {
311 num_good_probes = 0;
312 avg_time = 0;
313 if(hosts[i].ports[j].port == 0) {
314 for(k = 0; k < num_probes; k++) {
315 ret = icmp_ping(hosts[i].ports[j].addr);
316 if(ret >= 0) {
317 total_milli += ret;
318 num_good_probes++;
319 }
320 }
321 }
322 else {
323 for(k = 0; k < num_probes; k++) {
324 if(verbose) {
325 printf("Attempting to contact %s:%d -> ", hosts[i].name, hosts[i].ports[j].port);
326 }
327 ret = tcp_ping(hosts[i].ports[j].addr, hosts[i].ports[j].port);
328 if(ret >= 0) {
329 total_milli += ret;
330 num_good_probes++;
331 }
332 }
333 }
334
335 if(ret < 0) {
336 // host is first found to be down
337 if((hosts[i].ports[j].is_down == 0
338 && hosts[i].ports[j].downtime == 0) || severe < 1) {
339 strcat(host_msg, err_msg);
340 host_down = 1;
341 hosts[i].ports[j].is_down = 1;
342 hosts[i].ports[j].downtime = time(NULL);
343 }
344 else {
345 hosts[i].ports[j].is_down++;
346 // severe notification. Only applies when the host
347 // has been down for <severe> passes, and severe is
348 // set to a number greater than 0
349 if((hosts[i].ports[j].is_down % severe == 0) && severe >= 1) {
350 tmp_time = time(NULL);
351 tmp_time -= hosts[i].ports[j].downtime;
352
353 days = (int)(tmp_time / (60 * 60 * 24));
354 tmp_time -= days * (60 * 60 * 24);
355
356 hrs = (int)(tmp_time / (60 * 60));
357 tmp_time -= hrs * (60 * 60);
358
359 mins = (int)(tmp_time / 60);
360 snprintf(err_msg, STRLARGE,
361 "%s:%d is STILL down (down: %dd,%dh,%dm)\n",
362 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
363 log_write(err_msg);
364 strcat(host_msg, err_msg);
365 host_down = 1;
366 hosts[i].ports[j].is_down = 0;
367 }
368 }
369 }
370 // the host is up
371 else {
372 // calculate average lag
373 avg_time = total_milli / num_good_probes;
374 // see if the host was down last time, and send notification
375 // that it is back up
376 if(hosts[i].ports[j].is_down) {
377 tmp_time = time(NULL);
378 tmp_time -= hosts[i].ports[j].downtime;
379 days = (int)(tmp_time / (60 * 60 * 24));
380 tmp_time -= days * (60 * 60 * 24);
381
382 hrs = (int)(tmp_time / (60 * 60));
383 tmp_time -= hrs * (60 * 60);
384
385 mins = (int)(tmp_time / 60);
386 if(avg_time > lag_timeout) {
387 snprintf(err_msg, STRLARGE,
388 "%s:%d appears to be back up, but is above the lag threshold (down: %dd:%dh:%dm, %dms lag)\n",
389 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins,
390 avg_time);
391 }
392 else {
393 snprintf(err_msg, STRLARGE,
394 "%s:%d appears to be back up (down: %dd:%dh:%dm)\n",
395 hosts[i].name, hosts[i].ports[j].port, days, hrs, mins);
396 }
397 log_write(err_msg);
398 strncat(host_msg, err_msg, STRLARGE);
399 host_down = 1;
400 hosts[i].ports[j].is_down = 0;
401 hosts[i].ports[j].downtime = 0;
402 }
403 else if(avg_time > lag_timeout) {
404 // since lag_timeout is defined to be INT_MAX, this will only
405 // be entered if it was explicitly defined on the command line.
406 snprintf(err_msg, STRLARGE,
407 "%s:%d is up, but is above the lag threshold (%dms lag)\n",
408 hosts[i].name, hosts[i].ports[j].port, avg_time);
409 log_write(err_msg);
410 strncat(host_msg, err_msg, STRLARGE);
411 host_down = 1;
412 }
413 }
414 }
415 if(host_down == 1) {
416 // run a script?
417 if(strlen(run_script)) {
418 exec_proc(run_script, time(NULL), hosts[i].name, host_msg);
419 }
420 // run the compiled in module
421 else
422 err_action(hosts[i].name, host_msg);
423 }
424 }
425 // precious sleep
426 sleep(sleep_time);
427 }
428 }
429 // parent exits
430 else if(daemonize_flag) {
431 exit(0);
432 }
433 else {
434 }
435
436 return (0);
437 }
438