1 /* Copyright (C) 2009 Trend Micro Inc.
2  * All rights reserved.
3  *
4  * This program is a free software; you can redistribute it
5  * and/or modify it under the terms of the GNU General Public
6  * License (version 2) as published by the FSF - Free Software
7  * Foundation.
8  */
9 
10 #include "cleanevent.h"
11 
12 #include "shared.h"
13 #include "os_regex/os_regex.h"
14 #include "analysisd.h"
15 #include "fts.h"
16 #include "config.h"
17 
/* Three-letter English month abbreviations, indexed by month number
 * starting at 0 for January (used below with struct tm's tm_mon).
 */
static const char *month[] = {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
22 
23 
24 /* Format a received message in the Eventinfo structure */
OS_CleanMSG(char * msg,Eventinfo * lf)25 int OS_CleanMSG(char *msg, Eventinfo *lf)
26 {
27     size_t loglen;
28     char *pieces;
29     struct tm *p;
30 
31     /* The message is formatted in the following way:
32      * id:location:message.
33      */
34 
35     /* Ignore the id of the message in here */
36     msg += 2;
37 
38     /* Set pieces as the message */
39     pieces = strchr(msg, ':');
40     if (!pieces) {
41         merror(FORMAT_ERROR, ARGV0);
42         return (-1);
43     }
44 
45     /* Is this from an agent? */
46     if ( *msg == '(' )
47     {   /* look past '->' for the first ':' */
48         pieces = strstr(msg, "->");
49         if(!pieces) {
50             merror(FORMAT_ERROR, ARGV0);
51             return(-1);
52         }
53         pieces = strchr(pieces, ':');
54         if(!pieces)
55         {
56             merror(FORMAT_ERROR, ARGV0);
57             return(-1);
58         }
59     }
60 
61     *pieces = '\0';
62     pieces++;
63 
64     os_strdup(msg, lf->location);
65 
66     /* Get the log length */
67     loglen = strlen(pieces) + 1;
68 
69     /* Assign the values in the structure (lf->full_log) */
70     os_malloc((2 * loglen) + 1, lf->full_log);
71 
72     /* Set the whole message at full_log */
73     strncpy(lf->full_log, pieces, loglen);
74 
75     /* Log is the one used for parsing in the decoders and rules */
76     lf->log = lf->full_log + loglen;
77     strncpy(lf->log, pieces, loglen);
78 
79     /* check if month contains an umlaut and repair
80      * umlauts are non-ASCII and use 2 slots in the char array
81      * repair to only one slot so we can detect the correct date format in the next step
82      * ex: Mär 02 17:30:52
83      */
84     if (pieces[1] == (char) 195) {
85         if (pieces[2] == (char) 164) {
86             pieces[0] = '\0';
87             pieces[1] = 'M';
88             pieces[2] = 'a';
89             pieces++;
90         }
91     }
92 
93     /* Check for the syslog date format
94      * ( ex: Dec 29 10:00:01
95      *   or  2015-04-16 21:51:02,805 for proftpd 1.3.5
96      *   or  2007-06-14T15:48:55-04:00 for syslog-ng isodate
97      *   or  2007-06-14T15:48:55.3352-04:00 for syslog-ng isodate with up to 6 optional fraction of a second
98      *   or  2009-05-22T09:36:46.214994-07:00 for rsyslog
99      *   or  2015 Dec 29 10:00:01 )
100      */
101     if (
102         (   /* ex: Dec 29 10:00:01 */
103             (loglen > 17) &&
104             (pieces[3] == ' ') &&
105             (pieces[6] == ' ') &&
106             (pieces[9] == ':') &&
107             (pieces[12] == ':') &&
108             (pieces[15] == ' ') && (lf->log += 16)
109         )
110         ||
111         (   /* ex: 2015-04-16 21:51:02,805 */
112             (loglen > 24) &&
113             (pieces[4] == '-') &&
114             (pieces[7] == '-') &&
115             (pieces[10] == ' ') &&
116             (pieces[13] == ':') &&
117             (pieces[16] == ':') &&
118             (pieces[19] == ',') &&
119             (lf->log += 23)
120         )
121         ||
122         (
123             (loglen > 33) &&
124             (pieces[4] == '-') &&
125             (pieces[7] == '-') &&
126             (pieces[10] == 'T') &&
127             (pieces[13] == ':') &&
128             (pieces[16] == ':') &&
129             (   /* ex: 2007-06-14T15:48:55-04:00 */
130                 (
131                     (pieces[22] == ':') &&
132                     (pieces[25] == ' ') && (lf->log += 26)
133                 )
134                 ||
135                 /* ex: 2007-06-14T15:48:55.3-04:00 or 2009-05-22T09:36:46,214994-07:00 */
136                 (
137                     (
138                         (pieces[19] == '.') || (pieces[19] == ',')
139                     )
140                     &&
141                     (
142                         ( (pieces[24] == ':') && (lf->log += 27) ) ||
143                         ( (pieces[25] == ':') && (lf->log += 28) ) ||
144                         ( (pieces[26] == ':') && (lf->log += 29) ) ||
145                         ( (pieces[27] == ':') && (lf->log += 30) ) ||
146                         ( (pieces[28] == ':') && (lf->log += 31) ) ||
147                         ( (pieces[29] == ':') && (lf->log += 32) )
148                     )
149                 )
150             )
151         )
152         ||
153         (   /* ex: 2015 Dec 29 10:00:01 */
154             (loglen > 21) &&
155             (isdigit(pieces[0])) &&
156             (pieces[4] == ' ') &&
157             (pieces[8] == ' ') &&
158             (pieces[11] == ' ') &&
159             (pieces[14] == ':') &&
160             (pieces[17] == ':') &&
161             (pieces[20] == ' ') && (lf->log += 21)
162         )
163         ||
164         (
165             /* ex: 2019:11:06-00:08:03 */
166             (loglen > 20) &&
167             (isdigit(pieces[0])) &&
168             (pieces[4] == ':') &&
169             (pieces[7] == ':') &&
170             (pieces[10] == '-') &&
171             (pieces[13] == ':') &&
172             (pieces[16] == ':') && (lf->log += 20)
173         )
174     ) {
175         /* Check for an extra space in here */
176         if (*lf->log == ' ') {
177             lf->log++;
178         }
179 
180 
181         /* Hostname */
182         pieces = lf->hostname = lf->log;
183 
184 
185         /* Check for a valid hostname */
186         while (isValidChar(*pieces) == 1) {
187             pieces++;
188         }
189 
190         /* Check if it is a syslog without hostname (common on Solaris) */
191         if (*pieces == ':' && pieces[1] == ' ') {
192             /* Getting solaris 8/9 messages without hostname.
193              * In these cases, the process_name should be there.
194              * http://www.ossec.net/wiki/index.php/Log_Samples_Solaris
195              */
196             lf->program_name = lf->hostname;
197             lf->hostname = NULL;
198 
199             /* End the program name string */
200             *pieces = '\0';
201 
202             pieces += 2;
203             lf->log = pieces;
204         }
205 
206         /* Extract the hostname */
207         else if (*pieces != ' ') {
208             /* Invalid hostname */
209             lf->hostname = NULL;
210             pieces = NULL;
211         } else {
212             /* End the hostname string */
213             *pieces = '\0';
214 
215             /* Move pieces to the beginning of the log message */
216             pieces++;
217             lf->log = pieces;
218 
219             /* Get program_name */
220             lf->program_name = pieces;
221 
222             /* Extract program_name */
223             /* Valid names:
224              * p_name:
225              * p_name[pid]:
226              * p_name[pid]: [ID xx facility.severity]
227              * auth|security:info p_name:
228              */
229             while (isValidChar(*pieces) == 1) {
230                 pieces++;
231             }
232 
233             /* Check for the first format: p_name: */
234             if ((*pieces == ':') && (pieces[1] == ' ')) {
235                 *pieces = '\0';
236                 pieces += 2;
237             }
238 
239             /* Check for the second format: p_name[pid]: */
240             else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
241                 *pieces = '\0';
242                 pieces += 2;
243                 while (isdigit((int)*pieces)) {
244                     pieces++;
245                 }
246 
247                 if ((*pieces == ']') && (pieces[1] == ':') && (pieces[2] == ' ')) {
248                     pieces += 3;
249                 }
250                 /* Some systems are not terminating the program name with
251                  * a ':'. Working around this in here...
252                  */
253                 else if ((*pieces == ']') && (pieces[1] == ' ')) {
254                     pieces += 2;
255                 } else {
256                     /* Fix for some weird log formats */
257                     pieces--;
258                     while (isdigit((int)*pieces)) {
259                         pieces--;
260                     }
261 
262                     if (*pieces == '\0') {
263                         *pieces = '[';
264                     }
265                     pieces = NULL;
266                     lf->program_name = NULL;
267                 }
268             }
269             /* AIX syslog */
270             else if ((*pieces == '|') && islower((int)pieces[1])) {
271                 pieces += 2;
272 
273                 /* Remove facility */
274                 while (isalnum((int)*pieces)) {
275                     pieces++;
276                 }
277 
278                 if (*pieces == ':') {
279                     /* Remove severity */
280                     pieces++;
281                     while (isalnum((int)*pieces)) {
282                         pieces++;
283                     }
284 
285                     if (*pieces == ' ') {
286                         pieces++;
287                         lf->program_name = pieces;
288 
289 
290                         /* Get program name again */
291                         while (isValidChar(*pieces) == 1) {
292                             pieces++;
293                         }
294 
295                         /* Check for the first format: p_name: */
296                         if ((*pieces == ':') && (pieces[1] == ' ')) {
297                             *pieces = '\0';
298                             pieces += 2;
299                         }
300 
301                         /* Check for the second format: p_name[pid]: */
302                         else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
303                             *pieces = '\0';
304                             pieces += 2;
305                             while (isdigit((int)*pieces)) {
306                                 pieces++;
307                             }
308 
309                             if ((*pieces == ']') && (pieces[1] == ':') &&
310                                     (pieces[2] == ' ')) {
311                                 pieces += 3;
312                             } else {
313                                 pieces = NULL;
314                             }
315                         }
316                     } else {
317                         pieces = NULL;
318                         lf->program_name = NULL;
319                     }
320                 }
321                 /* Invalid AIX */
322                 else {
323                     pieces = NULL;
324                     lf->program_name = NULL;
325                 }
326             } else {
327                 pieces = NULL;
328                 lf->program_name = NULL;
329             }
330         }
331 
332         /* Remove [ID xx facility.severity] */
333         if (pieces) {
334             /* Set log after program name */
335             lf->log = pieces;
336 
337             if ((pieces[0] == '[') &&
338                     (pieces[1] == 'I') &&
339                     (pieces[2] == 'D') &&
340                     (pieces[3] == ' ')) {
341                 pieces += 4;
342 
343                 /* Going after the "] " */
344                 pieces = strstr(pieces, "] ");
345                 if (pieces) {
346                     pieces += 2;
347                     lf->log = pieces;
348                 }
349             }
350         }
351 
352         /* Get program name size */
353         if (lf->program_name) {
354             lf->p_name_size = strlen(lf->program_name);
355         }
356     }
357 
358     /* xferlog date format
359      * Mon Apr 17 18:27:14 2006 1 64.160.42.130
360      */
361     else if ((loglen > 28) &&
362              (pieces[3] == ' ') &&
363              (pieces[7] == ' ') &&
364              (pieces[10] == ' ') &&
365              (pieces[13] == ':') &&
366              (pieces[16] == ':') &&
367              (pieces[19] == ' ') &&
368              (pieces[24] == ' ') &&
369              (pieces[26] == ' ')) {
370         /* Move log to the beginning of the message */
371         lf->log += 24;
372     }
373 
374     /* Check for snort date format
375      * ex: 01/28-09:13:16.240702  [**]
376      */
377     else if ( (loglen > 24) &&
378               (pieces[2] == '/') &&
379               (pieces[5] == '-') &&
380               (pieces[8] == ':') &&
381               (pieces[11] == ':') &&
382               (pieces[14] == '.') &&
383               (pieces[21] == ' ') ) {
384         lf->log += 23;
385     }
386 
387     /* Check for suricata (new) date format
388      * ex: 01/28/1979-09:13:16.240702  [**]
389      */
390     else if ( (loglen > 26) &&
391               (pieces[2] == '/') &&
392               (pieces[5] == '/') &&
393               (pieces[10] == '-') &&
394               (pieces[13] == ':') &&
395               (pieces[16] == ':') &&
396               (pieces[19] == '.') &&
397               (pieces[26] == ' ') ) {
398         lf->log += 28;
399     }
400 
401 
402     /* Check for apache log format */
403     /* [Fri Feb 11 18:06:35 2004] [warn] */
404     else if ( (loglen > 27) &&
405               (pieces[0] == '[') &&
406               (pieces[4] == ' ') &&
407               (pieces[8] == ' ') &&
408               (pieces[11] == ' ') &&
409               (pieces[14] == ':') &&
410               (pieces[17] == ':') &&
411               (pieces[20] == ' ') &&
412               (pieces[25] == ']') ) {
413         lf->log += 27;
414     }
415 
416     /* Check for the osx asl log format.
417      * Examples:
418      * [Time 2006.12.28 15:53:55 UTC] [Facility auth] [Sender sshd] [PID 483] [Message error: PAM: Authentication failure for username from 192.168.0.2] [Level 3] [UID -2] [GID -2] [Host Hostname]
419      * [Time 2006.11.02 14:02:11 UTC] [Facility auth] [Sender sshd] [PID 856]
420      [Message refused connect from 59.124.44.34] [Level 4] [UID -2] [GID -2]
421      [Host robert-wyatts-emac]
422      */
423     else if ((loglen > 26) &&
424              (pieces[0] == '[')  &&
425              (pieces[1] == 'T')  &&
426              (pieces[5] == ' ')  &&
427              (pieces[10] == '.') &&
428              (pieces[13] == '.') &&
429              (pieces[16] == ' ') &&
430              (pieces[19] == ':')) {
431         /* Do not read more than 1 message entry -> log tampering */
432         short unsigned int done_message = 0;
433 
434         /* Remove the date */
435         lf->log += 25;
436 
437         /* Get the desired values */
438         pieces = strchr(lf->log, '[');
439         while (pieces) {
440             pieces++;
441 
442             /* Get the sender (set to program name) */
443             if ((strncmp(pieces, "Sender ", 7) == 0) &&
444                     (lf->program_name == NULL)) {
445                 pieces += 7;
446                 lf->program_name = pieces;
447 
448                 /* Get the closing brackets */
449                 pieces = strchr(pieces, ']');
450                 if (pieces) {
451                     *pieces = '\0';
452 
453                     /* Set program_name size */
454                     lf->p_name_size = strlen(lf->program_name);
455 
456                     pieces++;
457                 }
458                 /* Invalid program name */
459                 else {
460                     lf->program_name = NULL;
461                     break;
462                 }
463             }
464 
465             /* Get message */
466             else if ((strncmp(pieces, "Message ", 8) == 0) &&
467                      (done_message == 0)) {
468                 pieces += 8;
469                 done_message = 1;
470 
471                 lf->log = pieces;
472 
473                 /* Get the closing brackets */
474                 pieces = strchr(pieces, ']');
475                 if (pieces) {
476                     *pieces = '\0';
477                     pieces++;
478                 }
479                 /* Invalid log closure */
480                 else {
481                     break;
482                 }
483             }
484 
485             /* Get hostname */
486             else if (strncmp(pieces, "Host ", 5) == 0) {
487                 pieces += 5;
488                 lf->hostname = pieces;
489 
490                 /* Get the closing brackets */
491                 pieces = strchr(pieces, ']');
492                 if (pieces) {
493                     *pieces = '\0';
494                     pieces++;
495                 }
496 
497                 /* Invalid hostname */
498                 else {
499                     lf->hostname = NULL;
500                 }
501                 break;
502             }
503 
504             /* Get next entry */
505             pieces = strchr(pieces, '[');
506         }
507     }
508 
509     /* Check for squid date format
510      * 1140804070.368  11623
511      * seconds from 00:00:00 1970-01-01 UTC
512      */
513     else if ((loglen > 32) &&
514              (pieces[0] == '1') &&
515              (isdigit((int)pieces[1])) &&
516              (isdigit((int)pieces[2])) &&
517              (isdigit((int)pieces[3])) &&
518              (pieces[10] == '.') &&
519              (isdigit((int)pieces[13])) &&
520              (pieces[14] == ' ') &&
521              ((pieces[21] == ' ') || (pieces[22] == ' '))) {
522         lf->log += 14;
523 
524         /* We need to start at the size of the event */
525         while (*lf->log == ' ') {
526             lf->log++;
527         }
528     }
529 
530     /* Every message must be in the format
531      * hostname->location or
532      * (agent) ip->location.
533      */
534 
535     /* Set hostname for local messages */
536     if (lf->location[0] == '(') {
537         /* Messages from an agent */
538         lf->hostname = lf->location;
539     } else if (lf->hostname == NULL) {
540         lf->hostname = __shost;
541     }
542 
543     /* Set up the event data */
544     lf->time = c_time;
545     p = localtime(&c_time);
546 
547     /* Assign hour, day, year and month values */
548     lf->day = p->tm_mday;
549     lf->year = p->tm_year + 1900;
550     strncpy(lf->mon, month[p->tm_mon], 3);
551     snprintf(lf->hour, 9, "%02d:%02d:%02d",
552              p->tm_hour,
553              p->tm_min,
554              p->tm_sec);
555 
556     /* Set the global hour/weekday */
557     __crt_hour = p->tm_hour;
558     __crt_wday = p->tm_wday;
559 
560 #ifdef TESTRULE
561     if (!alert_only) {
562         print_out("**Phase 1: Completed pre-decoding.");
563         print_out("       full event: '%s'", lf->full_log);
564         print_out("       hostname: '%s'", lf->hostname);
565         print_out("       program_name: '%s'", lf->program_name);
566         print_out("       log: '%s'", lf->log);
567     }
568 #endif
569     return (0);
570 }
571 
572