1 /* Copyright (C) 2009 Trend Micro Inc.
2 * All rights reserved.
3 *
4 * This program is a free software; you can redistribute it
5 * and/or modify it under the terms of the GNU General Public
6 * License (version 2) as published by the FSF - Free Software
7 * Foundation.
8 */
9
10 #include "cleanevent.h"
11
12 #include "shared.h"
13 #include "os_regex/os_regex.h"
14 #include "analysisd.h"
15 #include "fts.h"
16 #include "config.h"
17
/* Three-letter English month abbreviations, indexed by a zero-based month
 * number (0 = "Jan" ... 11 = "Dec"); used below with struct tm's tm_mon.
 */
static const char *month[] = {
    "Jan", "Feb", "Mar", "Apr",
    "May", "Jun", "Jul", "Aug",
    "Sep", "Oct", "Nov", "Dec"
};
22
23
24 /* Format a received message in the Eventinfo structure */
OS_CleanMSG(char * msg,Eventinfo * lf)25 int OS_CleanMSG(char *msg, Eventinfo *lf)
26 {
27 size_t loglen;
28 char *pieces;
29 struct tm *p;
30
31 /* The message is formatted in the following way:
32 * id:location:message.
33 */
34
35 /* Ignore the id of the message in here */
36 msg += 2;
37
38 /* Set pieces as the message */
39 pieces = strchr(msg, ':');
40 if (!pieces) {
41 merror(FORMAT_ERROR, ARGV0);
42 return (-1);
43 }
44
45 /* Is this from an agent? */
46 if ( *msg == '(' )
47 { /* look past '->' for the first ':' */
48 pieces = strstr(msg, "->");
49 if(!pieces) {
50 merror(FORMAT_ERROR, ARGV0);
51 return(-1);
52 }
53 pieces = strchr(pieces, ':');
54 if(!pieces)
55 {
56 merror(FORMAT_ERROR, ARGV0);
57 return(-1);
58 }
59 }
60
61 *pieces = '\0';
62 pieces++;
63
64 os_strdup(msg, lf->location);
65
66 /* Get the log length */
67 loglen = strlen(pieces) + 1;
68
69 /* Assign the values in the structure (lf->full_log) */
70 os_malloc((2 * loglen) + 1, lf->full_log);
71
72 /* Set the whole message at full_log */
73 strncpy(lf->full_log, pieces, loglen);
74
75 /* Log is the one used for parsing in the decoders and rules */
76 lf->log = lf->full_log + loglen;
77 strncpy(lf->log, pieces, loglen);
78
79 /* check if month contains an umlaut and repair
80 * umlauts are non-ASCII and use 2 slots in the char array
81 * repair to only one slot so we can detect the correct date format in the next step
82 * ex: Mär 02 17:30:52
83 */
84 if (pieces[1] == (char) 195) {
85 if (pieces[2] == (char) 164) {
86 pieces[0] = '\0';
87 pieces[1] = 'M';
88 pieces[2] = 'a';
89 pieces++;
90 }
91 }
92
93 /* Check for the syslog date format
94 * ( ex: Dec 29 10:00:01
95 * or 2015-04-16 21:51:02,805 for proftpd 1.3.5
96 * or 2007-06-14T15:48:55-04:00 for syslog-ng isodate
97 * or 2007-06-14T15:48:55.3352-04:00 for syslog-ng isodate with up to 6 optional fraction of a second
98 * or 2009-05-22T09:36:46.214994-07:00 for rsyslog
99 * or 2015 Dec 29 10:00:01 )
100 */
101 if (
102 ( /* ex: Dec 29 10:00:01 */
103 (loglen > 17) &&
104 (pieces[3] == ' ') &&
105 (pieces[6] == ' ') &&
106 (pieces[9] == ':') &&
107 (pieces[12] == ':') &&
108 (pieces[15] == ' ') && (lf->log += 16)
109 )
110 ||
111 ( /* ex: 2015-04-16 21:51:02,805 */
112 (loglen > 24) &&
113 (pieces[4] == '-') &&
114 (pieces[7] == '-') &&
115 (pieces[10] == ' ') &&
116 (pieces[13] == ':') &&
117 (pieces[16] == ':') &&
118 (pieces[19] == ',') &&
119 (lf->log += 23)
120 )
121 ||
122 (
123 (loglen > 33) &&
124 (pieces[4] == '-') &&
125 (pieces[7] == '-') &&
126 (pieces[10] == 'T') &&
127 (pieces[13] == ':') &&
128 (pieces[16] == ':') &&
129 ( /* ex: 2007-06-14T15:48:55-04:00 */
130 (
131 (pieces[22] == ':') &&
132 (pieces[25] == ' ') && (lf->log += 26)
133 )
134 ||
135 /* ex: 2007-06-14T15:48:55.3-04:00 or 2009-05-22T09:36:46,214994-07:00 */
136 (
137 (
138 (pieces[19] == '.') || (pieces[19] == ',')
139 )
140 &&
141 (
142 ( (pieces[24] == ':') && (lf->log += 27) ) ||
143 ( (pieces[25] == ':') && (lf->log += 28) ) ||
144 ( (pieces[26] == ':') && (lf->log += 29) ) ||
145 ( (pieces[27] == ':') && (lf->log += 30) ) ||
146 ( (pieces[28] == ':') && (lf->log += 31) ) ||
147 ( (pieces[29] == ':') && (lf->log += 32) )
148 )
149 )
150 )
151 )
152 ||
153 ( /* ex: 2015 Dec 29 10:00:01 */
154 (loglen > 21) &&
155 (isdigit(pieces[0])) &&
156 (pieces[4] == ' ') &&
157 (pieces[8] == ' ') &&
158 (pieces[11] == ' ') &&
159 (pieces[14] == ':') &&
160 (pieces[17] == ':') &&
161 (pieces[20] == ' ') && (lf->log += 21)
162 )
163 ||
164 (
165 /* ex: 2019:11:06-00:08:03 */
166 (loglen > 20) &&
167 (isdigit(pieces[0])) &&
168 (pieces[4] == ':') &&
169 (pieces[7] == ':') &&
170 (pieces[10] == '-') &&
171 (pieces[13] == ':') &&
172 (pieces[16] == ':') && (lf->log += 20)
173 )
174 ) {
175 /* Check for an extra space in here */
176 if (*lf->log == ' ') {
177 lf->log++;
178 }
179
180
181 /* Hostname */
182 pieces = lf->hostname = lf->log;
183
184
185 /* Check for a valid hostname */
186 while (isValidChar(*pieces) == 1) {
187 pieces++;
188 }
189
190 /* Check if it is a syslog without hostname (common on Solaris) */
191 if (*pieces == ':' && pieces[1] == ' ') {
192 /* Getting solaris 8/9 messages without hostname.
193 * In these cases, the process_name should be there.
194 * http://www.ossec.net/wiki/index.php/Log_Samples_Solaris
195 */
196 lf->program_name = lf->hostname;
197 lf->hostname = NULL;
198
199 /* End the program name string */
200 *pieces = '\0';
201
202 pieces += 2;
203 lf->log = pieces;
204 }
205
206 /* Extract the hostname */
207 else if (*pieces != ' ') {
208 /* Invalid hostname */
209 lf->hostname = NULL;
210 pieces = NULL;
211 } else {
212 /* End the hostname string */
213 *pieces = '\0';
214
215 /* Move pieces to the beginning of the log message */
216 pieces++;
217 lf->log = pieces;
218
219 /* Get program_name */
220 lf->program_name = pieces;
221
222 /* Extract program_name */
223 /* Valid names:
224 * p_name:
225 * p_name[pid]:
226 * p_name[pid]: [ID xx facility.severity]
227 * auth|security:info p_name:
228 */
229 while (isValidChar(*pieces) == 1) {
230 pieces++;
231 }
232
233 /* Check for the first format: p_name: */
234 if ((*pieces == ':') && (pieces[1] == ' ')) {
235 *pieces = '\0';
236 pieces += 2;
237 }
238
239 /* Check for the second format: p_name[pid]: */
240 else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
241 *pieces = '\0';
242 pieces += 2;
243 while (isdigit((int)*pieces)) {
244 pieces++;
245 }
246
247 if ((*pieces == ']') && (pieces[1] == ':') && (pieces[2] == ' ')) {
248 pieces += 3;
249 }
250 /* Some systems are not terminating the program name with
251 * a ':'. Working around this in here...
252 */
253 else if ((*pieces == ']') && (pieces[1] == ' ')) {
254 pieces += 2;
255 } else {
256 /* Fix for some weird log formats */
257 pieces--;
258 while (isdigit((int)*pieces)) {
259 pieces--;
260 }
261
262 if (*pieces == '\0') {
263 *pieces = '[';
264 }
265 pieces = NULL;
266 lf->program_name = NULL;
267 }
268 }
269 /* AIX syslog */
270 else if ((*pieces == '|') && islower((int)pieces[1])) {
271 pieces += 2;
272
273 /* Remove facility */
274 while (isalnum((int)*pieces)) {
275 pieces++;
276 }
277
278 if (*pieces == ':') {
279 /* Remove severity */
280 pieces++;
281 while (isalnum((int)*pieces)) {
282 pieces++;
283 }
284
285 if (*pieces == ' ') {
286 pieces++;
287 lf->program_name = pieces;
288
289
290 /* Get program name again */
291 while (isValidChar(*pieces) == 1) {
292 pieces++;
293 }
294
295 /* Check for the first format: p_name: */
296 if ((*pieces == ':') && (pieces[1] == ' ')) {
297 *pieces = '\0';
298 pieces += 2;
299 }
300
301 /* Check for the second format: p_name[pid]: */
302 else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
303 *pieces = '\0';
304 pieces += 2;
305 while (isdigit((int)*pieces)) {
306 pieces++;
307 }
308
309 if ((*pieces == ']') && (pieces[1] == ':') &&
310 (pieces[2] == ' ')) {
311 pieces += 3;
312 } else {
313 pieces = NULL;
314 }
315 }
316 } else {
317 pieces = NULL;
318 lf->program_name = NULL;
319 }
320 }
321 /* Invalid AIX */
322 else {
323 pieces = NULL;
324 lf->program_name = NULL;
325 }
326 } else {
327 pieces = NULL;
328 lf->program_name = NULL;
329 }
330 }
331
332 /* Remove [ID xx facility.severity] */
333 if (pieces) {
334 /* Set log after program name */
335 lf->log = pieces;
336
337 if ((pieces[0] == '[') &&
338 (pieces[1] == 'I') &&
339 (pieces[2] == 'D') &&
340 (pieces[3] == ' ')) {
341 pieces += 4;
342
343 /* Going after the "] " */
344 pieces = strstr(pieces, "] ");
345 if (pieces) {
346 pieces += 2;
347 lf->log = pieces;
348 }
349 }
350 }
351
352 /* Get program name size */
353 if (lf->program_name) {
354 lf->p_name_size = strlen(lf->program_name);
355 }
356 }
357
358 /* xferlog date format
359 * Mon Apr 17 18:27:14 2006 1 64.160.42.130
360 */
361 else if ((loglen > 28) &&
362 (pieces[3] == ' ') &&
363 (pieces[7] == ' ') &&
364 (pieces[10] == ' ') &&
365 (pieces[13] == ':') &&
366 (pieces[16] == ':') &&
367 (pieces[19] == ' ') &&
368 (pieces[24] == ' ') &&
369 (pieces[26] == ' ')) {
370 /* Move log to the beginning of the message */
371 lf->log += 24;
372 }
373
374 /* Check for snort date format
375 * ex: 01/28-09:13:16.240702 [**]
376 */
377 else if ( (loglen > 24) &&
378 (pieces[2] == '/') &&
379 (pieces[5] == '-') &&
380 (pieces[8] == ':') &&
381 (pieces[11] == ':') &&
382 (pieces[14] == '.') &&
383 (pieces[21] == ' ') ) {
384 lf->log += 23;
385 }
386
387 /* Check for suricata (new) date format
388 * ex: 01/28/1979-09:13:16.240702 [**]
389 */
390 else if ( (loglen > 26) &&
391 (pieces[2] == '/') &&
392 (pieces[5] == '/') &&
393 (pieces[10] == '-') &&
394 (pieces[13] == ':') &&
395 (pieces[16] == ':') &&
396 (pieces[19] == '.') &&
397 (pieces[26] == ' ') ) {
398 lf->log += 28;
399 }
400
401
402 /* Check for apache log format */
403 /* [Fri Feb 11 18:06:35 2004] [warn] */
404 else if ( (loglen > 27) &&
405 (pieces[0] == '[') &&
406 (pieces[4] == ' ') &&
407 (pieces[8] == ' ') &&
408 (pieces[11] == ' ') &&
409 (pieces[14] == ':') &&
410 (pieces[17] == ':') &&
411 (pieces[20] == ' ') &&
412 (pieces[25] == ']') ) {
413 lf->log += 27;
414 }
415
416 /* Check for the osx asl log format.
417 * Examples:
418 * [Time 2006.12.28 15:53:55 UTC] [Facility auth] [Sender sshd] [PID 483] [Message error: PAM: Authentication failure for username from 192.168.0.2] [Level 3] [UID -2] [GID -2] [Host Hostname]
419 * [Time 2006.11.02 14:02:11 UTC] [Facility auth] [Sender sshd] [PID 856]
420 [Message refused connect from 59.124.44.34] [Level 4] [UID -2] [GID -2]
421 [Host robert-wyatts-emac]
422 */
423 else if ((loglen > 26) &&
424 (pieces[0] == '[') &&
425 (pieces[1] == 'T') &&
426 (pieces[5] == ' ') &&
427 (pieces[10] == '.') &&
428 (pieces[13] == '.') &&
429 (pieces[16] == ' ') &&
430 (pieces[19] == ':')) {
431 /* Do not read more than 1 message entry -> log tampering */
432 short unsigned int done_message = 0;
433
434 /* Remove the date */
435 lf->log += 25;
436
437 /* Get the desired values */
438 pieces = strchr(lf->log, '[');
439 while (pieces) {
440 pieces++;
441
442 /* Get the sender (set to program name) */
443 if ((strncmp(pieces, "Sender ", 7) == 0) &&
444 (lf->program_name == NULL)) {
445 pieces += 7;
446 lf->program_name = pieces;
447
448 /* Get the closing brackets */
449 pieces = strchr(pieces, ']');
450 if (pieces) {
451 *pieces = '\0';
452
453 /* Set program_name size */
454 lf->p_name_size = strlen(lf->program_name);
455
456 pieces++;
457 }
458 /* Invalid program name */
459 else {
460 lf->program_name = NULL;
461 break;
462 }
463 }
464
465 /* Get message */
466 else if ((strncmp(pieces, "Message ", 8) == 0) &&
467 (done_message == 0)) {
468 pieces += 8;
469 done_message = 1;
470
471 lf->log = pieces;
472
473 /* Get the closing brackets */
474 pieces = strchr(pieces, ']');
475 if (pieces) {
476 *pieces = '\0';
477 pieces++;
478 }
479 /* Invalid log closure */
480 else {
481 break;
482 }
483 }
484
485 /* Get hostname */
486 else if (strncmp(pieces, "Host ", 5) == 0) {
487 pieces += 5;
488 lf->hostname = pieces;
489
490 /* Get the closing brackets */
491 pieces = strchr(pieces, ']');
492 if (pieces) {
493 *pieces = '\0';
494 pieces++;
495 }
496
497 /* Invalid hostname */
498 else {
499 lf->hostname = NULL;
500 }
501 break;
502 }
503
504 /* Get next entry */
505 pieces = strchr(pieces, '[');
506 }
507 }
508
509 /* Check for squid date format
510 * 1140804070.368 11623
511 * seconds from 00:00:00 1970-01-01 UTC
512 */
513 else if ((loglen > 32) &&
514 (pieces[0] == '1') &&
515 (isdigit((int)pieces[1])) &&
516 (isdigit((int)pieces[2])) &&
517 (isdigit((int)pieces[3])) &&
518 (pieces[10] == '.') &&
519 (isdigit((int)pieces[13])) &&
520 (pieces[14] == ' ') &&
521 ((pieces[21] == ' ') || (pieces[22] == ' '))) {
522 lf->log += 14;
523
524 /* We need to start at the size of the event */
525 while (*lf->log == ' ') {
526 lf->log++;
527 }
528 }
529
530 /* Every message must be in the format
531 * hostname->location or
532 * (agent) ip->location.
533 */
534
535 /* Set hostname for local messages */
536 if (lf->location[0] == '(') {
537 /* Messages from an agent */
538 lf->hostname = lf->location;
539 } else if (lf->hostname == NULL) {
540 lf->hostname = __shost;
541 }
542
543 /* Set up the event data */
544 lf->time = c_time;
545 p = localtime(&c_time);
546
547 /* Assign hour, day, year and month values */
548 lf->day = p->tm_mday;
549 lf->year = p->tm_year + 1900;
550 strncpy(lf->mon, month[p->tm_mon], 3);
551 snprintf(lf->hour, 9, "%02d:%02d:%02d",
552 p->tm_hour,
553 p->tm_min,
554 p->tm_sec);
555
556 /* Set the global hour/weekday */
557 __crt_hour = p->tm_hour;
558 __crt_wday = p->tm_wday;
559
560 #ifdef TESTRULE
561 if (!alert_only) {
562 print_out("**Phase 1: Completed pre-decoding.");
563 print_out(" full event: '%s'", lf->full_log);
564 print_out(" hostname: '%s'", lf->hostname);
565 print_out(" program_name: '%s'", lf->program_name);
566 print_out(" log: '%s'", lf->log);
567 }
568 #endif
569 return (0);
570 }
571
572