1 /*
2 * Copyright (c) 2009-2020, Peter Haag
3 * Copyright (c) 2004-2008, SWITCH - Teleinformatikdienste fuer Lehre und Forschung
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 * * Neither the name of the author nor the names of its contributors may be
15 * used to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 */
31
32 #include "config.h"
33
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37 #include <time.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <dirent.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <sys/param.h>
45 #include <fcntl.h>
46 #include <ctype.h>
47 #include <netinet/in.h>
48
49 #ifdef HAVE_STDINT_H
50 # include <stdint.h>
51 #endif
52
53 #ifdef HAVE_FTS_H
54 # include <fts.h>
55 #else
56 # include "fts_compat.h"
57 #define fts_children fts_children_compat
58 #define fts_close fts_close_compat
59 #define fts_open fts_open_compat
60 #define fts_read fts_read_compat
61 #define fts_set fts_set_compat
62 #endif
63
64 #include "util.h"
65 #include "nfdump.h"
66 #include "nffile.h"
67 #include "flist.h"
68
69 /*
70 * Select a single file
71 * --------------------
72 * -r [/]path/to/single_file
73 * Select a single file: absolute or relative path to a single file.
74 * Recursive: no
75 *
76 * Selecting a range of files
77 * --------------------------
78 * -R [/]path/to/first_file
79 * Select a range of files in directory specified by absolute or relative path [/]path/to/
80 * Files are selected in alphabetical order starting with 'first_file' to the end of
81 * the directory.
82 *
83 * -R [/]path/to/first_file:last_file
84 * Select a range of files in directory specified by absolute or relative path [/]path/to/
85 * Files are selected in alphabetical order starting with 'first_file' and ending with
86 * 'last_file'.
87 *
88 * -R [/]path/to/directory
89 * Select all files in alphabetical order in directory specified by absolute or relative
90 * path [/]path/to/directory
91 *
92 * Selecting files over multiple sources
93 * -------------------------------------
94 * -M /path/to/multiple/source1:source2[:..:sourceN]
95 * It is assumed, that each source directory 'source1', 'source2' etc. exists in directory
96 * /path/to/multiple. This will expand to multiple directories:
97 * /path/to/multiple/source1
98 * /path/to/multiple/source2
99 * ..
100 * /path/to/multiple/sourceN
101 * Each of these directories contain the same files.
102 * Used in combination with -r and -R to prepend file selections.
103 *
104 * Select a single file from multiple directories
105 * ----------------------------------------------
106 * -M /path/to/source1:source2 -r single_file
107 * Select the same file 'single_file' from each source directory: e.g.
108 * /path/to/source1/single_file
109 * /path/to/source2/single_file
110 *
111 *
112 * Select a range of files from multiple directories
113 * -------------------------------------------------
114 * -M /path/to/source1:source2[:..] -R first_file
115 * For each expanded directory specified by -M /path/to/source1:source2
116 * select a range of files as described above. Would be identical to
117 * -R /path/to/source1/first_file -R /path/to/source2/first_file
118 *
119 * -M /path/to/source1:source2[:..] -R first_file:last_file
120 * For each expanded directory specified by -M /path/to/source1:source2
121 * select a range of files as described above. Would be identical to
122 * -R /path/to/source1/first_file:last_file -R /path/to/source2/first_file:last_file [-R .. ]
123 *
124 * -M /path/to/source1:source2[:..] -R .
125 * For each expanded directory specified by -M /path/to/source1:source2
126 * select all files of the directory as described above. Would be identical to
127 * -R /path/to/source1 -R /path/to/source2 [-R ...]
128 *
129 *
130 * Hierarchical file organisation:
131 * For performance reasons, files may be stored in various sub directories instead of a
132 * single directory. These sub directories are assumed to be created in alphabetical order.
133 * For example daily sub directories: 2006/04/01 .. 2006/04/30 as created by nfcapd with
134 * option -S %y/%m/%d
135 *
136 * Single file selection is identical to the flat file layout:
137 * -r [/]path/to/sub1/sub2/sub3/single_file
138 *
139 * Selecting a range of files in a hierarchical file layout
140 * --------------------------------------------------------
141 * -R [/]path/to/sub1/sub2/first_file
142 * Select a range of files in directory specified by absolute or relative path
143 * [/]path/to/sub1/sub2/. Files are selected in alphabetical order starting with
144 * 'first_file' to the end of the directory. The hierarchy has no impact here.
145 *
146 * -R [/]path/to/first_sub1/first_sub2/first_file:last_sub1/last_sub2/last_file
147 * Select a range of files over multiple sub directories starting at absolute or
148 * relative path [/]path/to/first_sub1/first_sub2/first_file up to and including
149 * [/]path/to/last_sub1/last_sub2/last_file. Files are selected in alphabetical
150 * order by iterating over the required sub directory hierarchy
151 * Example:
152 * -R /path/to/2006/03/31/nfcapd.200603312300:2006/04/01/nfcapd.200604010600
153 *
154 * -R [/]path/to/directory
155 * Select all files in alphabetical order in directory specified by absolute or relative
156 * path [/]path/to/directory, identical to flat layout
157 *
158 * The same method applies for selecting a range of files over multiple sub directories
159 * and multiple sources.
160 *
161 * Example:
162 * -M /path/to/source1:source2 -R 2006/03/31/nfcapd.200603312300:2006/04/01/nfcapd.200604010600
163 *
164 */
165
166 /*
167 * syntax for possible sub dir definitions:
168 *
169 * %Y is replaced by the year with century as a decimal number.
170 * %y is replaced by the year without century as a decimal number (00-99).
171 * %m is replaced by the month as a decimal number (01-12).
172 * %d is replaced by the day of the month as a decimal number (01-31).
173 * %j is replaced by the day of the year as a decimal number (001-366).
174 * %H is replaced by the hour (24-hour clock) as a decimal number (00-23).
175 * %M is replaced by the minute as a decimal number (00-59).
176 * %s is replaced by the number of seconds since the Epoch, UTC
177 * %U is replaced by the week number of the year (Sunday as the first day
178 * of the week) as a decimal number (00-53).
179 * %W is replaced by the week number of the year (Monday as the first day
180 * of the week) as a decimal number (00-53).
181 * %w is replaced by the weekday (Sunday as the first day of the week) as
182 * a decimal number (0-6).
183 * %u is replaced by the weekday (Monday as the first day of the week) as
184 * a decimal number (1-7).
185 * %F is equivalent to ``%Y-%m-%d''.
186 */
187
188 // predefined and accpeted formats
189 static const char *subdir_def[] = {
190 "", // default index 0 - no subdir hierarchy
191 "%Y/%m/%d",
192 "%Y/%m/%d/%H",
193 "%Y/%W/%u",
194 "%Y/%W/%u/%H",
195 "%Y/%j",
196 "%Y/%j/%H",
197 "%F",
198 "%F/%H",
199 NULL
200 };
201
202
203 // all accpeted char in a string
204 #define AcceptedFormatChar "YymdjHMsUWwuF"
205
206 static mode_t mode, dir_mode;
207 static const char *subdir_format;
208
209 static struct entry_filter_s {
210 char *first_entry;
211 char *last_entry;
212 int list_files;
213 } *dir_entry_filter;
214
215 #define NUM_PTR 16
216
217 // globals
218 extern uint32_t twin_first, twin_last;
219
220 static char *first_file, *last_file;
221 static char *current_file = NULL;
222 static stringlist_t source_dirs, file_list;
223
224 /* Function prototypes */
225 static inline int CheckTimeWindow(uint32_t t_start, uint32_t t_end, stat_record_t *stat_record);
226
227 static void GetFileList(char *path);
228
229 static void CleanPath(char *entry);
230
231 static void Getsource_dirs(char *dirs);
232
233 static int mkpath(char *path, char *p, mode_t mode, mode_t dir_mode, char *error, size_t errlen);
234
235 static char *GuessSubDir(char *channeldir, char *filename);
236
237 static char *VerifyFileRange(char *path, char *last_file);
238
239 /* Functions */
240
/*
 * Ordering callback handed to fts_open(): orders two entries by their
 * fts_name using strcmp().
 */
static int compare(const FTSENT **f1, const FTSENT **f2) {
    const char *left = (*f1)->fts_name;
    const char *right = (*f2)->fts_name;

    return strcmp(left, right);
}  // End of compare
244
/*
 * Normalize a path string in place.
 *
 * The following steps are applied in this order:
 *   1. every "//" is collapsed into a single "/"
 *   2. one trailing "/" is removed (a lone "/" therefore becomes "")
 *   3. every "/./" is reduced to "/"
 *   4. one leading "./" is stripped
 *
 * entry must be a writable, NUL terminated string; the result is never
 * longer than the input. NULL and the empty string are left untouched.
 *
 * All shifts use memmove() including the terminating NUL, so nothing
 * beyond the end of the string is ever read.
 */
static void CleanPath(char *entry) {
    char *p;
    size_t len;

    if (entry == NULL)
        return;

    // wash out any '//' in entry: drop the second '/'
    while ((p = strstr(entry, "//")) != NULL) {
        memmove(p + 1, p + 2, strlen(p + 2) + 1);
    }

    // remove trailing '/' - guard against the empty string
    len = strlen(entry);
    if (len > 0 && entry[len - 1] == '/')
        entry[len - 1] = '\0';

    // wash out any '/./' in entry: keep the first '/', drop "./"
    while ((p = strstr(entry, "/./")) != NULL) {
        memmove(p + 1, p + 3, strlen(p + 3) + 1);
    }

    // remove leading './' in entry
    if (strncmp(entry, "./", 2) == 0) {
        memmove(entry, entry + 2, strlen(entry + 2) + 1);
    }

}  // End of CleanPath
279
/*
 * Check a file's stat record against the requested time window.
 *
 * Returns 1 if t_start is 0 (no window requested), or if the window
 * [t_start, t_end] starts inside, ends inside, or strictly encloses the
 * range first_seen..last_seen of stat_record. Returns 0 otherwise, in
 * particular when a window is requested and first_seen is 0.
 */
static inline int CheckTimeWindow(uint32_t t_start, uint32_t t_end, stat_record_t *stat_record) {
    // if no time window is set, every file qualifies
    if (t_start == 0) return 1;

    // a record without a first_seen stamp never matches a window
    if (stat_record->first_seen == 0) return 0;

    return
        // window starts inside the file's range
        (t_start >= stat_record->first_seen && t_start <= stat_record->last_seen) ||
        // window ends inside the file's range
        (t_end >= stat_record->first_seen && t_end <= stat_record->last_seen) ||
        // window encloses the file's range
        (t_start < stat_record->first_seen && t_end > stat_record->last_seen);

}  // End of CheckTimeWindow
308
/*
 * Count the directory levels of a path, i.e. the number of '/' characters
 * in dir, not counting a single leading '/'.
 * Returns 0 for a NULL pointer.
 */
static int dirlevels(char *dir) {
    char *cursor;
    int count = 0;

    if (dir == NULL) return 0;

    // a leading '/' is not counted as a level separator
    cursor = (*dir == '/') ? dir + 1 : dir;

    for (; *cursor != '\0'; cursor++) {
        if (*cursor == '/') count++;
    }

    return count;

}  // End of dirlevels
330
CreateDirListFilter(char * first_path,char * last_path,int file_list_level)331 static void CreateDirListFilter(char *first_path, char *last_path, int file_list_level) {
332 int i;
333 char *p, *q, *first_mark, *last_mark;
334
335 // printf("First Dir: '%s', first_path: '%s', last_path '%s', first_file '%s', last_file '%s', list_level: %i\n",
336 // source_dirs.list[0], first_path, last_path, first_file, last_file, file_list_level);
337
338 if ( file_list_level == 0 )
339 return;
340
341 if ( file_list_level < 0 ) {
342 fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
343 exit(250);
344 }
345
346 dir_entry_filter = (struct entry_filter_s *)malloc((file_list_level+1) * sizeof(struct entry_filter_s));
347 if ( !dir_entry_filter ) {
348 fprintf(stderr, "malloc() error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
349 exit(250);
350 }
351
352 // first default entry - the directory itself
353 dir_entry_filter[0].first_entry = NULL;
354 dir_entry_filter[0].last_entry = NULL;
355 dir_entry_filter[0].list_files = 0;
356
357 first_mark = first_path;
358 last_mark = last_path;
359 // intermediate directory level filters
360 for ( i=1; i<file_list_level; i++ ) {
361 if ( first_mark ) {
362 p = strchr(first_mark, '/');
363 if ( p ) {
364 *p = '\0';
365 dir_entry_filter[i].first_entry = strdup(first_path);
366 *p++ = '/';
367 first_mark = p;
368 } else {
369 dir_entry_filter[i].first_entry = strdup(first_path);
370 first_mark = NULL;
371 }
372 } else {
373 dir_entry_filter[i].first_entry = NULL;
374 }
375 dir_entry_filter[i].list_files = 0;
376
377 if ( last_mark ) {
378 q = strchr(last_mark, '/');
379 if ( q ) {
380 *q = '\0';
381 dir_entry_filter[i].last_entry = strdup(last_path);
382 *q++ = '/';
383 last_mark = q;
384 } else {
385 dir_entry_filter[i].last_entry = strdup(last_path);
386 last_mark = NULL;
387 }
388 } else {
389 dir_entry_filter[i].last_entry = NULL;
390 }
391 if ( dir_entry_filter[i].first_entry && dir_entry_filter[i].last_entry &&
392 strcmp(dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry) > 0 )
393 fprintf(stderr, "WARNING: Entry '%s' > '%s'. Will not match anything!\n",
394 dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry);
395
396 // printf("%i first: '%s', last: '%s'\n",
397 // i, dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry);
398 }
399
400 // the last level - files are listed here
401 dir_entry_filter[file_list_level].first_entry = first_file;
402 dir_entry_filter[file_list_level].last_entry = last_file;
403 dir_entry_filter[file_list_level].list_files = 1;
404
405 if ( dir_entry_filter[file_list_level].first_entry && dir_entry_filter[file_list_level].last_entry &&
406 strcmp(dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry) > 0 )
407 fprintf(stderr, "WARNING: File '%s' > '%s'. Will not match anything!\n",
408 dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry);
409
410 // printf("%i first: '%s', last: '%s'\n",
411 // file_list_level, dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry);
412
413 } // End of CreateDirListFilter
414
GetFileList(char * path)415 static void GetFileList(char *path) {
416 struct stat stat_buf;
417 char *last_file_ptr, *first_path, *last_path;
418 int levels_first_file, levels_last_file, file_list_level;
419 int sub_index;
420
421 FTS *fts;
422 FTSENT *ftsent;
423
424 CleanPath(path);
425
426 // Check for last_file option
427 last_file_ptr = strchr(path, ':');
428 first_path = last_path = NULL;
429 levels_first_file = levels_last_file = 0;
430 if ( last_file_ptr ) {
431 // make sure we have only a single ':' in path
432 if ( strrchr(path, ':') != last_file_ptr ) {
433 fprintf(stderr, "Multiple file separators ':' in path not allowed!\n");
434 exit(250);
435 }
436 *last_file_ptr++ = '\0';
437 // last_file_ptr points to last_file
438
439 if ( strlen(last_file_ptr) == 0 ) {
440 fprintf(stderr, "Missing last file option after ':'!\n");
441 exit(250);
442 }
443
444 CleanPath(last_file_ptr);
445 // make sure last_file option is not a full path
446 if ( last_file_ptr[0] == '/') {
447 fprintf(stderr, "Last file name in -R list must not start with '/'\n");
448 exit(250);
449 }
450 // how may sub dir levels has last_file option?
451 levels_last_file = dirlevels(last_file_ptr);
452
453 // if no subdirs are given for last_file, try to find out, if the last_file
454 // exists in any possible subdirs
455 if ( levels_last_file == 0 ) {
456 char s[MAXPATHLEN];
457 char *r = VerifyFileRange(path, last_file_ptr);
458
459 if ( r != last_file_ptr && r[0] != '\0' ) {
460 snprintf(s, MAXPATHLEN-1, "%s/%s", r, last_file_ptr);
461 s[MAXPATHLEN-1] = '\0';
462 last_file_ptr = strdup(s);
463 levels_last_file = dirlevels(last_file_ptr);
464 }
465 }
466
467 }
468
469 levels_first_file = dirlevels(path);
470
471 if ( source_dirs.num_strings == 0 ) {
472 // No multiple sources option -M
473
474 // path contains the path to a file/directory
475 // stat this entry
476 if ( stat(path, &stat_buf) ) {
477 fprintf(stderr, "stat() error '%s': %s\n", path, strerror(errno));
478 exit(250);
479 }
480 if ( !S_ISDIR(stat_buf.st_mode) && !S_ISREG(stat_buf.st_mode) ) {
481 fprintf(stderr, "Not a file or directory: '%s'\n", path);
482 exit(250);
483 }
484
485 // Check, how many levels of directory in path
486 levels_first_file = dirlevels(path);
487
488 if ( last_file_ptr ) {
489 // path is [/]path/to/any/dir|file:last_file_ptr
490
491 // make sure first_file is a file
492 if ( S_ISDIR(stat_buf.st_mode) ) {
493 fprintf(stderr, "Not a file: '%s'\n", path);
494 exit(250);
495 }
496
497 if ( levels_last_file ) {
498 // we have levels_last_file number of sub dirs
499
500 // sub dir levels of first_file mus have at least the same number of levels as last_file
501 if ( levels_first_file < levels_last_file ) {
502 fprintf(stderr, "Number of sub dirs for sub level hierarchy for file list -R do not match\n");
503 exit(250);
504 }
505 if ( levels_first_file == levels_last_file ) {
506 char *p, *q;
507 // path = [/]sub1[/..]/first_file:sub1[/...]/last_file
508 if ( path[0] == '/' ) {
509 // this is rather strange, but strctly spoken, valid anyway
510 InsertString(&source_dirs, "/");
511 path++;
512 } else {
513 InsertString(&source_dirs, ".");
514 }
515
516 // path = sub_first[/..]/first_file:sub_last[/...]/last_file
517 p = strrchr(path, '/');
518 q = strrchr(last_file_ptr, '/');
519 if ( !p || !q ) {
520 // this should never happen
521 fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
522 exit(250);
523 }
524 *p++ = '\0';
525 *q++ = '\0';
526 first_file = strdup(p);
527 last_file = strdup(q);
528 file_list_level = levels_last_file + 1;
529 first_path = path;
530 last_path = last_file_ptr;
531
532 } else {
533 // path = [/]path/to/sub_first[/..]/first_file:sub_last[/...]/last_file
534 int i;
535 char *p, *r, *s;
536
537 p = strrchr(path, '/');
538 // levels_first_file > levels_last_file
539
540 // step back the number of sub dirs in first_file
541 for ( i=0; i<levels_last_file; i++ ) {
542 do {
543 p--;
544 } while ( p >= path && *p != '/');
545 }
546 *p++ = '\0';
547
548 InsertString(&source_dirs, path);
549
550 r = strrchr(p, '/');
551 s = strrchr(last_file_ptr, '/');
552 if ( !r || !s ) {
553 // this must never happen
554 fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
555 exit(250);
556 }
557 *r++ = '\0';
558 *s++ = '\0';
559 first_file = strdup(r);
560 last_file = strdup(s);
561 // files are listed at this sub dir level
562 file_list_level = levels_last_file + 1;
563 first_path = p;
564 last_path = last_file_ptr;
565
566 }
567
568 } else {
569 // we have no sub dir levels given
570
571 // path is [/]path/to/any/file
572 char *p = strrchr(path, '/');
573
574 if ( p ) {
575 // path is [/]path/to/any/first_file:last_file
576 *p++ = '\0';
577 // path is the direcory containing all the files
578 InsertString(&source_dirs, path);
579 first_file = strdup(p);
580 } else {
581 // path is first_file:last_file
582 InsertString(&source_dirs, ".");
583 first_file = strdup(path);
584 }
585 // set last_file filter
586 last_file = strdup(last_file_ptr);
587 // in any case we list the files of directory level 1
588 file_list_level = 1;
589 }
590 } else {
591 // path is [/]path/to/any/dir|file
592 if ( S_ISDIR(stat_buf.st_mode) ) {
593 // path is [/]path/to/any/dir
594 // list all files in this directory
595 InsertString(&source_dirs, path);
596 first_file = NULL;
597 file_list_level = 0;
598 } else {
599 // path is [/]path/to/any/file
600 char *p = strrchr(path, '/');
601 if ( p ) {
602 // path is [/]path/to/any/file
603 *p++ = '\0';
604 // path is the direcory containing all the files
605 InsertString(&source_dirs, path);
606 first_file = strdup(p);
607 } else {
608 // path is file
609 InsertString(&source_dirs, ".");
610 first_file = strdup(path);
611 }
612 // in any case we list the files of directory level 1
613 file_list_level = 1;
614 }
615 // in any case, no last_file filter
616 last_file = NULL;
617 }
618
619 } else {
620 char pathbuff[MAXPATHLEN];
621 // multiple sources option -M given
622 if ( path[0] == '/') {
623 fprintf(stderr, "File list -R must not start with '/' when combined with a source list -M\n");
624 exit(250);
625 }
626
627 // special case for all files in directory
628 if ( strcmp(path, ".") == 0 ) {
629 first_file = NULL;
630 last_file = NULL;
631 file_list_level = 0;
632 } else {
633 // pathbuff contains the path to a file/directory, compiled using the first entry
634 // in the source_dirs
635 snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", source_dirs.list[0], path);
636 pathbuff[MAXPATHLEN-1] = '\0';
637
638 // pathbuff must point to a file
639 if ( stat(pathbuff, &stat_buf) ) {
640 if ( errno == ENOENT ) {
641 // file not found - try to guess a possible subdir
642 char *sub_dir = GuessSubDir(source_dirs.list[0], path);
643 if ( sub_dir ) { // subdir found
644 snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", sub_dir, path);
645 pathbuff[MAXPATHLEN-1] = '\0';
646 // update path
647 path = strdup(pathbuff);
648 free(sub_dir);
649
650 // need guessing subdir with last_file too
651 if ( last_file_ptr ) {
652 sub_dir = GuessSubDir(source_dirs.list[0], last_file_ptr);
653 if ( sub_dir ) { // subdir found
654 snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", sub_dir, last_file_ptr);
655 pathbuff[MAXPATHLEN-1] = '\0';
656 last_file_ptr = strdup(pathbuff);
657 free(sub_dir);
658
659 // update dir levels of extended file path
660 levels_last_file = dirlevels(last_file_ptr);
661 } else {
662 fprintf(stderr, "'%s': %s\n", last_file_ptr, "File not found!");
663 exit(250);
664 }
665 }
666
667 } else { // no file in any possible subdir found
668 fprintf(stderr, "stat() error '%s': %s\n", pathbuff, "File not found!");
669 exit(250);
670 }
671 } else { // Any other stat error
672 fprintf(stderr, "stat() error '%s': %s\n", pathbuff, strerror(errno));
673 exit(250);
674 }
675 } else if ( !S_ISREG(stat_buf.st_mode) ) {
676 fprintf(stderr, "Not a file : '%s'\n", pathbuff);
677 exit(250);
678 }
679
680 // Check, how many levels of directory in path
681 levels_first_file = dirlevels(path);
682
683 if ( last_file_ptr ) {
684 // path is path/to/any/first_file:last_file_ptr
685 char *p, *q;
686
687 // the number of sub dirs must be eqal for first_file and last_file
688 if ( levels_first_file != levels_last_file ) {
689 fprintf(stderr, "Number of sub dirs must agree in '%s' and '%s'\n", path, last_file_ptr);
690 exit(250);
691 }
692
693 p = strrchr(path, '/');
694 if ( p ) {
695 // path is fist_sub/to/any/first_file
696 // recursive all files in sub dirs
697 file_list_level = dirlevels(path) + 1;
698 *p++ = '\0';
699 first_file = strdup(p);
700 first_path = path;
701 } else {
702 // path is first_file
703 first_file = strdup(path);
704 file_list_level = 1;
705 }
706
707 q = strrchr(last_file_ptr, '/');
708 if ( q ) {
709 *q++ = '\0';
710 last_file = strdup(q);
711 last_path = last_file_ptr;
712 } else {
713 last_file = strdup(last_file_ptr);
714 }
715
716 } else {
717 // path is path/to/any/first_file
718 char *p = strrchr(path, '/');
719 if ( p ) {
720 // path is fist_sub/to/any/first_file
721 // recursive all files in sub dirs
722 file_list_level = dirlevels(path) + 1;
723 *p++ = '\0';
724 first_file = strdup(p);
725 first_path = path;
726 } else {
727 // path is first_file
728 first_file = strdup(path);
729 file_list_level = 1;
730 }
731 last_file = NULL;
732 }
733 }
734 }
735
736 /*
737 printf("first_file %s\n", first_file ? first_file : "<none>");
738 printf("last_file %s\n", last_file ? last_file : "<none>");
739 printf("first_path %s\n", first_path ? first_path : "<none>");
740 printf("last_path %s\n", last_path ? last_path : "<none>");
741 printf("file_list_level: %i\n", file_list_level);
742 */
743 CreateDirListFilter(first_path, last_path, file_list_level );
744
745 // last entry must be NULL
746 InsertString(&source_dirs, NULL);
747 fts = fts_open(source_dirs.list, FTS_LOGICAL, compare);
748 sub_index = 0;
749 while ( (ftsent = fts_read(fts)) != NULL) {
750 int fts_level = ftsent->fts_level;
751 char *fts_path;
752
753 // printf("DBG: %u %i %s %s\n", ftsent->fts_info, ftsent->fts_level, ftsent->fts_path, ftsent->fts_name);
754
755 if ( fts_level == 0 ) {
756 sub_index = ftsent->fts_pathlen + 1;
757 continue;
758 }
759
760 if ( ftsent->fts_pathlen < sub_index ) {
761 LogError("ERROR: fts_pathlen error at %s line %d\n", __FILE__, __LINE__);
762 exit(250);
763 }
764 fts_path = &ftsent->fts_path[sub_index];
765
766 /*
767 if ( file_list_level )
768 printf("DGB: short fts: '%s', filer_first: '%s', filter_last: '%s'\n",
769 fts_path, dir_entry_filter[fts_level].first_entry , dir_entry_filter[fts_level].last_entry);
770 */
771 switch (ftsent->fts_info) {
772 case FTS_D:
773 // dir entry pre descend
774 if ( file_list_level && file_list_level && (
775 ( dir_entry_filter[fts_level].first_entry &&
776 ( strcmp(fts_path, dir_entry_filter[fts_level].first_entry ) < 0 ) ) ||
777 ( dir_entry_filter[fts_level].last_entry &&
778 ( strcmp(fts_path, dir_entry_filter[fts_level].last_entry ) > 0 ) )
779 ))
780 fts_set(fts, ftsent, FTS_SKIP );
781
782 break;
783 case FTS_DP:
784 break;
785 case FTS_F:
786 // file entry
787 // printf("==> Check: %s\n", ftsent->fts_name);
788
789 // skip stat file
790 if ( strcmp(ftsent->fts_name, ".nfstat") == 0 ||
791 strncmp(ftsent->fts_name, NF_DUMPFILE , strlen(NF_DUMPFILE)) == 0)
792 continue;
793 if ( strstr(ftsent->fts_name, ".stat") != NULL )
794 continue;
795 // skip OSX DS_Store files
796 if ( strstr(ftsent->fts_name, ".DS_Store") != NULL )
797 continue;
798 // skip pcap file
799 if ( strstr(ftsent->fts_name, "pcap") != NULL )
800 continue;
801
802 if ( file_list_level && (
803 ( fts_level != file_list_level ) ||
804 ( dir_entry_filter[fts_level].first_entry &&
805 ( strcmp(ftsent->fts_name, dir_entry_filter[fts_level].first_entry) < 0 ) ) ||
806 ( dir_entry_filter[fts_level].last_entry &&
807 ( strcmp(ftsent->fts_name, dir_entry_filter[fts_level].last_entry) > 0 ) )
808 ) )
809 continue;
810
811 // printf("==> Listed: %s\n", ftsent->fts_path);
812 InsertString(&file_list, ftsent->fts_path);
813
814 break;
815 }
816
817 }
818 fts_close(fts);
819
820 } // End of GetFileList
821
822 /*
823 * Get the list of directories
824 * dirs: user supplied parameter: /any/path/dir1:dir2:dir3:...
825 * source_dirs must result in
826 * /any/path/dir1
827 * /any/path/dir2
828 * /any/path/dir3
829 * /any/path is dir prefix, which may be NULL e.g. dir1:dir2:dir3:...
830 * dir1, dir2 etc entrys
831 */
Getsource_dirs(char * dirs)832 void Getsource_dirs(char *dirs) {
833 struct stat stat_buf;
834 char *p, *q, *dirprefix;
835 char path[MAXPATHLEN];
836
837 q = strchr(dirs, ':');
838 if ( q ) { // we have /path/to/firstdir:dir1:dir2:...
839 *q = 0;
840 p = strrchr(dirs, '/');
841 if ( p ) {
842 *p++ = 0; // p points now to the first name in the dir list
843 dirprefix = dirs;
844 } else { // we have a source_dirs in current directory
845 p = dirs; // p points now to the first name in the dir list
846 dirprefix = "."; // current directory
847 }
848 *q = ':'; // restore ':' in source_dirs
849
850 while ( p ) { // iterate over all elements in the dir list
851 q = strchr(p, ':');
852 if ( q )
853 *q = 0;
854
855 // p point to a dir name
856 snprintf(path, 1023, "%s/%s", dirprefix, p);
857 path[MAXPATHLEN-1] = 0;
858 if ( stat(dirs, &stat_buf) ) {
859 fprintf(stderr, "Can't stat '%s': %s\n", path, strerror(errno));
860 return;
861 }
862 if ( !S_ISDIR(stat_buf.st_mode) ) {
863 fprintf(stderr, "Not a directory: '%s'\n", path);
864 return;
865 }
866
867 // save path into source_dirs
868 InsertString(&source_dirs, path);
869
870 p = q ? q + 1 : NULL;
871 }
872
873 } else { // we have only one directory
874 if ( stat(dirs, &stat_buf) ) {
875 fprintf(stderr, "Can't stat '%s': %s\n", dirs, strerror(errno));
876 return;
877 }
878 if ( !S_ISDIR(stat_buf.st_mode) ) {
879 fprintf(stderr, "Not a directory: '%s'\n", dirs);
880 return;
881 }
882
883 // save the path into source_dirs
884 InsertString(&source_dirs, dirs);
885 }
886
887 } // End of Getsource_dirs
888
SetupInputFileSequence(char * multiple_dirs,char * single_file,char * multiple_files)889 void SetupInputFileSequence(char *multiple_dirs, char *single_file, char *multiple_files) {
890
891 twin_first = 0;
892 twin_last = 0xffffffff;
893
894 first_file = NULL;
895 last_file = NULL;
896
897 InitStringlist(&source_dirs, NUM_PTR);
898 InitStringlist(&file_list, 64);
899
900 if ( multiple_dirs )
901 Getsource_dirs(multiple_dirs);
902
903 if ( multiple_files ) {
904 // use multiple files
905 GetFileList(multiple_files);
906
907 // get time window spanning all the files
908 if ( file_list.num_strings ) {
909 stat_record_t stat_ptr;
910
911 // read the stat record
912 if ( !GetStatRecord(file_list.list[0], &stat_ptr) ) {
913 exit(250);
914 }
915 twin_first = stat_ptr.first_seen;
916
917 // read the stat record of last file
918 if ( !GetStatRecord(file_list.list[file_list.num_strings-1], &stat_ptr) ) {
919 exit(250);
920 }
921 twin_last = stat_ptr.last_seen;
922 }
923
924 } else if ( single_file ) {
925 CleanPath(single_file);
926
927 if ( source_dirs.num_strings == 0 ) {
928 stat_record_t stat_ptr;
929 InsertString(&file_list, single_file);
930 if ( !GetStatRecord(single_file, &stat_ptr) ) {
931 exit(250);
932 }
933 twin_first = stat_ptr.first_seen;
934 twin_last = stat_ptr.last_seen;
935
936 } else {
937 int i;
938
939 if ( single_file[0] == '/' ) {
940 fprintf(stderr, "File -r must not start with '/', when combined with a source list -M\n");
941 exit(250);
942 }
943
944 for ( i=0; i<source_dirs.num_strings; i++ ) {
945 char s[MAXPATHLEN];
946 struct stat stat_buf;
947
948 snprintf(s, MAXPATHLEN-1, "%s/%s", source_dirs.list[i], single_file);
949 s[MAXPATHLEN-1] = '\0';
950 if ( stat(s, &stat_buf) ) {
951 if ( errno == ENOENT ) {
952 // file not found - try to guess subdir
953 char *sub_dir = GuessSubDir(source_dirs.list[i], single_file);
954 if ( sub_dir ) { // subdir found
955 stat_record_t stat_ptr;
956 snprintf(s, MAXPATHLEN-1, "%s/%s/%s", source_dirs.list[i], sub_dir, single_file);
957 s[MAXPATHLEN-1] = '\0';
958 InsertString(&file_list, s);
959 if ( !GetStatRecord(s, &stat_ptr) ) {
960 exit(250);
961 }
962 twin_first = stat_ptr.first_seen;
963 twin_last = stat_ptr.last_seen;
964 } else { // no subdir found
965 fprintf(stderr, "stat() error '%s': %s\n", s, "File not found!");
966 }
967 } else { // Any other stat error
968 fprintf(stderr, "stat() error '%s': %s\n", s, strerror(errno));
969 exit(250);
970 }
971 } else { // stat() successful
972 if ( !S_ISREG(stat_buf.st_mode) ) {
973 fprintf(stderr, "Skip non file entry: '%s'\n", s);
974 } else {
975 stat_record_t stat_ptr;
976 InsertString(&file_list, s);
977 if ( !GetStatRecord(s, &stat_ptr) ) {
978 exit(250);
979 }
980 twin_first = stat_ptr.first_seen;
981 twin_last = stat_ptr.last_seen;
982
983 }
984 }
985 }
986 }
987
988 } else // else use stdin
989 InsertString(&file_list, NULL);
990
991 } // End of SetupInputFileSequence
992
GetCurrentFilename(void)993 char *GetCurrentFilename(void) {
994 return current_file;
995 } // End of GetCurrentFilename
996
/*
 * GetNextFile -- step to the next usable entry of file_list.
 *
 *  nffile      handle returned by the previous call, or NULL on the very
 *              first call, which restarts the sequence at entry 0
 *  twin_start  start of the time window handed to CheckTimeWindow()
 *  twin_end    end of the time window handed to CheckTimeWindow()
 *
 * Returns the handle of the next file that is either stdin or passes
 * CheckTimeWindow(), NULL if OpenFile() fails, or EMPTY_LIST when no
 * further entry is available. current_file is updated along the way and is
 * NULL for stdin and for an exhausted list.
 */
nffile_t *GetNextFile(nffile_t *nffile, time_t twin_start, time_t twin_end) {
    static int next_idx;    // position of the next file_list entry to try

    if ( nffile == NULL ) {
        // first call - start over at the beginning of the list
        next_idx = 0;
    } else {
        // release the file handed out by the previous call
        CloseFile(nffile);
        current_file = NULL;
    }

    // an exhausted list simply skips the loop and ends up at EMPTY_LIST below
    for ( ; next_idx < file_list.num_strings; ) {
#ifdef DEVEL
        printf("Process: '%s'\n", file_list.list[next_idx] ? file_list.list[next_idx] : "<stdin>");
#endif
        nffile = OpenFile(file_list.list[next_idx], nffile);
        if ( nffile == NULL )
            return NULL;

        current_file = file_list.list[next_idx];
        next_idx++;

        // stdin has no file name and is returned without a time window check
        if ( nffile->fd == STDIN_FILENO ) {
            current_file = NULL;
            return nffile;
        }

        if ( CheckTimeWindow(twin_start, twin_end, nffile->stat_record) )
            return nffile;

        // outside of the requested window - skip this file
        CloseFile(nffile);
    }

    current_file = NULL;
    return EMPTY_LIST;

} // End of GetNextFile
1045
1046
InitHierPath(int num)1047 int InitHierPath(int num) {
1048 int i;
1049
1050 subdir_format = NULL;
1051
1052 i=0;
1053 while ( subdir_def[i] != NULL ) {
1054 if ( i == num )
1055 break;
1056 i++;
1057 }
1058 if ( subdir_def[i] == NULL ) {
1059 fprintf(stderr, "No such subdir level %i\n", num);
1060 return 0;
1061 }
1062
1063 subdir_format = subdir_def[i];
1064
1065 /*
1066 * The default file mode is a=rwx (0777) with selected permissions
1067 * removed in accordance with the file mode creation mask. For
1068 * intermediate path name components, the mode is the default modified
1069 * by u+wx so that the subdirectories can always be created.
1070 */
1071
1072 // get umask
1073 mode = umask(0);
1074 umask(mode);
1075
1076 mode = 0777 & ~mode;
1077 dir_mode = mode | S_IWUSR | S_IXUSR;
1078
1079 return 1;
1080
1081 } // End of InitHierPath
1082
/*
 * VerifyFileRange -- look for 'last_file' in a sub directory hierarchy
 * below one of the parent directories of 'path'.
 *
 *  path       path name; it is not modified, a private copy is used
 *  last_file  file name handed to GuessSubDir()
 *
 * The copy of 'path' is shortened by one trailing component at a time and
 * GuessSubDir() is asked for each remaining prefix. The first non NULL
 * answer of GuessSubDir() is returned as is. If no prefix matches, or if
 * the working copy cannot be allocated, 'last_file' itself is returned.
 *
 * NOTE(review): a GuessSubDir() result is heap allocated while 'last_file'
 * is not, so the caller cannot tell from the return value alone whether it
 * has to be freed.
 */
static char *VerifyFileRange(char *path, char *last_file) {
    char *work, *cut;

    work = strdup(path);
    if ( work == NULL ) {
        // strrchr() on a NULL copy would crash - fall back to "no match"
        fprintf(stderr, "strdup() error: %s\n", strerror(errno));
        return last_file;
    }

    while ( (cut = strrchr(work, '/')) != NULL ) {
        char *sub_dir;

        // drop the last component and test the remaining prefix
        *cut = '\0';

        sub_dir = GuessSubDir(work, last_file);
        if ( sub_dir ) {
            free(work);
            return sub_dir;
        }
    }

    free(work);
    return last_file;

} // End of VerifyFileRange
1103
GuessSubDir(char * channeldir,char * filename)1104 static char *GuessSubDir(char *channeldir, char *filename) {
1105 char s[MAXPATHLEN];
1106 struct tm *t_tm;
1107 int i;
1108
1109 size_t len = strlen(filename);
1110 if ( (len == 19 || len == 21) && (strncmp(filename, "nfcapd.", 7) == 0) ) {
1111 char *p = &filename[7];
1112 time_t t = ISO2UNIX(p);
1113 t_tm = localtime(&t);
1114 } else
1115 return NULL;
1116
1117 i = 0;
1118 // if the file exists, it must be in any of the possible subdirs
1119 // so try one after the next - one will match
1120 while ( subdir_def[i] ) {
1121 char const *sub_fmt = subdir_def[i];
1122 char subpath[255];
1123 struct stat stat_buf;
1124 strftime(subpath, 254, sub_fmt, t_tm);
1125 subpath[254] = '\0';
1126
1127 snprintf(s, MAXPATHLEN-1, "%s/%s/%s", channeldir, subpath, filename);
1128 if ( stat(s, &stat_buf) == 0 && S_ISREG(stat_buf.st_mode) ) {
1129 // found file in subdir
1130 return strdup(subpath);
1131 }
1132 i++;
1133 }
1134
1135 return NULL;
1136
1137 } // End of GuessSubDir
1138
GetSubDir(struct tm * now)1139 char *GetSubDir(struct tm *now ) {
1140 static char subpath[255];
1141 size_t sublen;
1142
1143 sublen = strftime(subpath, 254, subdir_format, now);
1144
1145 return sublen == 0 ? NULL : subpath;
1146
1147 } // End of GetSubDir
1148
SetupSubDir(char * dir,char * subdir,char * error,size_t errlen)1149 int SetupSubDir(char *dir, char *subdir, char *error, size_t errlen ) {
1150 char *p, path[MAXPATHLEN];
1151 struct stat stat_buf;
1152 size_t sublen, pathlen;
1153 int err;
1154
1155 error[0] = '\0';
1156
1157 path[0] = '\0';
1158 strncat(path, dir, MAXPATHLEN-1);
1159 path[MAXPATHLEN-1] = '\0';
1160
1161 sublen = strlen(subdir);
1162 pathlen = strlen(path);
1163 // set p as reference between path and subdir
1164 if ( (sublen + pathlen + 2) >= (MAXPATHLEN-1) ) { // +2 : add 1 for '/'
1165 snprintf(error, errlen, "Path '%s': too long", path);
1166 return 0;
1167 }
1168
1169 p = path + pathlen; // points to '\0' of path
1170 *p++ = '/';
1171 *p = '\0';
1172
1173 strncat(path, subdir, MAXPATHLEN-pathlen-2); // +2: add 1 for '/'
1174
1175 // our cwd is basedir ( -l ) so test if, dir exists
1176 if ( stat(path, &stat_buf) == 0 ) {
1177 if ( S_ISDIR(stat_buf.st_mode) ) {
1178 // sub directory already exists
1179 return 1;
1180 } else {
1181 // an entry with this name exists, but it's not a directory
1182 snprintf(error, errlen, "Path '%s': %s ", path, strerror(ENOTDIR));
1183 return 0;
1184 }
1185 }
1186
1187 // no such entry exists - try to create the directory, assuming path below exists
1188 err = mkdir(path, dir_mode);
1189 if ( err == 0 ) // success
1190 return 1;
1191
1192 // else errno is set
1193 if ( errno == ENOENT ) { // we need to create intermediate directories as well
1194 err = mkpath(path, p, mode, dir_mode, error, errlen);
1195 if ( err == 0 ) // creation was successful
1196 return 1;
1197 } else {
1198 snprintf(error, errlen, "mkdir() error for '%s': %s\n", path, strerror(errno));
1199 }
1200
1201 // anything else failed and error string is set
1202 return 0;
1203
1204 } // End of SetupSubDir
1205
/*
 * mkpath -- create directories.
 *  path     - complete path; it is modified temporarily while the function
 *             runs and is restored unchanged on success
 *  p        - position inside 'path' where the component by component
 *             check/creation starts ( separator path/subpath )
 *  mode     - file mode of terminal directory
 *  dir_mode - file mode of intermediate directories
 *  error    - buffer which receives an error message on failure
 *  errlen   - size of 'error'
 *
 * Returns 0 if all components starting at 'p' exist as directories when the
 * function returns, -1 otherwise. In the error case 'path' is left cut off
 * after the component which caused the failure.
 */
static int mkpath(char *path, char *p, mode_t mode, mode_t dir_mode, char *error, size_t errlen) {
    struct stat sb;
    char *slash = p;
    int done = 0;

    while (!done) {
        // move to the end of the next path component
        slash += strspn(slash, "/");
        slash += strcspn(slash, "/");

        // cut the path after this component for stat()/mkdir()
        done = (*slash == '\0');
        *slash = '\0';

        if (stat(path, &sb)) {
            // EEXIST is accepted: someone else may have created it meanwhile
            if (errno != ENOENT || (mkdir(path, done ? mode : dir_mode) && errno != EEXIST)) {
                snprintf(error, errlen, "mkdir() error for '%s': %s\n", path, strerror(errno));
                return (-1);
            }
        } else if (!S_ISDIR(sb.st_mode)) {
            snprintf(error, errlen, "Path '%s': %s ", path, strerror(ENOTDIR));
            return (-1);
        }

        // put the separator back - but never overwrite the terminating '\0'
        // of the last component, the caller's string must stay intact
        if (!done)
            *slash = '/';
    }

    return (0);

} // End of mkpath
1243
1244