1 /*
2  *  Copyright (c) 2009-2020, Peter Haag
3  *  Copyright (c) 2004-2008, SWITCH - Teleinformatikdienste fuer Lehre und Forschung
4  *  All rights reserved.
5  *
6  *  Redistribution and use in source and binary forms, with or without
7  *  modification, are permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright notice,
12  *     this list of conditions and the following disclaimer in the documentation
13  *     and/or other materials provided with the distribution.
14  *   * Neither the name of the author nor the names of its contributors may be
15  *     used to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  *  POSSIBILITY OF SUCH DAMAGE.
29  *
30  */
31 
32 #include "config.h"
33 
34 #include <stdio.h>
35 #include <unistd.h>
36 #include <stdlib.h>
37 #include <time.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <dirent.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <sys/param.h>
45 #include <fcntl.h>
46 #include <ctype.h>
47 #include <netinet/in.h>
48 
49 #ifdef HAVE_STDINT_H
50 #	include <stdint.h>
51 #endif
52 
53 #ifdef HAVE_FTS_H
54 #	include <fts.h>
55 #else
56 #	include "fts_compat.h"
57 #define fts_children fts_children_compat
58 #define fts_close fts_close_compat
59 #define fts_open  fts_open_compat
60 #define fts_read  fts_read_compat
61 #define fts_set   fts_set_compat
62 #endif
63 
64 #include "util.h"
65 #include "nfdump.h"
66 #include "nffile.h"
67 #include "flist.h"
68 
69 /*
70  * Select a single file
71  * --------------------
72  * -r [/]path/to/single_file
 * Select a single file: absolute or relative path to a single file.
74  * Recursive: no
75  *
76  * Selecting a range of files
77  * --------------------------
78  * -R [/]path/to/first_file
 *  Select a range of files in directory specified by absolute or relative path [/]path/to/
80  *  Files are selected in alphabetical order starting with 'first_file' to the end of
81  *  the directory.
82  *
83  * -R [/]path/to/first_file:last_file
 *  Select a range of files in directory specified by absolute or relative path [/]path/to/
85  *  Files are selected in alphabetical order starting with 'first_file' and ending with
86  *  'last_file'.
87  *
88  * -R [/]path/to/directory
 *	Select all files in alphabetical order in directory specified by absolute or relative
 *	path [/]path/to/directory
91  *
92  * Selecting files over multiple sources
93  * -------------------------------------
94  * -M /path/to/multiple/source1:source2[:..:sourceN]
95  * It is assumed, that each source directory 'source1', 'source2' etc. exists in directory
96  * /path/to/multiple. This will expand to multiple directories:
97  * 	/path/to/multiple/source1
98  * 	/path/to/multiple/source2
99  * 	..
100  * 	/path/to/multiple/sourceN
101  * 	Each of these directories contain the same files.
102  * Used in combination with -r and -R to prepend file selections.
103  *
104  * Select a single file from multiple directories
105  * ----------------------------------------------
106  *  -M /path/to/source1:source2	-r single_file
107  *  Select the same file 'single_file' from each source directory: e.g.
108  *  /path/to/source1/single_file
109  *  /path/to/source2/single_file
110  *
111  *
112  * Select a range of files from multiple directories
113  * -------------------------------------------------
114  *  -M /path/to/source1:source2[:..] -R first_file
115  *  For each expanded directory specified by -M /path/to/source1:source2
116  *	select a range of files as described above. Would be identical to
117  *	-R /path/to/source1/first_file -R /path/to/source2/first_file
118  *
119  *  -M /path/to/source1:source2[:..] -R first_file:last_file
120  *  For each expanded directory specified by -M /path/to/source1:source2
121  *	select a range of files as described above. Would be identical to
122  *	-R /path/to/source1/first_file:last_file -R /path/to/source2/first_file:last_file [-R .. ]
123  *
124  *  -M /path/to/source1:source2[:..] -R .
125  *  For each expanded directory specified by -M /path/to/source1:source2
 *  select all files of the directory as described above. Would be identical to
127  *	-R /path/to/source1 -R /path/to/source2 [-R ...]
128  *
129  *
 * Hierarchical file organisation:
 * For performance reasons, files may be stored in various sub directories instead of a
 * single directory. These sub directories are assumed to be created in alphabetical order.
 * For example daily sub directories: 2006/04/01 .. 2006/04/30 as created by nfcapd with
 * option -S %y/%m/%d
135  *
136  * Single file selection is identical to the flat file layout:
137  * -r [/]path/to/sub1/sub2/sub3/single_file
138  *
139  * Selecting a range of files in a hierarchical file layout
140  * --------------------------------------------------------
141  * -R [/]path/to/sub1/sub2/first_file
 *  Select a range of files in directory specified by absolute or relative path
 *  [/]path/to/sub1/sub2/. Files are selected in alphabetical order starting with
 *  'first_file' to the end of the directory. The hierarchy has no impact here.
145  *
146  * -R [/]path/to/first_sub1/first_sub2/first_file:last_sub1/last_sub2/last_file
 *  Select a range of files over multiple sub directories starting at absolute or
 *  relative path [/]path/to/first_sub1/first_sub2/first_file up to and including
 *  [/]path/to/last_sub1/last_sub2/last_file. Files are selected in alphabetical
 *  order by iterating over the required sub directory hierarchy
151  *	Example:
152  *	-R /path/to/2006/03/31/nfcapd.200603312300:2006/04/01/nfcapd.200604010600
153  *
154  * -R [/]path/to/directory
 *	Select all files in alphabetical order in directory specified by absolute or relative
 *	path [/]path/to/directory, identical to flat layout
157  *
 * The same method applies for selecting a range of files over multiple sub directories
 * and multiple sources.
160  *
161  * Example:
162  * -M /path/to/source1:source2 -R 2006/03/31/nfcapd.200603312300:2006/04/01/nfcapd.200604010600
163  *
164  */
165 
166 /*
167  * syntax for possible sub dir definitions:
168  *
169  * %Y    is replaced by the year with century as a decimal number.
170  * %y    is replaced by the year without century as a decimal number (00-99).
171  * %m    is replaced by the month as a decimal number (01-12).
172  * %d    is replaced by the day of the month as a decimal number (01-31).
173  * %j    is replaced by the day of the year as a decimal number (001-366).
174  * %H    is replaced by the hour (24-hour clock) as a decimal number (00-23).
175  * %M    is replaced by the minute as a decimal number (00-59).
176  * %s    is replaced by the number of seconds since the Epoch, UTC
177  * %U    is replaced by the week number of the year (Sunday as the first day
178  *       of the week) as a decimal number (00-53).
179  * %W    is replaced by the week number of the year (Monday as the first day
180  *       of the week) as a decimal number (00-53).
181  * %w    is replaced by the weekday (Sunday as the first day of the week) as
182  *       a decimal number (0-6).
183  * %u    is replaced by the weekday (Monday as the first day of the week) as
184  *       a decimal number (1-7).
185  * %F    is equivalent to ``%Y-%m-%d''.
186  */
187 
// Predefined and accepted sub directory formats.
// Each entry is a format string built from the conversion specifiers described in
// the comment above; the table is terminated by a NULL entry.
static const char *subdir_def[] = {
	"",				// default index 0 - no subdir hierarchy
	"%Y/%m/%d",
	"%Y/%m/%d/%H",
	"%Y/%W/%u",
	"%Y/%W/%u/%H",
	"%Y/%j",
	"%Y/%j/%H",
	"%F",
	"%F/%H",
	NULL
};


// all accepted conversion characters in a sub directory format string
#define AcceptedFormatChar "YymdjHMsUWwuF"
205 
// NOTE(review): mode, dir_mode and subdir_format are not referenced in this part of
// the file; presumably they are used by the sub directory creation code (see the
// mkpath() prototype below) - confirm there.
static mode_t mode, dir_mode;
static const char *subdir_format;

// Per directory level filter used while walking the source directories.
// The array is indexed by the fts level; it is allocated and filled by
// CreateDirListFilter() and read by GetFileList().
static struct entry_filter_s {
	char	*first_entry;	// lower bound for entries at this level, NULL: no lower bound
	char	*last_entry;	// upper bound for entries at this level, NULL: no upper bound
	int		list_files;		// set to 1 for the level at which files are listed, 0 otherwise
} *dir_entry_filter;

// second argument of InitStringlist() for source_dirs - presumably the allocation step
#define NUM_PTR 16

// globals
// time window first/last; defined elsewhere and set by SetupInputFileSequence()
extern uint32_t	twin_first, twin_last;

// first and last file name of a selected file range, NULL if not bounded
static char		*first_file, *last_file;
// NOTE(review): current_file is not referenced in this part of the file
static char		*current_file = NULL;
// source_dirs: directories to be searched; file_list: resulting list of input files
static stringlist_t source_dirs, file_list;

/* Function prototypes */
static inline int CheckTimeWindow(uint32_t t_start, uint32_t t_end, stat_record_t *stat_record);

static void GetFileList(char *path);

static void CleanPath(char *entry);

static void Getsource_dirs(char *dirs);

static int mkpath(char *path, char *p, mode_t mode, mode_t dir_mode, char *error, size_t errlen);

static char *GuessSubDir(char *channeldir, char *filename);

static char *VerifyFileRange(char *path, char *last_file);
238 
239 /* Functions */
240 
/*
 * Ordering callback handed to fts_open(): orders two directory entries
 * by a plain byte-wise comparison of their names.
 * Returns <0, 0 or >0 like strcmp().
 */
static int compare(const FTSENT **f1, const FTSENT **f2) {
const FTSENT *left  = *f1;
const FTSENT *right = *f2;

	return strcmp(left->fts_name, right->fts_name);

} // End of compare
244 
/*
 * Normalize a path string in place:
 *  - collapse every '//' into a single '/'
 *  - remove one trailing '/' (a lone "/" is kept, as it names the root directory)
 *  - remove every '/./' component
 *  - remove a leading './'
 *
 * entry: NUL terminated, writable string. The result is never longer than the input.
 *        An empty string is left untouched.
 */
static void CleanPath(char *entry) {
char *p;
size_t	len;

	// wash out any '//' in entry
	while ( (p = strstr(entry, "//")) != NULL ) {
		// drop the second '/', the regions overlap: use memmove, including the '\0'
		memmove(p + 1, p + 2, strlen(p + 2) + 1);
	}

	// remove trailing '/', but never index entry[-1] on an empty string
	// and never turn the root directory "/" into an empty path
	len = strlen(entry);
	if ( len > 1 && entry[len-1] == '/' )
		entry[len-1] = '\0';

	// wash out any '/./' in entry
	while ( (p = strstr(entry, "/./")) != NULL ) {
		// keep the first '/', drop the following './'
		memmove(p + 1, p + 3, strlen(p + 3) + 1);
	}

	// remove leading './' in entry
	if ( strncmp(entry, "./", 2) == 0 )
		memmove(entry, entry + 2, strlen(entry + 2) + 1);

} // End of CleanPath
279 
/*
 * Decide whether the time range of a file (stat_record->first_seen .. last_seen)
 * touches the requested time window t_start .. t_end.
 *
 * Returns 1 if
 *  - no time window is set (t_start == 0), or
 *  - t_start lies within the file range, or
 *  - t_end lies within the file range, or
 *  - the window strictly encloses the file range.
 * Returns 0 otherwise, in particular when the file has first_seen == 0.
 */
static inline int CheckTimeWindow(uint32_t t_start, uint32_t t_end, stat_record_t *stat_record) {

	// no time window set: every file matches
	if ( t_start == 0 )
		return 1;

	// file without a valid first_seen never matches a window
	if ( stat_record->first_seen == 0 )
		return 0;

	return ( stat_record->first_seen <= t_start && stat_record->last_seen >= t_start ) ||
		   ( stat_record->first_seen <= t_end   && stat_record->last_seen >= t_end ) ||
		   ( stat_record->first_seen >  t_start && stat_record->last_seen <  t_end );

} // End of CheckTimeWindow
308 
309 // file filter for scandir function
310 
/*
 * Count the number of directory levels in a path, i.e. the number of '/'
 * characters, not counting a single leading '/'.
 * A NULL pointer yields 0.
 */
static int dirlevels(char *dir) {
int count = 0;
char *c;

	if ( dir == NULL )
		return 0;

	// a leading '/' is not counted
	c = ( *dir == '/' ) ? dir + 1 : dir;

	for ( ; *c != '\0'; c++ ) {
		if ( *c == '/' )
			count++;
	}

	return count;

} // End of dirlevels
330 
CreateDirListFilter(char * first_path,char * last_path,int file_list_level)331 static void CreateDirListFilter(char *first_path, char *last_path, int file_list_level) {
332 int i;
333 char *p, *q, *first_mark, *last_mark;
334 
335 //	printf("First Dir: '%s', first_path: '%s', last_path '%s', first_file '%s', last_file '%s', list_level: %i\n",
336 //			source_dirs.list[0], first_path, last_path, first_file, last_file, file_list_level);
337 
338 	if ( file_list_level == 0 )
339 		return;
340 
341 	if ( file_list_level < 0 ) {
342 		fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
343 		exit(250);
344 	}
345 
346 	dir_entry_filter = (struct entry_filter_s *)malloc((file_list_level+1) * sizeof(struct entry_filter_s));
347 	if ( !dir_entry_filter ) {
348 		fprintf(stderr, "malloc() error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
349 		exit(250);
350 	}
351 
352 	// first default entry - the directory itself
353 	dir_entry_filter[0].first_entry = NULL;
354 	dir_entry_filter[0].last_entry  = NULL;
355 	dir_entry_filter[0].list_files  = 0;
356 
357 	first_mark = first_path;
358 	last_mark  = last_path;
359 	// intermediate directory level filters
360 	for ( i=1; i<file_list_level; i++ ) {
361 		if ( first_mark ) {
362 			p = strchr(first_mark, '/');
363 			if ( p ) {
364 				*p = '\0';
365 				dir_entry_filter[i].first_entry = strdup(first_path);
366 				*p++ = '/';
367 				first_mark = p;
368 			} else {
369 				dir_entry_filter[i].first_entry = strdup(first_path);
370 				first_mark = NULL;
371 			}
372 		} else {
373 			dir_entry_filter[i].first_entry = NULL;
374 		}
375 		dir_entry_filter[i].list_files  = 0;
376 
377 		if ( last_mark ) {
378 			q = strchr(last_mark, '/');
379 			if ( q ) {
380 				*q = '\0';
381 				dir_entry_filter[i].last_entry = strdup(last_path);
382 				*q++ = '/';
383 				last_mark = q;
384 			} else {
385 				dir_entry_filter[i].last_entry = strdup(last_path);
386 				last_mark = NULL;
387 			}
388 		} else {
389 			dir_entry_filter[i].last_entry = NULL;
390 		}
391 		if ( dir_entry_filter[i].first_entry && dir_entry_filter[i].last_entry &&
392 			 strcmp(dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry) > 0 )
393 			fprintf(stderr, "WARNING: Entry '%s' > '%s'. Will not match anything!\n",
394 					dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry);
395 
396 //		printf("%i first: '%s', last: '%s'\n",
397 //			i, dir_entry_filter[i].first_entry, dir_entry_filter[i].last_entry);
398 	}
399 
400 	// the last level - files are listed here
401 	dir_entry_filter[file_list_level].first_entry = first_file;
402 	dir_entry_filter[file_list_level].last_entry  = last_file;
403 	dir_entry_filter[file_list_level].list_files  = 1;
404 
405 	if ( dir_entry_filter[file_list_level].first_entry && dir_entry_filter[file_list_level].last_entry &&
406 		 strcmp(dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry) > 0 )
407 		fprintf(stderr, "WARNING: File '%s' > '%s'. Will not match anything!\n",
408 				dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry);
409 
410 //	printf("%i first: '%s', last: '%s'\n",
411 //		file_list_level, dir_entry_filter[file_list_level].first_entry, dir_entry_filter[file_list_level].last_entry);
412 
413 } // End of CreateDirListFilter
414 
GetFileList(char * path)415 static void GetFileList(char *path) {
416 struct stat stat_buf;
417 char *last_file_ptr, *first_path, *last_path;
418 int levels_first_file, levels_last_file, file_list_level;
419 int	sub_index;
420 
421 FTS *fts;
422 FTSENT *ftsent;
423 
424 	CleanPath(path);
425 
426 	// Check for last_file option
427 	last_file_ptr = strchr(path, ':');
428 	first_path = last_path = NULL;
429 	levels_first_file =  levels_last_file = 0;
430 	if ( last_file_ptr ) {
431 		// make sure we have only a single ':' in path
432 		if ( strrchr(path, ':') != last_file_ptr ) {
433 			fprintf(stderr, "Multiple file separators ':' in path not allowed!\n");
434 			exit(250);
435 		}
436 		*last_file_ptr++ = '\0';
437 		// last_file_ptr points to last_file
438 
439 		if ( strlen(last_file_ptr) == 0 ) {
440 			fprintf(stderr, "Missing last file option after ':'!\n");
441 			exit(250);
442 		}
443 
444 		CleanPath(last_file_ptr);
445 		// make sure last_file option is not a full path
446 		if ( last_file_ptr[0] == '/') {
447 			fprintf(stderr, "Last file name in -R list must not start with '/'\n");
448 			exit(250);
449 		}
450 		// how may sub dir levels has last_file option?
451 		levels_last_file  = dirlevels(last_file_ptr);
452 
453 		// if no subdirs are given for last_file, try to find out, if the last_file
454 		// exists in any possible subdirs
455 		if ( levels_last_file == 0 ) {
456 			char s[MAXPATHLEN];
457 			char *r = VerifyFileRange(path, last_file_ptr);
458 
459 			if ( r != last_file_ptr && r[0] != '\0' ) {
460 				snprintf(s, MAXPATHLEN-1, "%s/%s", r, last_file_ptr);
461 				s[MAXPATHLEN-1] = '\0';
462 				last_file_ptr = strdup(s);
463 				levels_last_file  = dirlevels(last_file_ptr);
464 			}
465 		}
466 
467 	}
468 
469 	levels_first_file = dirlevels(path);
470 
471 	if ( source_dirs.num_strings == 0 ) {
472 		// No multiple sources option -M
473 
474 		// path contains the path to a file/directory
475 		// stat this entry
476 		if ( stat(path, &stat_buf) ) {
477 			fprintf(stderr, "stat() error '%s': %s\n", path, strerror(errno));
478 			exit(250);
479 		}
480 		if ( !S_ISDIR(stat_buf.st_mode) && !S_ISREG(stat_buf.st_mode) ) {
481 			fprintf(stderr, "Not a file or directory: '%s'\n", path);
482 			exit(250);
483 		}
484 
485 		// Check, how many levels of directory in path
486 		levels_first_file = dirlevels(path);
487 
488 		if ( last_file_ptr ) {
489 			// path is [/]path/to/any/dir|file:last_file_ptr
490 
491 			// make sure first_file is a file
492 			if ( S_ISDIR(stat_buf.st_mode) ) {
493 				fprintf(stderr, "Not a file: '%s'\n", path);
494 				exit(250);
495 			}
496 
497 			if ( levels_last_file ) {
498 				// we have levels_last_file number of sub dirs
499 
500 				// sub dir levels of first_file mus have at least the same number of levels as last_file
501 				if ( levels_first_file < levels_last_file ) {
502 					fprintf(stderr, "Number of sub dirs for sub level hierarchy for file list -R do not match\n");
503 					exit(250);
504 				}
505 				if ( levels_first_file == levels_last_file ) {
506 					char *p, *q;
507 					// path = [/]sub1[/..]/first_file:sub1[/...]/last_file
508 					if ( path[0] == '/' ) {
509 						// this is rather strange, but strctly spoken, valid anyway
510 						InsertString(&source_dirs, "/");
511 						path++;
512 					} else {
513 						InsertString(&source_dirs, ".");
514 					}
515 
516 					// path = sub_first[/..]/first_file:sub_last[/...]/last_file
517 					p = strrchr(path, '/');
518 					q = strrchr(last_file_ptr, '/');
519 					if ( !p || !q ) {
520 						// this should never happen
521 						fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
522 						exit(250);
523 					}
524 					*p++ = '\0';
525 					*q++ = '\0';
526 					first_file = strdup(p);
527 					last_file = strdup(q);
528 					file_list_level = levels_last_file + 1;
529 					first_path = path;
530 					last_path  = last_file_ptr;
531 
532 				} else {
533 					// path = [/]path/to/sub_first[/..]/first_file:sub_last[/...]/last_file
534 					int i;
535 					char *p, *r, *s;
536 
537 					p = strrchr(path, '/');
538 					// levels_first_file > levels_last_file
539 
540 					// step back the number of sub dirs in first_file
541 					for ( i=0; i<levels_last_file; i++ ) {
542 						do {
543 							p--;
544 						} while ( p >= path && *p != '/');
545 					}
546 					*p++ = '\0';
547 
548 					InsertString(&source_dirs, path);
549 
550 					r = strrchr(p, '/');
551 					s = strrchr(last_file_ptr, '/');
552 					if ( !r || !s ) {
553 						// this must never happen
554 						fprintf(stderr, "software error in %s line %d: %s\n", __FILE__, __LINE__, strerror(errno) );
555 						exit(250);
556 					}
557 					*r++ = '\0';
558 					*s++ = '\0';
559 					first_file = strdup(r);
560 					last_file = strdup(s);
561 					// files are listed at this sub dir level
562 					file_list_level = levels_last_file + 1;
563 					first_path = p;
564 					last_path  = last_file_ptr;
565 
566 				}
567 
568 			} else {
569 				// we have no sub dir levels given
570 
571 				// path is [/]path/to/any/file
572 				char *p = strrchr(path, '/');
573 
574 				if ( p ) {
575 					// path is [/]path/to/any/first_file:last_file
576 					*p++ = '\0';
577 					// path is the direcory containing all the files
578 					InsertString(&source_dirs, path);
579 					first_file = strdup(p);
580 				} else {
581 					// path is first_file:last_file
582 					InsertString(&source_dirs, ".");
583 					first_file = strdup(path);
584 				}
585 				// set last_file filter
586 				last_file  = strdup(last_file_ptr);
587 				// in any case we list the files of directory level 1
588 				file_list_level = 1;
589 			}
590 		} else {
591 			// path is [/]path/to/any/dir|file
592 			if ( S_ISDIR(stat_buf.st_mode) ) {
593 				// path is [/]path/to/any/dir
594 				// list all files in this directory
595 				InsertString(&source_dirs, path);
596 				first_file = NULL;
597 				file_list_level = 0;
598 			} else {
599 				// path is [/]path/to/any/file
600 				char *p = strrchr(path, '/');
601 				if ( p ) {
602 					// path is [/]path/to/any/file
603 					*p++ = '\0';
604 					// path is the direcory containing all the files
605 					InsertString(&source_dirs, path);
606 					first_file = strdup(p);
607 				} else {
608 					// path is file
609 					InsertString(&source_dirs, ".");
610 					first_file = strdup(path);
611 				}
612 				// in any case we list the files of directory level 1
613 				file_list_level = 1;
614 			}
615 			// in any case, no last_file filter
616 			last_file  = NULL;
617 		}
618 
619 	} else {
620 		char pathbuff[MAXPATHLEN];
621 		// multiple sources option -M given
622 		if ( path[0] == '/') {
623 			fprintf(stderr, "File list -R must not start with '/' when combined with a source list -M\n");
624 			exit(250);
625 		}
626 
627 		// special case for all files in directory
628 		if ( strcmp(path, ".") == 0 ) {
629 			first_file = NULL;
630 			last_file  = NULL;
631 			file_list_level = 0;
632 		} else {
633 			// pathbuff contains the path to a file/directory, compiled using the first entry
634 			// in the source_dirs
635 			snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", source_dirs.list[0], path);
636 			pathbuff[MAXPATHLEN-1] = '\0';
637 
638 			// pathbuff must point to a file
639 			if ( stat(pathbuff, &stat_buf) ) {
640 				if ( errno == ENOENT ) {
641 					// file not found - try to guess a possible subdir
642 					char *sub_dir = GuessSubDir(source_dirs.list[0], path);
643 					if ( sub_dir ) {	// subdir found
644 						snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", sub_dir, path);
645 						pathbuff[MAXPATHLEN-1] = '\0';
646 						// update path
647 						path = strdup(pathbuff);
648 						free(sub_dir);
649 
650 						// need guessing subdir with last_file too
651 						if ( last_file_ptr ) {
652 							sub_dir = GuessSubDir(source_dirs.list[0], last_file_ptr);
653 							if ( sub_dir ) {	// subdir found
654 								snprintf(pathbuff, MAXPATHLEN-1, "%s/%s", sub_dir, last_file_ptr);
655 								pathbuff[MAXPATHLEN-1] = '\0';
656 								last_file_ptr = strdup(pathbuff);
657 								free(sub_dir);
658 
659 								// update dir levels of extended file path
660 								levels_last_file  = dirlevels(last_file_ptr);
661 							} else {
662 								fprintf(stderr, "'%s': %s\n", last_file_ptr, "File not found!");
663 								exit(250);
664 							}
665 						}
666 
667 					} else {	// no file in any possible subdir found
668 						fprintf(stderr, "stat() error '%s': %s\n", pathbuff, "File not found!");
669 						exit(250);
670 					}
671 				} else {	// Any other stat error
672 					fprintf(stderr, "stat() error '%s': %s\n", pathbuff, strerror(errno));
673 					exit(250);
674 				}
675 			} else if ( !S_ISREG(stat_buf.st_mode) ) {
676 				fprintf(stderr, "Not a file : '%s'\n", pathbuff);
677 				exit(250);
678 			}
679 
680 			// Check, how many levels of directory in path
681 			levels_first_file = dirlevels(path);
682 
683 			if ( last_file_ptr ) {
684 				// path is path/to/any/first_file:last_file_ptr
685 				char *p, *q;
686 
687 				// the number of sub dirs must be eqal for first_file and last_file
688 				if ( levels_first_file != levels_last_file ) {
689 					fprintf(stderr, "Number of sub dirs must agree in '%s' and '%s'\n", path, last_file_ptr);
690 					exit(250);
691 				}
692 
693 				p = strrchr(path, '/');
694 				if ( p ) {
695 					// path is fist_sub/to/any/first_file
696 					// recursive all files in sub dirs
697 					file_list_level = dirlevels(path) + 1;
698 					*p++ = '\0';
699 					first_file = strdup(p);
700 					first_path = path;
701 				} else {
702 					// path is first_file
703 					first_file = strdup(path);
704 					file_list_level = 1;
705 				}
706 
707 				q = strrchr(last_file_ptr, '/');
708 				if ( q ) {
709 					*q++ = '\0';
710 					last_file = strdup(q);
711 					last_path  = last_file_ptr;
712 				} else {
713 					last_file = strdup(last_file_ptr);
714 				}
715 
716 			} else {
717 				// path is path/to/any/first_file
718 				char *p = strrchr(path, '/');
719 				if ( p ) {
720 					// path is fist_sub/to/any/first_file
721 					// recursive all files in sub dirs
722 					file_list_level = dirlevels(path) + 1;
723 					*p++ = '\0';
724 					first_file = strdup(p);
725 					first_path = path;
726 				} else {
727 					// path is first_file
728 					first_file = strdup(path);
729 					file_list_level = 1;
730 				}
731 				last_file  = NULL;
732 			}
733 		}
734 	}
735 
736 /*
737 printf("first_file %s\n", first_file ? first_file : "<none>");
738 printf("last_file %s\n", last_file ? last_file : "<none>");
739 printf("first_path %s\n", first_path ? first_path : "<none>");
740 printf("last_path %s\n", last_path ? last_path : "<none>");
741 printf("file_list_level: %i\n", file_list_level);
742 */
743 	CreateDirListFilter(first_path, last_path, file_list_level );
744 
745 	// last entry must be NULL
746 	InsertString(&source_dirs, NULL);
747 	fts = fts_open(source_dirs.list, FTS_LOGICAL,  compare);
748 	sub_index = 0;
749 	while ( (ftsent = fts_read(fts)) != NULL) {
750 		int fts_level = ftsent->fts_level;
751 		char *fts_path;
752 
753 // printf("DBG: %u %i %s %s\n", ftsent->fts_info, ftsent->fts_level, ftsent->fts_path, ftsent->fts_name);
754 
755 		if ( fts_level == 0 ) {
756 			sub_index = ftsent->fts_pathlen + 1;
757 			continue;
758 		}
759 
760 		if ( ftsent->fts_pathlen < sub_index ) {
761 			LogError("ERROR: fts_pathlen error at %s line %d\n", __FILE__, __LINE__);
762 			exit(250);
763 		}
764 		fts_path = &ftsent->fts_path[sub_index];
765 
766 /*
767 if ( file_list_level )
768 printf("DGB: short fts: '%s', filer_first: '%s', filter_last: '%s'\n",
769 					fts_path, dir_entry_filter[fts_level].first_entry , dir_entry_filter[fts_level].last_entry);
770 */
771 		switch (ftsent->fts_info) {
772 			case FTS_D:
773 				// dir entry pre descend
774 				if ( file_list_level && file_list_level && (
775 					( dir_entry_filter[fts_level].first_entry &&
776 						( strcmp(fts_path, dir_entry_filter[fts_level].first_entry ) < 0 ) ) ||
777 					( dir_entry_filter[fts_level].last_entry &&
778 					  	( strcmp(fts_path, dir_entry_filter[fts_level].last_entry ) > 0 ) )
779 				   ))
780 					fts_set(fts, ftsent, FTS_SKIP );
781 
782 				break;
783 			case FTS_DP:
784 				break;
785 			case FTS_F:
786 				// file entry
787 // printf("==> Check: %s\n", ftsent->fts_name);
788 
789 				// skip stat file
790 				if ( strcmp(ftsent->fts_name, ".nfstat") == 0 ||
791 					 strncmp(ftsent->fts_name, NF_DUMPFILE , strlen(NF_DUMPFILE)) == 0)
792 					continue;
793 				if ( strstr(ftsent->fts_name, ".stat") != NULL )
794 					continue;
795 				// skip OSX DS_Store files
796 				if ( strstr(ftsent->fts_name, ".DS_Store") != NULL )
797 					continue;
798 				// skip pcap file
799 				if ( strstr(ftsent->fts_name, "pcap") != NULL )
800 					continue;
801 
802 				if ( file_list_level && (
803 					( fts_level != file_list_level ) ||
804 					( dir_entry_filter[fts_level].first_entry &&
805 						( strcmp(ftsent->fts_name, dir_entry_filter[fts_level].first_entry) < 0 ) ) ||
806 					( dir_entry_filter[fts_level].last_entry &&
807 					  	( strcmp(ftsent->fts_name, dir_entry_filter[fts_level].last_entry) > 0 ) )
808 				   ) )
809 					continue;
810 
811 // printf("==> Listed: %s\n", ftsent->fts_path);
812 				InsertString(&file_list, ftsent->fts_path);
813 
814 				break;
815 		}
816 
817 	}
818     fts_close(fts);
819 
820 } // End of GetFileList
821 
822 /*
823  * Get the list of directories
824  * dirs: user supplied parameter: /any/path/dir1:dir2:dir3:...
825  * 		source_dirs must result in
826  * 		/any/path/dir1
827  * 		/any/path/dir2
828  * 		/any/path/dir3
829  * 	/any/path is dir prefix, which may be NULL e.g. dir1:dir2:dir3:...
830  * 	dir1, dir2 etc entrys
831  */
Getsource_dirs(char * dirs)832 void Getsource_dirs(char *dirs) {
833 struct stat stat_buf;
834 char	*p, *q, *dirprefix;
835 char	path[MAXPATHLEN];
836 
837 	q = strchr(dirs, ':');
838 	if ( q ) { // we have /path/to/firstdir:dir1:dir2:...
839 		*q = 0;
840 		p = strrchr(dirs, '/');
841 		if ( p ) {
842 			*p++ = 0;	// p points now to the first name in the dir list
843 			dirprefix = dirs;
844 		} else  { // we have a source_dirs in current directory
845 			p = dirs;	// p points now to the first name in the dir list
846 			dirprefix = ".";	// current directory
847 		}
848 		*q = ':';	// restore ':' in source_dirs
849 
850 		while ( p ) { // iterate over all elements in the dir list
851 			q = strchr(p, ':');
852 			if ( q )
853 				*q = 0;
854 
855 			// p point to a dir name
856 			snprintf(path, 1023, "%s/%s", dirprefix, p);
857 			path[MAXPATHLEN-1] = 0;
858 			if ( stat(dirs, &stat_buf) ) {
859 				fprintf(stderr, "Can't stat '%s': %s\n", path, strerror(errno));
860 				return;
861 			}
862 			if ( !S_ISDIR(stat_buf.st_mode) ) {
863 				fprintf(stderr, "Not a directory: '%s'\n", path);
864 				return;
865 			}
866 
867 			// save path into source_dirs
868 			InsertString(&source_dirs, path);
869 
870 			p = q ? q + 1 : NULL;
871 		}
872 
873 	} else { // we have only one directory
874 		if ( stat(dirs, &stat_buf) ) {
875 			fprintf(stderr, "Can't stat '%s': %s\n", dirs, strerror(errno));
876 			return;
877 		}
878 		if ( !S_ISDIR(stat_buf.st_mode) ) {
879 			fprintf(stderr, "Not a directory: '%s'\n", dirs);
880 			return;
881 		}
882 
883 		// save the path into source_dirs
884 		InsertString(&source_dirs, dirs);
885 	}
886 
887 } // End of Getsource_dirs
888 
SetupInputFileSequence(char * multiple_dirs,char * single_file,char * multiple_files)889 void SetupInputFileSequence(char *multiple_dirs, char *single_file, char *multiple_files) {
890 
891 	twin_first  = 0;
892 	twin_last   = 0xffffffff;
893 
894 	first_file 	= NULL;
895 	last_file  	= NULL;
896 
897 	InitStringlist(&source_dirs, NUM_PTR);
898 	InitStringlist(&file_list, 64);
899 
900 	if ( multiple_dirs )
901 		Getsource_dirs(multiple_dirs);
902 
903 	if ( multiple_files ) {
904 		// use multiple files
905 		GetFileList(multiple_files);
906 
907 		// get time window spanning all the files
908 		if ( file_list.num_strings ) {
909 			stat_record_t stat_ptr;
910 
911 			// read the stat record
912 			if ( !GetStatRecord(file_list.list[0], &stat_ptr) ) {
913 				exit(250);
914 			}
915 			twin_first = stat_ptr.first_seen;
916 
917 			// read the stat record of last file
918 			if ( !GetStatRecord(file_list.list[file_list.num_strings-1], &stat_ptr) ) {
919 				exit(250);
920 			}
921 			twin_last  = stat_ptr.last_seen;
922 		}
923 
924 	} else if ( single_file ) {
925 		CleanPath(single_file);
926 
927 		if ( source_dirs.num_strings == 0 ) {
928 			stat_record_t stat_ptr;
929 			InsertString(&file_list, single_file);
930 			if ( !GetStatRecord(single_file, &stat_ptr) ) {
931 				exit(250);
932 			}
933 			twin_first = stat_ptr.first_seen;
934 			twin_last  = stat_ptr.last_seen;
935 
936 		} else {
937 			int i;
938 
939 			if ( single_file[0] == '/' ) {
940 				fprintf(stderr, "File -r must not start with '/', when combined with a source list -M\n");
941 				exit(250);
942 			}
943 
944 			for ( i=0; i<source_dirs.num_strings; i++ ) {
945 				char s[MAXPATHLEN];
946 				struct stat stat_buf;
947 
948 				snprintf(s, MAXPATHLEN-1, "%s/%s", source_dirs.list[i], single_file);
949 				s[MAXPATHLEN-1] = '\0';
950 				if ( stat(s, &stat_buf) ) {
951 					if ( errno == ENOENT ) {
952 						// file not found - try to guess subdir
953 						char *sub_dir = GuessSubDir(source_dirs.list[i], single_file);
954 						if ( sub_dir ) {	// subdir found
955 							stat_record_t stat_ptr;
956 							snprintf(s, MAXPATHLEN-1, "%s/%s/%s", source_dirs.list[i], sub_dir, single_file);
957 							s[MAXPATHLEN-1] = '\0';
958 							InsertString(&file_list, s);
959 							if ( !GetStatRecord(s, &stat_ptr) ) {
960 								exit(250);
961 							}
962 							twin_first = stat_ptr.first_seen;
963 							twin_last  = stat_ptr.last_seen;
964 						} else {	// no subdir found
965 							fprintf(stderr, "stat() error '%s': %s\n", s, "File not found!");
966 						}
967 					} else {	// Any other stat error
968 						fprintf(stderr, "stat() error '%s': %s\n", s, strerror(errno));
969 						exit(250);
970 					}
971 				} else {	// stat() successful
972 					if ( !S_ISREG(stat_buf.st_mode) ) {
973 						fprintf(stderr, "Skip non file entry: '%s'\n", s);
974 					} else {
975 						stat_record_t stat_ptr;
976 						InsertString(&file_list, s);
977 						if ( !GetStatRecord(s, &stat_ptr) ) {
978 							exit(250);
979 						}
980 						twin_first = stat_ptr.first_seen;
981 						twin_last  = stat_ptr.last_seen;
982 
983 					}
984 				}
985 			}
986 		}
987 
988 	} else // else use stdin
989 		InsertString(&file_list, NULL);
990 
991 } // End of SetupInputFileSequence
992 
GetCurrentFilename(void)993 char *GetCurrentFilename(void) {
994 	return current_file;
995 } // End of GetCurrentFilename
996 
/*
 * GetNextFile -- advance to the next entry of file_list which matches the
 * requested time window.
 *  nffile     - handle of the file processed so far, or NULL on the very
 *               first call, which restarts the sequence at the list head
 *  twin_start - passed to CheckTimeWindow()
 *  twin_end   - passed to CheckTimeWindow()
 *
 * Returns the handle of the opened file, EMPTY_LIST if no (more) entries
 * are available, or NULL if OpenFile() failed.
 * current_file is set to the name of the returned file, or to NULL for
 * stdin and when the list is exhausted.
 */
nffile_t *GetNextFile(nffile_t *nffile, time_t twin_start, time_t twin_end) {
static int next_idx;	// index of the next file_list entry to try

	if ( nffile == NULL ) {
		// first time init - start over at the head of the list
		next_idx = 0;
	} else {
		// close the current file before opening the next one
		CloseFile(nffile);
		current_file = NULL;
	}

	// an empty or exhausted list never enters the loop
	while ( next_idx < file_list.num_strings ) {
		char *candidate = file_list.list[next_idx];

#ifdef DEVEL
		printf("Process: '%s'\n", candidate ? candidate : "<stdin>");
#endif
		nffile = OpenFile(candidate, nffile);
		if ( nffile == NULL )
			return NULL;

		next_idx++;

		// stdin is returned unconditionally and has no file name
		if ( nffile->fd == STDIN_FILENO ) {
			current_file = NULL;
			return nffile;
		}

		current_file = candidate;
		if ( CheckTimeWindow(twin_start, twin_end, nffile->stat_record) )
			return nffile;

		// outside of the time window - skip this file
		CloseFile(nffile);
	}

	current_file = NULL;
	return EMPTY_LIST;

} // End of GetNextFile
1045 
1046 
InitHierPath(int num)1047 int InitHierPath(int num) {
1048 int i;
1049 
1050 	subdir_format = NULL;
1051 
1052 	i=0;
1053 	while ( subdir_def[i] != NULL ) {
1054 		if ( i == num )
1055 			break;
1056 		i++;
1057 	}
1058 	if ( subdir_def[i] == NULL ) {
1059 		fprintf(stderr, "No such subdir level %i\n", num);
1060 		return 0;
1061 	}
1062 
1063 	subdir_format = subdir_def[i];
1064 
1065     /*
1066      * The default file mode is a=rwx (0777) with selected permissions
1067      * removed in accordance with the file mode creation mask.  For
1068      * intermediate path name components, the mode is the default modified
1069      * by u+wx so that the subdirectories can always be created.
1070      */
1071 
1072 	// get umask
1073 	mode = umask(0);
1074 	umask(mode);
1075 
1076     mode = 0777 & ~mode;
1077     dir_mode = mode | S_IWUSR | S_IXUSR;
1078 
1079 	return 1;
1080 
1081 } // End of InitHierPath
1082 
/*
 * VerifyFileRange -- search for last_file in the sub directories of the
 * parent directories of path.
 *  path      - path whose trailing components are stripped one by one
 *  last_file - file name looked up by GuessSubDir() below each prefix
 *
 * Returns the string delivered by GuessSubDir() for the first prefix with
 * a hit. If there is no hit, or if the working copy of path can not be
 * allocated, last_file itself is returned.
 * Note the mixed ownership: a GuessSubDir() result is heap allocated,
 * whereas last_file still belongs to the caller.
 */
static char *VerifyFileRange(char *path, char *last_file) {
char *sep, *subdir, *work;

	// work on a private copy, as the path gets cut at each '/'
	work = strdup(path);
	if ( work == NULL ) {
		fprintf(stderr, "strdup() error in VerifyFileRange: %s\n", strerror(errno));
		return last_file;
	}

	while ( (sep = strrchr(work, '/')) != NULL ) {
		// strip the last path component
		*sep = '\0';

		subdir = GuessSubDir(work, last_file);
		if ( subdir ) {
			free(work);
			return subdir;
		}
	}

	free(work);
	return last_file;

} // End of VerifyFileRange
1103 
GuessSubDir(char * channeldir,char * filename)1104 static char *GuessSubDir(char *channeldir, char *filename) {
1105 char s[MAXPATHLEN];
1106 struct  tm *t_tm;
1107 int	i;
1108 
1109 	size_t len = strlen(filename);
1110 	if ( (len == 19 || len == 21) && (strncmp(filename, "nfcapd.", 7) == 0) ) {
1111 		char *p  = &filename[7];
1112 		time_t t = ISO2UNIX(p);
1113 		t_tm = localtime(&t);
1114 	} else
1115 		return NULL;
1116 
1117 	i = 0;
1118 	// if the file exists, it must be in any of the possible subdirs
1119 	// so try one after the next - one will match
1120 	while ( subdir_def[i] ) {
1121 		char const *sub_fmt = subdir_def[i];
1122 		char subpath[255];
1123 		struct stat stat_buf;
1124 		strftime(subpath, 254, sub_fmt, t_tm);
1125 		subpath[254] = '\0';
1126 
1127 		snprintf(s, MAXPATHLEN-1, "%s/%s/%s", channeldir, subpath, filename);
1128 		if ( stat(s, &stat_buf) == 0 && S_ISREG(stat_buf.st_mode) ) {
1129 			// found file in subdir
1130 			return strdup(subpath);
1131 		}
1132 		i++;
1133 	}
1134 
1135 	return NULL;
1136 
1137 } // End of GuessSubDir
1138 
GetSubDir(struct tm * now)1139 char *GetSubDir(struct  tm *now ) {
1140 static char subpath[255];
1141 size_t sublen;
1142 
1143 	sublen = strftime(subpath, 254, subdir_format, now);
1144 
1145 	return sublen == 0 ? NULL : subpath;
1146 
1147 } // End of GetSubDir
1148 
SetupSubDir(char * dir,char * subdir,char * error,size_t errlen)1149 int SetupSubDir(char *dir, char *subdir, char *error, size_t errlen ) {
1150 char *p, path[MAXPATHLEN];
1151 struct stat stat_buf;
1152 size_t	sublen, pathlen;
1153 int err;
1154 
1155 	error[0] = '\0';
1156 
1157 	path[0] = '\0';
1158 	strncat(path, dir, MAXPATHLEN-1);
1159 	path[MAXPATHLEN-1] = '\0';
1160 
1161 	sublen  = strlen(subdir);
1162 	pathlen = strlen(path);
1163 	// set p as reference between path and subdir
1164 	if ( (sublen + pathlen + 2) >= (MAXPATHLEN-1) ) {	// +2 : add 1 for '/'
1165 		snprintf(error, errlen, "Path '%s': too long", path);
1166 		return 0;
1167 	}
1168 
1169 	p = path + pathlen;	// points to '\0' of path
1170 	*p++ = '/';
1171 	*p   = '\0';
1172 
1173 	strncat(path, subdir, MAXPATHLEN-pathlen-2);	// +2: add 1 for '/'
1174 
1175 	// our cwd is basedir ( -l ) so test if, dir exists
1176 	if ( stat(path, &stat_buf) == 0 ) {
1177 		if ( S_ISDIR(stat_buf.st_mode) ) {
1178 			// sub directory already exists
1179 			return 1;
1180 		} else {
1181 			// an entry with this name exists, but it's not a directory
1182 			snprintf(error, errlen, "Path '%s': %s ", path, strerror(ENOTDIR));
1183 			return 0;
1184 		}
1185 	}
1186 
1187 	// no such entry exists - try to create the directory, assuming path below exists
1188 	err = mkdir(path, dir_mode);
1189 	if ( err == 0 ) // success
1190 		return 1;
1191 
1192 	// else errno is set
1193 	if ( errno == ENOENT ) { // we need to create intermediate directories as well
1194 		err = mkpath(path, p, mode, dir_mode, error, errlen);
1195 		if ( err == 0 ) // creation was successful
1196 			return 1;
1197 	} else {
1198 		snprintf(error, errlen, "mkdir() error for '%s': %s\n", path, strerror(errno));
1199 	}
1200 
1201 	// anything else failed and error string is set
1202 	return 0;
1203 
1204 } // End of SetupSubDir
1205 
/*
 * mkpath -- create directories.
 *  path     - writable, NUL terminated path; temporarily cut at each '/'
 *             while the components are processed
 *  p        - separator path/subpath: components starting at p are
 *             checked and created if missing
 *  mode     - file mode of terminal directory
 *  dir_mode - file mode of intermediate directories
 *  error    - buffer for the error message in case of a failure
 *  errlen   - size of the error buffer
 *
 * Returns 0 on success, in which case path is unchanged. Returns -1 on
 * failure; path is then left cut at the failing component and error
 * holds a description of the failure.
 */
static int mkpath(char *path, char *p, mode_t mode, mode_t dir_mode, char *error, size_t errlen) {
struct stat sb;
char *slash;
int done = 0;

	slash = p;

	while (!done) {
		// move to the end of the next path component
		slash += strspn(slash, "/");
		slash += strcspn(slash, "/");

		done = (*slash == '\0');
		*slash = '\0';

		if (stat(path, &sb)) {
			// a race with a concurrent creator (EEXIST) is not an error
			if (errno != ENOENT || (mkdir(path, done ? mode : dir_mode) && errno != EEXIST)) {
				snprintf(error, errlen, "mkdir() error for '%s': %s\n", path, strerror(errno));
				return (-1);
			}
		} else if (!S_ISDIR(sb.st_mode)) {
			snprintf(error, errlen, "Path '%s': %s ", path, strerror(ENOTDIR));
			return (-1);
		}

		// restore the separator of intermediate components only - on the
		// last component slash points to the terminating NUL of path,
		// which must stay in place
		if (!done)
			*slash = '/';
	}

	return (0);

} // End of mkpath
1243 
1244