1 /*
2 ** Copyright (C) 2006-2020 by Carnegie Mellon University.
3 **
4 ** @OPENSOURCE_LICENSE_START@
5 ** See license information in ../../LICENSE.txt
6 ** @OPENSOURCE_LICENSE_END@
7 */
8 
9 /*
10 **  rwtuc - Text Utility Converter
11 **
12 **  Takes the output from rwcut and generates SiLK flow records from it.
13 **
14 **  Mark Thomas, March 2006
15 **
16 */
17 
18 
19 #include <silk/silk.h>
20 
21 RCSIDENT("$SiLK: rwtuc.c ef14e54179be 2020-04-14 21:57:45Z mthomas $");
22 
23 #include <silk/rwascii.h>
24 #include <silk/rwrec.h>
25 #include <silk/sksite.h>
26 #include <silk/skstream.h>
27 #include <silk/skstringmap.h>
28 #include <silk/utils.h>
29 
30 
31 /* LOCAL DEFINES AND TYPEDEFS */
32 
/* stream that receives the --help text */
#define USAGE_FH stdout

/* capacity of every array that is indexed by a field ID; the number
 * of meaningful slots in such an array is 'max_avail_field' */
#define TUC_ARRAY_SIZE  32

/* pattern recognizing the obsolete "MM/DD/YYYY hh:mm:ss" time
 * layout.  It only needs to match far enough to tell MM/DD/YYYY
 * apart from YYYY/MM/DD. */
#define RWTUC_TIME_REGEX "^[0-9]{2}/[0-9]{2}/[0-9]{4} [0-9]{2}:"

/* maximum length of an input line; longer lines are ignored */
#define RWTUC_LINE_BUFSIZE 2048

/* characters treated as whitespace when calling strspn(); the list
 * is modeled on isspace() */
#define RWTUC_WHITESPACE    "\t\v\f\r "
51 
52 /* additional field types to define, it addition to the RWREC_FIELD_*
53  * values defined by rwascii.h; values must be contiguous with the
54  * RWREC_FIELD_* values. */
55 typedef enum {
56     TUC_FIELD_IGNORED = RWREC_PRINTABLE_FIELD_COUNT
57 } field_ident_t;
58 
/* Which part of the time triple (sTime, elapsed, eTime), if any,
 * must be derived from the other two; this depends on the fields
 * being parsed. */
typedef enum {
    /* sTime and elapsed are both supplied; nothing to derive */
    CALC_NONE    = 0,
    /* derive sTime as eTime - elapsed */
    CALC_STIME   = 1,
    /* derive elapsed as eTime - sTime */
    CALC_ELAPSED = 2
} time_calc_t;
69 
70 /* various values that get parsed; either from the fixed values the
71  * user enters on the command line or one per line that is read. */
72 typedef struct parsed_values_st {
73     rwRec       rec;
74     char       *class_name;
75     char       *type_name;
76     sktime_t    eTime;
77     uint8_t     itype;
78     uint8_t     icode;
79     time_calc_t handle_time;
80     unsigned    bytes_equals_pkts :1;
81     unsigned    have_icmp         :1;
82 } parsed_values_t;
83 
84 /* current input line */
85 typedef struct current_line_st {
86     /* input line (as read from input) */
87     char        text[RWTUC_LINE_BUFSIZE];
88     /* input stream currently being processed */
89     skstream_t *stream;
90     /* line number in the 'stream' */
91     int         lineno;
92 } current_line_t;
93 
94 
95 /* LOCAL VARIABLES */
96 
97 /* one more than maximum valid field ID.  This is used when
98  * determining which fields were seen and which fields have
99  * defaults. */
100 static const uint32_t max_avail_field = TUC_FIELD_IGNORED;
101 
102 /* fields in addition to those provided by rwascii */
103 static sk_stringmap_entry_t tuc_fields[] = {
104     {"ignore", TUC_FIELD_IGNORED,   NULL, NULL},
105     SK_STRINGMAP_SENTINEL
106 };
107 
108 /* where to send output, set by --output-path */
109 static skstream_t *out_stream = NULL;
110 
111 /* where to copy bad input lines, set by --bad-output-lines */
112 static skstream_t *bad_stream = NULL;
113 
114 /* the number of bad input lines */
115 static unsigned int bad_line_count = 0;
116 
117 /* whether to report parsing errors, set by --verbose */
118 static int verbose = 0;
119 
120 /* whether to halt on first error, set by --stop-on-error */
121 static int stop_on_error = 0;
122 
123 /* whether to always parse the first line as data, set by --no-titles */
124 static int no_titles = 0;
125 
126 /* available fields */
127 static sk_stringmap_t *field_map = NULL;
128 
129 /* character that separates input fields (the delimiter) */
130 static char column_separator = '|';
131 
132 /* for processing the input files */
133 static sk_options_ctx_t *optctx;
134 
135 /* the fields (columns) to parse in the order to parse them; each
136  * value is an ID from field_map, set by --fields */
137 static uint32_t *field_list = NULL;
138 
139 /* number of fields to get from input; length of field_list */
140 static uint32_t num_fields = 0;
141 
142 /* default values from user */
143 static char *default_val[TUC_ARRAY_SIZE];
144 
145 /* regular expression used to determine the time format */
146 static regex_t time_regex;
147 
148 /* automatically set the class for sites that have a single class */
149 static char global_class_name[SK_MAX_STRLEN_FLOWTYPE];
150 
151 /* the compression method to use when writing the file.
152  * skCompMethodOptionsRegister() will set this to the default or
153  * to the value the user specifies. */
154 static sk_compmethod_t comp_method;
155 
156 /* current input line, and stream from which it was read */
157 static current_line_t current_line;
158 
159 /* a pointer to the current input line */
160 static current_line_t *curline = &current_line;
161 
162 
163 /* OPTIONS SETUP */
164 
/* Identifiers for the switches in appOptions[]; the order here must
 * match the order of appOptions[] and appHelp[]. */
typedef enum {
    OPT_FIELDS,
    OPT_COLUMN_SEPARATOR,
    OPT_OUTPUT_PATH,
    OPT_BAD_INPUT_LINES,
    OPT_VERBOSE,
    OPT_STOP_ON_ERROR,
    OPT_NO_TITLES
} appOptionsEnum;
170 
171 
172 static struct option appOptions[] = {
173     {"fields",              REQUIRED_ARG, 0, OPT_FIELDS},
174     {"column-separator",    REQUIRED_ARG, 0, OPT_COLUMN_SEPARATOR},
175     {"output-path",         REQUIRED_ARG, 0, OPT_OUTPUT_PATH},
176     {"bad-input-lines",     REQUIRED_ARG, 0, OPT_BAD_INPUT_LINES},
177     {"verbose",             NO_ARG,       0, OPT_VERBOSE},
178     {"stop-on-error",       NO_ARG,       0, OPT_STOP_ON_ERROR},
179     {"no-titles",           NO_ARG,       0, OPT_NO_TITLES},
180     {0,0,0,0}               /* sentinel entry */
181 };
182 
/* Help text for each entry of appOptions[], in the same order.  The
 * text for --fields is produced at run time by appUsageLong(). */
static const char *appHelp[] = {
    /* --fields: generated dynamically */
    NULL,
    /* --column-separator */
    "Split input fields on this character. Def. '|'",
    /* --output-path */
    "Write the SiLK Flow records to this stream. Def. stdout",
    /* --bad-input-lines */
    "Write each bad input line to this file or stream.\n"
    "\tLines will have the file name and line number prepended. Def. none",
    /* --verbose */
    "Print an error message for each bad input line to the\n"
    "\tstandard error. Def. Quietly ignore errors",
    /* --stop-on-error */
    "Print an error message for a bad input line to stderr\n"
    "\tand exit. Def. Quietly ignore errors and continue processing",
    /* --no-titles */
    "Parse the first line as record values. Requires --fields.\n"
    "\tDef. Skip first line if it appears to contain titles",
    /* sentinel */
    (char *)NULL
};
197 
198 static struct option defaultValueOptions[] = {
199     {"saddress",          REQUIRED_ARG, 0, RWREC_FIELD_SIP},
200     {"daddress",          REQUIRED_ARG, 0, RWREC_FIELD_DIP},
201     {"sport",             REQUIRED_ARG, 0, RWREC_FIELD_SPORT},
202     {"dport",             REQUIRED_ARG, 0, RWREC_FIELD_DPORT},
203     {"protocol",          REQUIRED_ARG, 0, RWREC_FIELD_PROTO},
204 
205     {"packets",           REQUIRED_ARG, 0, RWREC_FIELD_PKTS},
206     {"bytes",             REQUIRED_ARG, 0, RWREC_FIELD_BYTES},
207     {"flags-all",         REQUIRED_ARG, 0, RWREC_FIELD_FLAGS},
208 
209     {"stime",             REQUIRED_ARG, 0, RWREC_FIELD_STIME},
210     {"duration",          REQUIRED_ARG, 0, RWREC_FIELD_ELAPSED},
211     {"etime",             REQUIRED_ARG, 0, RWREC_FIELD_ETIME},
212 
213     {"sensor",            REQUIRED_ARG, 0, RWREC_FIELD_SID},
214 
215     {"input-index",       REQUIRED_ARG, 0, RWREC_FIELD_INPUT},
216     {"output-index",      REQUIRED_ARG, 0, RWREC_FIELD_OUTPUT},
217     {"next-hop-ip",       REQUIRED_ARG, 0, RWREC_FIELD_NHIP},
218 
219     {"flags-initial",     REQUIRED_ARG, 0, RWREC_FIELD_INIT_FLAGS},
220     {"flags-session",     REQUIRED_ARG, 0, RWREC_FIELD_REST_FLAGS},
221     {"attributes",        REQUIRED_ARG, 0, RWREC_FIELD_TCP_STATE},
222     {"application",       REQUIRED_ARG, 0, RWREC_FIELD_APPLICATION},
223 
224     {"class",             REQUIRED_ARG, 0, RWREC_FIELD_FTYPE_CLASS},
225     {"type",              REQUIRED_ARG, 0, RWREC_FIELD_FTYPE_TYPE},
226 
227     {"stime+msec",        REQUIRED_ARG, 0, RWREC_FIELD_STIME_MSEC},
228     {"etime+msec",        REQUIRED_ARG, 0, RWREC_FIELD_ETIME_MSEC},
229     {"duration+msec",     REQUIRED_ARG, 0, RWREC_FIELD_ELAPSED_MSEC},
230 
231     {"icmp-type",         REQUIRED_ARG, 0, RWREC_FIELD_ICMP_TYPE},
232     {"icmp-code",         REQUIRED_ARG, 0, RWREC_FIELD_ICMP_CODE},
233 
234     {0,0,0,0}             /* sentinel entry */
235 };
236 
237 
238 
239 /* LOCAL FUNCTION PROTOTYPES */
240 
241 static int  appOptionsHandler(clientData cData, int opt_index, char *opt_arg);
242 static int  defaultValueHandler(clientData cData, int opt_ind, char *opt_arg);
243 static int  createStringmaps(void);
244 static int  parseFields(const char *field_string, char **errmsg);
245 static int
246 processFields(
247     parsed_values_t        *val,
248     uint32_t                field_count,
249     uint32_t               *field_type,
250     char                  **field_val,
251     int                     checking_defaults);
252 static void badLine(const char *fmt, ...)
253     SK_CHECK_PRINTF(1, 2);
254 
255 
256 /* FUNCTION DEFINITIONS */
257 
258 /*
259  *  appUsageLong();
260  *
261  *    Print complete usage information to USAGE_FH.  Pass this
262  *    function to skOptionsSetUsageCallback(); skOptionsParse() will
263  *    call this funciton and then exit the program when the --help
264  *    option is given.
265  */
266 static void
appUsageLong(void)267 appUsageLong(
268     void)
269 {
270 #define USAGE_MSG                                                             \
271     ("[SWITCHES] [FILES]\n"                                                   \
272      "\tGenerate SiLK flow records from textual input; the input should be\n" \
273      "\tin a form similar to what rwcut generates.\n")
274 
275     FILE *fh = USAGE_FH;
276     unsigned int i;
277 
278     fprintf(fh, "%s %s", skAppName(), USAGE_MSG);
279     fprintf(fh, "\nSWITCHES:\n");
280     skOptionsDefaultUsage(fh);
281 
282     for (i = 0; appOptions[i].name; ++i) {
283         fprintf(fh, "--%s %s. ", appOptions[i].name,
284                 SK_OPTION_HAS_ARG(appOptions[i]));
285         switch (appOptions[i].val) {
286           case OPT_FIELDS:
287             fprintf(fh, "Field(s) to parse from the input. List fields by"
288                     " name or\n\tnumber, separated by commas:\n");
289             skStringMapPrintUsage(field_map, fh, 4);
290             break;
291           default:
292             /* Simple static help text from the appHelp array */
293             fprintf(fh, "%s\n", appHelp[i]);
294             break;
295         }
296     }
297 
298     skOptionsNotesUsage(fh);
299     skCompMethodOptionsUsage(fh);
300     sksiteOptionsUsage(fh);
301     skOptionsCtxOptionsUsage(optctx, fh);
302 
303     for (i = 0; defaultValueOptions[i].name; ++i) {
304         fprintf(fh, "--%s %s. Use given value for the %s field.\n",
305                 defaultValueOptions[i].name,
306                 SK_OPTION_HAS_ARG(defaultValueOptions[i]),
307                 defaultValueOptions[i].name);
308     }
309 
310 }
311 
312 
313 /*
314  *  appTeardown()
315  *
316  *    Teardown all modules, close all files, and tidy up all
317  *    application state.
318  *
319  *    This function is idempotent.
320  */
321 static void
appTeardown(void)322 appTeardown(
323     void)
324 {
325     static int teardownFlag = 0;
326     int rv;
327 
328     if (teardownFlag) {
329         return;
330     }
331     teardownFlag = 1;
332 
333     if (out_stream) {
334         rv = skStreamClose(out_stream);
335         if (rv && rv != SKSTREAM_ERR_NOT_OPEN) {
336             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
337         }
338         skStreamDestroy(&out_stream);
339     }
340 
341     if (bad_stream) {
342         rv = skStreamClose(bad_stream);
343         if (SKSTREAM_OK == rv) {
344             if (0 == bad_line_count && skStreamIsSeekable(bad_stream)) {
345                 unlink(skStreamGetPathname(bad_stream));
346             }
347         } else if (rv != SKSTREAM_ERR_NOT_OPEN) {
348             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
349         }
350         skStreamDestroy(&bad_stream);
351         bad_stream = NULL;
352     }
353 
354     if (field_map) {
355         (void)skStringMapDestroy(field_map);
356     }
357     if (field_list) {
358         free(field_list);
359         field_list = NULL;
360     }
361 
362     regfree(&time_regex);
363 
364     skOptionsNotesTeardown();
365     skOptionsCtxDestroy(&optctx);
366     skAppUnregister();
367 }
368 
369 
370 /*
371  *  appSetup(argc, argv);
372  *
373  *    Perform all the setup for this application include setting up
374  *    required modules, parsing options, etc.  This function should be
375  *    passed the same arguments that were passed into main().
376  *
377  *    Returns to the caller if all setup succeeds.  If anything fails,
378  *    this function will cause the application to exit with a FAILURE
379  *    exit status.
380  */
381 static void
appSetup(int argc,char ** argv)382 appSetup(
383     int                 argc,
384     char              **argv)
385 {
386     SILK_FEATURES_DEFINE_STRUCT(features);
387     unsigned int optctx_flags;
388     int rv;
389 
390     /* verify same number of options and help strings */
391     assert((sizeof(appHelp)/sizeof(char *)) ==
392            (sizeof(appOptions)/sizeof(struct option)));
393 
394     assert(TUC_FIELD_IGNORED < TUC_ARRAY_SIZE);
395 
396     /* register the application */
397     skAppRegister(argv[0]);
398     skAppVerifyFeatures(&features, NULL);
399     skOptionsSetUsageCallback(&appUsageLong);
400 
401     /* initialize globals */
402     memset(default_val, 0, sizeof(default_val));
403 
404     /* although the input to rwtuc is text and not binary, set the
405      * INPUT_BINARY flag so rwtuc does not accept input from a TTY
406      * without the user explicitly providing an arg of "stdin" */
407     optctx_flags = (SK_OPTIONS_CTX_ALLOW_STDIN | SK_OPTIONS_CTX_XARGS
408                     | SK_OPTIONS_CTX_INPUT_BINARY);
409 
410     /* register the options */
411     if (skOptionsCtxCreate(&optctx, optctx_flags)
412         || skOptionsCtxOptionsRegister(optctx)
413         || skOptionsRegister(appOptions, &appOptionsHandler, NULL)
414         || skOptionsRegister(defaultValueOptions, &defaultValueHandler, NULL)
415         || skOptionsNotesRegister(NULL)
416         || skCompMethodOptionsRegister(&comp_method)
417         || sksiteOptionsRegister(SK_SITE_FLAG_CONFIG_FILE))
418     {
419         skAppPrintErr("Unable to register options");
420         exit(EXIT_FAILURE);
421     }
422 
423     /* set time regex */
424     rv = regcomp(&time_regex, RWTUC_TIME_REGEX, REG_EXTENDED|REG_NOSUB);
425     if (rv) {
426         char errbuf[1024];
427         regerror(rv, &time_regex, errbuf, sizeof(errbuf));
428         skAppPrintErr("Unable to compile time regex: %s", errbuf);
429         exit(EXIT_FAILURE);
430     }
431 
432     /* register the teardown handler */
433     if (atexit(appTeardown) < 0) {
434         skAppPrintErr("Unable to register appTeardown() with atexit()");
435         appTeardown();
436         exit(EXIT_FAILURE);
437     }
438 
439     /* initialize string-map of field identifiers, and add the locally
440      * defined fields. */
441     if (createStringmaps()) {
442         skAppPrintErr("Unable to setup fields stringmap");
443         exit(EXIT_FAILURE);
444     }
445 
446     /* parse the options */
447     rv = skOptionsCtxOptionsParse(optctx, argc, argv);
448     if (rv < 0) {
449         skAppUsage();           /* never returns */
450     }
451 
452     /* cannot specify --no-titles unless --fields is given */
453     if (no_titles && !field_list) {
454         skAppPrintErr("May only use --%s when --%s is specified",
455                       appOptions[OPT_NO_TITLES].name,
456                       appOptions[OPT_FIELDS].name);
457         skAppUsage();
458     }
459 
460     /* try to load site config file; if it fails, we will not be able
461      * to resolve flowtype and sensor from input file names */
462     sksiteConfigure(0);
463 
464     /* use "stdout" as default output path */
465     if (NULL == out_stream) {
466         if ((rv = skStreamCreate(&out_stream,SK_IO_WRITE,SK_CONTENT_SILK_FLOW))
467             || (rv = skStreamBind(out_stream, "stdout")))
468         {
469             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
470             skAppPrintErr("Could not create output stream");
471             exit(EXIT_FAILURE);
472         }
473     }
474 
475     /* open bad output, but first ensure it is not the same as the
476      * record output */
477     if (bad_stream) {
478         if (0 == strcmp(skStreamGetPathname(out_stream),
479                         skStreamGetPathname(bad_stream)))
480         {
481             skAppPrintErr("Cannot use same stream for bad input and records");
482             exit(EXIT_FAILURE);
483         }
484         rv = skStreamOpen(bad_stream);
485         if (rv) {
486             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
487             exit(EXIT_FAILURE);
488         }
489     }
490 
491     /* open output */
492     if ((rv = skStreamSetCompressionMethod(out_stream, comp_method))
493         || (rv = skOptionsNotesAddToStream(out_stream))
494         || (rv = skHeaderAddInvocation(skStreamGetSilkHeader(out_stream),
495                                        1, argc, argv))
496         || (rv = skStreamOpen(out_stream)))
497     {
498         skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
499         skAppPrintErr("Could not open output file");
500         exit(EXIT_FAILURE);
501     }
502 
503     return;  /* OK */
504 }
505 
506 
507 /*
508  *  status = appOptionsHandler(cData, opt_index, opt_arg);
509  *
510  *    This function is passed to skOptionsRegister(); it will be called
511  *    by skOptionsParse() for each user-specified switch that the
512  *    application has registered; it should handle the switch as
513  *    required---typically by setting global variables---and return 1
514  *    if the switch processing failed or 0 if it succeeded.  Returning
515  *    a non-zero from from the handler causes skOptionsParse() to return
516  *    a negative value.
517  *
518  *    The clientData in 'cData' is typically ignored; 'opt_index' is
519  *    the index number that was specified as the last value for each
520  *    struct option in appOptions[]; 'opt_arg' is the user's argument
521  *    to the switch for options that have a REQUIRED_ARG or an
522  *    OPTIONAL_ARG.
523  */
524 static int
appOptionsHandler(clientData UNUSED (cData),int opt_index,char * opt_arg)525 appOptionsHandler(
526     clientData   UNUSED(cData),
527     int                 opt_index,
528     char               *opt_arg)
529 {
530     const char *char_name;
531     char *errmsg;
532     int rv;
533 
534     switch ((appOptionsEnum)opt_index) {
535       case OPT_FIELDS:
536         if (field_list != NULL) {
537             skAppPrintErr("Invalid %s: Switch used multiple times",
538                           appOptions[opt_index].name);
539             return 1;
540         }
541         if (parseFields(opt_arg, &errmsg)) {
542             skAppPrintErr("Invalid %s: %s",
543                           appOptions[opt_index].name, errmsg);
544             return 1;
545         }
546         break;
547 
548       case OPT_COLUMN_SEPARATOR:
549         switch (opt_arg[0]) {
550           case '\n':
551             char_name = "newline";
552             break;
553           case '\r':
554             char_name = "carriage return";
555             break;
556           case '\0':
557             char_name = "end-of-string";
558             break;
559           default:
560             char_name = NULL;
561             break;
562         }
563         if (char_name) {
564             skAppPrintErr("Invalid %s: May not be the %s character",
565                           appOptions[opt_index].name, char_name);
566             return 1;
567         }
568         column_separator = opt_arg[0];
569         break;
570 
571       case OPT_OUTPUT_PATH:
572         if (out_stream) {
573             skAppPrintErr("Invalid %s: Switch used multiple times",
574                           appOptions[opt_index].name);
575             return 1;
576         }
577         if ((rv = skStreamCreate(&out_stream,SK_IO_WRITE,SK_CONTENT_SILK_FLOW))
578             || (rv = skStreamBind(out_stream, opt_arg)))
579         {
580             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
581             return 1;
582         }
583         break;
584 
585       case OPT_BAD_INPUT_LINES:
586         if (bad_stream) {
587             skAppPrintErr("Invalid %s: Switch used multiple times",
588                           appOptions[opt_index].name);
589             return 1;
590         }
591         if ((rv = skStreamCreate(&bad_stream, SK_IO_WRITE, SK_CONTENT_TEXT))
592             || (rv = skStreamBind(bad_stream, opt_arg)))
593         {
594             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
595             return 1;
596         }
597         break;
598 
599       case OPT_VERBOSE:
600         verbose = 1;
601         break;
602 
603       case OPT_STOP_ON_ERROR:
604         stop_on_error = 1;
605         break;
606 
607       case OPT_NO_TITLES:
608         no_titles = 1;
609         break;
610     }
611 
612     return 0;  /* OK */
613 }
614 
615 
616 /*
617  *  ok = defaultValueHandler(cData, opt_index, opt_arg);
618  *
619  *    Like appOptionsHandler(), except it handles the options
620  *    specified in the defaultValueOptions[] array.
621  */
622 static int
defaultValueHandler(clientData UNUSED (cData),int opt_index,char * opt_arg)623 defaultValueHandler(
624     clientData   UNUSED(cData),
625     int                 opt_index,
626     char               *opt_arg)
627 {
628     if (opt_index < 0 || opt_index >= TUC_ARRAY_SIZE) {
629         skAbort();
630     }
631     default_val[opt_index] = opt_arg;
632     return 0;
633 }
634 
635 
636 /*
637  *  ok = createStringmaps();
638  *
639  *    Create the global 'field_map'.  Return 0 on success, or -1 on
640  *    failure.
641  */
642 static int
createStringmaps(void)643 createStringmaps(
644     void)
645 {
646     if (rwAsciiFieldMapAddDefaultFields(&field_map)
647         || skStringMapAddEntries(field_map, -1, tuc_fields))
648     {
649         return -1;
650     }
651 
652     if (max_avail_field > TUC_ARRAY_SIZE) {
653         skAbort();
654     }
655 
656     return 0;
657 }
658 
659 
660 /*
661  *  status = parseFields(fields_string);
662  *
663  *    Parse the user's option for the --fields switch and fill in the
664  *    global 'outputs[]' array of out_stream_t's.  Return 0 on
665  *    success; -1 on failure.
666  */
667 static int
parseFields(const char * field_string,char ** errmsg)668 parseFields(
669     const char         *field_string,
670     char              **errmsg)
671 {
672     static char buf[256];
673     BITMAP_DECLARE(field_dup, TUC_ARRAY_SIZE);
674     sk_stringmap_iter_t *iter = NULL;
675     sk_stringmap_entry_t *entry;
676     int rv = -1;
677     uint32_t i;
678 
679     assert(NULL == field_list);
680 
681     /* parse the fields; duplicate 'ignore' fields are okay, but any
682      * other duplcate is an error */
683     if (skStringMapParse(field_map, field_string, SKSTRINGMAP_DUPES_KEEP,
684                          &iter, errmsg))
685     {
686         goto END;
687     }
688 
689     /* check for duplicate fields */
690     BITMAP_INIT(field_dup);
691     while (skStringMapIterNext(iter, &entry, NULL) == SK_ITERATOR_OK) {
692         assert(entry->id < TUC_ARRAY_SIZE);
693         if (BITMAP_GETBIT(field_dup, entry->id)
694             && TUC_FIELD_IGNORED != entry->id)
695         {
696             snprintf(buf, sizeof(buf), "Duplicate name '%s'", entry->name);
697             *errmsg = buf;
698             goto END;
699         }
700         BITMAP_SETBIT(field_dup, entry->id);
701     }
702 
703     skStringMapIterReset(iter);
704 
705     /* create an array to hold the IDs */
706     num_fields = skStringMapIterCountMatches(iter);
707     field_list = (uint32_t*)malloc(num_fields * sizeof(uint32_t));
708     if (NULL == field_list) {
709         skAppPrintOutOfMemory("field id list");
710         goto END;
711     }
712 
713     /* fill the array */
714     for (i = 0; skStringMapIterNext(iter, &entry, NULL)==SK_ITERATOR_OK; ++i) {
715         assert(i < num_fields);
716         field_list[i] = entry->id;
717     }
718 
719     rv = 0;
720 
721   END:
722     if (iter) {
723         skStringMapIterDestroy(iter);
724     }
725     return rv;
726 }
727 
728 
729 /*
730  *    If invalid input lines are being written to a stream, write the
731  *    text in 'curline', preceeded by the input file's name and line
732  *    number.
733  *
734  *    If verbose output or stop-on-error is set, format the error
735  *    message given by the arguments and print an error message.  The
736  *    error message includes the current input file and line number.
737  */
badLine(const char * fmt,...)738 static void badLine(
739     const char         *fmt,
740     ...)
741 {
742     char errbuf[2 * PATH_MAX];
743     va_list ap;
744 
745     ++bad_line_count;
746 
747     va_start(ap, fmt);
748     if (bad_stream) {
749         skStreamPrint(bad_stream, "%s:%d:%s\n",
750                       skStreamGetPathname(curline->stream),
751                       curline->lineno, curline->text);
752     }
753     if (verbose || stop_on_error) {
754         vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
755         skAppPrintErr("%s:%d:%s",
756                       skStreamGetPathname(curline->stream), curline->lineno,
757                       errbuf);
758         if (stop_on_error) {
759             va_end(ap);
760             exit(EXIT_FAILURE);
761         }
762     }
763     va_end(ap);
764 }
765 
766 
767 /*
768  *  is_title = firstLineIsTitle(first_line);
769  *
770  *    Determine if the input line in 'first_line' is a title line by
771  *    looking for a word that matches a field name.  Return 1 if it
772  *    is, 0 if it is not.
773  */
774 static int
firstLineIsTitle(char * first_line)775 firstLineIsTitle(
776     char               *first_line)
777 {
778     sk_stringmap_entry_t *entry;
779     char *cp;
780     char *ep;
781     uint32_t i;
782     int is_title = 0;
783 
784     assert(first_line);
785     assert(field_list);
786 
787     /* we have the fields, need to determine if first_line is a
788      * title line. */
789     cp = first_line;
790     for (i = 0; i < num_fields; ++i) {
791         ep = strchr(cp, column_separator);
792         if (!is_title && field_list[i] != TUC_FIELD_IGNORED) {
793             if (ep) {
794                 *ep = '\0';
795             }
796             while ((isspace((int)*cp))) {
797                 ++cp;
798             }
799             if ('\0' == *cp) {
800                 /* ignore */
801             } else if (!isdigit((int)*cp)) {
802                 if (skStringMapGetByName(field_map, cp, &entry)
803                     == SKSTRINGMAP_OK)
804                 {
805                     is_title = 1;
806                 }
807             }
808         }
809         if (ep) {
810             *ep = column_separator;
811             cp = ep + 1;
812         } else {
813             cp += strlen(cp);
814             if ((1 + i != num_fields) && is_title) {
815                 badLine(("Too few fields on title line:"
816                          " found %" PRIu32 " of %" PRIu32 " expected"),
817                         i, num_fields);
818             }
819             break;
820         }
821     }
822 
823     if (is_title && (*cp != '\0')
824         && (strlen(cp) != strspn(cp, RWTUC_WHITESPACE)))
825     {
826         badLine(("Too many fields on title line:"
827                  " text follows delimiter number %" PRIu32), num_fields);
828     }
829 
830     return is_title;
831 }
832 
833 
834 /*
835  *  is_title = determineFields(&field_type, &field_val, &defaults, firstline);
836  *
837  *    Set the types of fields to be parsed in this file (field_type),
838  *    an array to hold the strings to be parsed on each row
839  *    (field_val), and the default values for this file (defaults).
840  *    When finished with the file, the caller should free the
841  *    'field_type' and 'field_val' arrays.
842  *
843  *    The set of field_type's will be determined from the --fields
844  *    value if present, otherwise from the firstline of the file,
845  *    which must be a title-line.  If the user provided a fixed value
846  *    for the field, any field having that type will be set to
847  *    'ignore'.
848  *
849  *    Return 0 if the first line contains data to be parsed; 1 if it
850  *    contains a title; or -1 on error.
851  *
852  *    We should be smarter; if the user provided a --fields switch,
853  *    there is no need to recompute the defaults each time, and the
854  *    field_type and field_val arrays will have fixed sizes, so they
855  *    would not need to be reallocated each time.
856  */
857 static int
determineFields(uint32_t ** field_type,char *** field_val,parsed_values_t * defaults,char * first_line)858 determineFields(
859     uint32_t              **field_type,
860     char                 ***field_val,
861     parsed_values_t        *defaults,
862     char                   *first_line)
863 {
864     uint32_t have_field[TUC_ARRAY_SIZE];
865     uint32_t default_list[TUC_ARRAY_SIZE];
866     char *active_defaults[TUC_ARRAY_SIZE];
867     uint32_t num_defaults;
868     uint32_t i;
869     int is_title = 0;
870     int per_file_field_list = 0;
871     int have_stime, have_etime, have_elapsed;
872     char *errmsg;
873 
874     memset(defaults, 0, sizeof(parsed_values_t));
875     memset(have_field, 0, sizeof(have_field));
876 
877     if (field_list != NULL) {
878         /* already have a field list */
879         if (0 == no_titles) {
880             /* check whether the first line a title */
881             is_title = firstLineIsTitle(first_line);
882             if (is_title < 0) {
883                 return is_title;
884             }
885         }
886     } else {
887         /* need to get fields from the first line */
888         char *cp, *ep;
889         assert(0 == no_titles);
890         cp = ep = first_line;
891         while (*cp) {
892             if (*cp == column_separator) {
893                 /* convert column_separator to comma for parseFields() */
894                 *ep++ = ',';
895                 ++cp;
896             } else if (isspace((int)*cp)) {
897                 /* ignore spaces */
898                 ++cp;
899             } else {
900                 /* copy character */
901                 *ep++ = *cp++;
902             }
903         }
904         *ep = *cp;
905         if (parseFields(first_line, &errmsg)) {
906             skAppPrintErr(("Unable to determine fields from first line"
907                            " of stream '%s': %s"),
908                           skStreamGetPathname(curline->stream), errmsg);
909             return -1;
910         }
911         is_title = 1;
912         per_file_field_list = 1;
913     }
914 
915     /* create an array to hold a copy of the field_list */
916     *field_type = (uint32_t*)calloc(num_fields, sizeof(uint32_t));
917     if (*field_type == NULL) {
918         skAppPrintOutOfMemory("field list copy");
919         exit(EXIT_FAILURE);
920     }
921 
922     /* create an array to hold the field values */
923     *field_val = (char**)calloc(num_fields, sizeof(char*));
924     if (*field_val == NULL) {
925         skAppPrintOutOfMemory("field values");
926         exit(EXIT_FAILURE);
927     }
928 
929     /* set have_field[] for all the fields we saw.  In addition, copy
930      * the field_list into the copy (field_type) that gets returned,
931      * but set any fields that have default values to 'ignore' so they
932      * do not get parsed. */
933     for (i = 0; i < num_fields; ++i) {
934         have_field[field_list[i]] = 1;
935         if (default_val[field_list[i]] == NULL) {
936             (*field_type)[i] = field_list[i];
937         } else {
938             (*field_type)[i] = TUC_FIELD_IGNORED;
939         }
940     }
941 
942     /* destroy the field_list if we created it above */
943     if (per_file_field_list) {
944         free(field_list);
945         field_list = NULL;
946     }
947 
948     /* set have_field[] for all values that have defaults */
949     for (i = 0; i < max_avail_field; ++i) {
950         if (default_val[i] != NULL) {
951             have_field[i] = 1;
952         }
953     }
954 
955     /* if there is no packets value, set to 1 */
956     if ( !have_field[RWREC_FIELD_PKTS]) {
957         rwRecSetPkts(&defaults->rec, 1);
958     }
959 
960     /* if no bytes value, we will set it to the packets value */
961     if ( !have_field[RWREC_FIELD_BYTES]) {
962         if ( !have_field[RWREC_FIELD_PKTS]) {
963             /* packets field is fixed, so bytes field can be fixed too */
964             rwRecSetBytes(&defaults->rec, 1);
965         } else {
966             /* must do calculation each time */
967             defaults->bytes_equals_pkts = 1;
968         }
969     }
970 
971     /* must have both or neither ICMP type and ICMP code */
972     if (have_field[RWREC_FIELD_ICMP_TYPE]
973         != have_field[RWREC_FIELD_ICMP_CODE])
974     {
975         skAppPrintErr("Either both ICMP type and ICMP code"
976                       " must be present or neither may be present");
977         return -1;
978     }
979     if (have_field[RWREC_FIELD_ICMP_TYPE]) {
980         defaults->have_icmp = 1;
981     }
982 
983     /* must have both or neither initial and session flags */
984     if (have_field[RWREC_FIELD_INIT_FLAGS]
985         != have_field[RWREC_FIELD_REST_FLAGS])
986     {
987         skAppPrintErr("Either both initial- and session-flags"
988                       " must be present or neither may be present");
989         return -1;
990     }
991     if (have_field[RWREC_FIELD_INIT_FLAGS]) {
992         rwRecSetTcpState(&defaults->rec, SK_TCPSTATE_EXPANDED);
993     }
994 
995     /* need a time */
996     have_stime = (have_field[RWREC_FIELD_STIME]
997                   || have_field[RWREC_FIELD_STIME_MSEC]);
998     have_etime = (have_field[RWREC_FIELD_ETIME]
999                   || have_field[RWREC_FIELD_ETIME_MSEC]);
1000     have_elapsed = (have_field[RWREC_FIELD_ELAPSED]
1001                     || have_field[RWREC_FIELD_ELAPSED_MSEC]);
1002     if (have_stime) {
1003         if (have_elapsed) {
1004             defaults->handle_time = CALC_NONE;
1005             if (have_etime) {
1006                 /* we will set etime from stime+elapsed */
1007                 default_val[RWREC_FIELD_ETIME] = NULL;
1008                 default_val[RWREC_FIELD_ETIME_MSEC] = NULL;
1009                 for (i = 0; i < num_fields; ++i) {
1010                     if (((*field_type)[i] == RWREC_FIELD_ETIME)
1011                         || ((*field_type)[i] == RWREC_FIELD_ETIME_MSEC))
1012                     {
1013                         (*field_type)[i] = TUC_FIELD_IGNORED;
1014                     }
1015                 }
1016             }
1017         } else if (have_etime) {
1018             /* must compute elapsed from eTime - sTime */
1019             defaults->handle_time = CALC_ELAPSED;
1020         }
1021         /* else elapsed is fixed at 0 */
1022     } else if (have_etime) {
1023         /* must calculate stime from etime and duration */
1024         defaults->handle_time = CALC_STIME;
1025 
1026         /* we could be smarter here: if we have etime but no elapsed
1027          * time, then stime and etime will be equal, and we could just
1028          * set the stime instead of the etime */
1029     } else {
1030         /* have no stime or etime.  set stime to now */
1031         rwRecSetStartTime(&defaults->rec, sktimeNow());
1032         defaults->handle_time = CALC_NONE;
1033     }
1034 
1035     /* set the class to the default when 'type' is specified but class
1036      * isn't and silk.conf defines only one class. */
1037     if (have_field[RWREC_FIELD_FTYPE_TYPE]
1038         && !have_field[RWREC_FIELD_FTYPE_CLASS]
1039         && (0 == sksiteClassGetMaxID()))
1040     {
1041         sksiteClassGetName(global_class_name, sizeof(global_class_name), 0);
1042         defaults->class_name = global_class_name;
1043     }
1044 
1045     /* create a list of fields for which we have default values */
1046     num_defaults = 0;
1047     for (i = 0; i < max_avail_field; ++i) {
1048         if (default_val[i] != NULL) {
1049             default_list[num_defaults] = i;
1050             active_defaults[num_defaults] = default_val[i];
1051             ++num_defaults;
1052         }
1053     }
1054 
1055     /* process the default fields */
1056     if (processFields(defaults,num_defaults,default_list,active_defaults,1))
1057     {
1058         return -1;
1059     }
1060 
1061     /* verify class and type */
1062     if (defaults->class_name && defaults->type_name) {
1063         if (rwRecGetFlowType(&defaults->rec) == SK_INVALID_FLOWTYPE) {
1064             skAppPrintErr("Bad default class/type combination: %s/%s",
1065                           defaults->class_name, defaults->type_name);
1066             return -1;
1067         }
1068         /* we have set the flow_type on the default record, there is
1069          * no need to look it up for each line. */
1070         defaults->class_name = defaults->type_name = NULL;
1071     }
1072 
1073     return is_title;
1074 }
1075 
1076 
/*
 *  convertOldTime(old_time_str);
 *
 *    Rewrite, in place, a timestamp whose leading characters have the
 *    old layout
 *
 *        MM/DD/YYYY hh:mm:ss[.sss]
 *
 *    so that it has the layout YYYY/MM/DD:hh:mm:ss[.sss].
 *
 *    Only the first 11 characters (indexes 0 through 10) are touched;
 *    the caller must ensure the string is at least that long.  The
 *    contents are not validated here.
 */
static void
convertOldTime(
    char               *old_time_str)
{
    char month_day[5];
    char year[4];

    /* save the "MM/DD" prefix and the "YYYY" that follows the second
     * slash before either region is overwritten */
    memcpy(month_day, old_time_str, sizeof(month_day));
    memcpy(year, old_time_str + 6, sizeof(year));

    /* lay the pieces back down as "YYYY" '/' "MM/DD" ':' */
    memcpy(old_time_str, year, sizeof(year));
    old_time_str[4] = '/';
    memcpy(old_time_str + 5, month_day, sizeof(month_day));
    old_time_str[10] = ':';
}
1101 
1102 
1103 /*
1104  *  ok = processFields(val, field_count, field_types, field_values, checking_defaults);
1105  *
1106  *    Parse the 'field_count' fields whose types and string-values are
1107  *    given in the 'field_types' and 'field_values' arrays,
1108  *    respectively.  Set the fields specified in the 'val' structure.
1109  *
1110  *    'checking_defaults' should be non-zero when processing the
1111  *    default values and 0 otherwise.  It is used to determine the
1112  *    text of the error message should a value be invalid.
1113  *
1114  *    Return 0 on success, non-zero on failure.
1115  */
1116 static int
processFields(parsed_values_t * val,uint32_t field_count,uint32_t * field_type,char ** field_val,int checking_defaults)1117 processFields(
1118     parsed_values_t        *val,
1119     uint32_t                field_count,
1120     uint32_t               *field_type,
1121     char                  **field_val,
1122     int                     checking_defaults)
1123 {
1124     char field_name[128];
1125     sktime_t t;
1126     skipaddr_t ipaddr;
1127     uint32_t tmp32;
1128     uint8_t proto;
1129     uint8_t flags;
1130     uint8_t tcp_state;
1131     uint32_t i;
1132     char *cp;
1133     int rv = 0;
1134 
1135     tcp_state = rwRecGetTcpState(&val->rec);
1136 
1137     for (i = 0; i < field_count; ++i) {
1138         cp = field_val[i];
1139         while (isspace((int)*cp)) {
1140             ++cp;
1141         }
1142 
1143         switch (field_type[i]) {
1144           case TUC_FIELD_IGNORED:
1145             break;
1146 
1147           case RWREC_FIELD_ICMP_TYPE:
1148             rv = skStringParseUint32(&tmp32, cp, 0, UINT8_MAX);
1149             if (rv) {
1150                 goto PARSE_ERROR;
1151             }
1152             val->itype = (uint8_t)tmp32;
1153             break;
1154 
1155           case RWREC_FIELD_ICMP_CODE:
1156             rv = skStringParseUint32(&tmp32, cp, 0, UINT8_MAX);
1157             if (rv) {
1158                 goto PARSE_ERROR;
1159             }
1160             val->icode = (uint8_t)tmp32;
1161             break;
1162 
1163           case RWREC_FIELD_SIP:
1164             rv = skStringParseIP(&ipaddr, cp);
1165             if (rv) {
1166                 goto PARSE_ERROR;
1167             }
1168             rwRecMemSetSIP(&val->rec, &ipaddr);
1169             break;
1170 
1171           case RWREC_FIELD_DIP:
1172             rv = skStringParseIP(&ipaddr, cp);
1173             if (rv) {
1174                 goto PARSE_ERROR;
1175             }
1176             rwRecMemSetDIP(&val->rec, &ipaddr);
1177             break;
1178 
1179           case RWREC_FIELD_SPORT:
1180             rv = skStringParseUint32(&tmp32, cp, 0, UINT16_MAX);
1181             if (rv) {
1182                 goto PARSE_ERROR;
1183             }
1184             rwRecSetSPort(&val->rec, (uint16_t)tmp32);
1185             break;
1186 
1187           case RWREC_FIELD_DPORT:
1188             rv = skStringParseUint32(&tmp32, cp, 0, UINT16_MAX);
1189             if (rv) {
1190                 goto PARSE_ERROR;
1191             }
1192             rwRecSetDPort(&val->rec, (uint16_t)tmp32);
1193             break;
1194 
1195           case RWREC_FIELD_PROTO:
1196             rv = skStringParseUint32(&tmp32, cp, 0, UINT8_MAX);
1197             if (rv) {
1198                 goto PARSE_ERROR;
1199             }
1200             rwRecSetProto(&val->rec, (uint8_t)tmp32);
1201             break;
1202 
1203           case RWREC_FIELD_PKTS:
1204             rv = skStringParseUint32(&tmp32, cp, 1, 0);
1205             if (rv) {
1206                 /* FIXME: Clamp value to max instead of rejecting */
1207                 goto PARSE_ERROR;
1208             }
1209             rwRecSetPkts(&val->rec, tmp32);
1210             break;
1211 
1212           case RWREC_FIELD_BYTES:
1213             rv = skStringParseUint32(&tmp32, cp, 1, 0);
1214             if (rv) {
1215                 /* FIXME: Clamp value to max instead of rejecting */
1216                 goto PARSE_ERROR;
1217             }
1218             rwRecSetBytes(&val->rec, tmp32);
1219             break;
1220 
1221           case RWREC_FIELD_FLAGS:
1222             rv = skStringParseTCPFlags(&flags, cp);
1223             if (rv) {
1224                 goto PARSE_ERROR;
1225             }
1226             rwRecSetFlags(&val->rec, flags);
1227             break;
1228 
1229           case RWREC_FIELD_STIME:
1230           case RWREC_FIELD_STIME_MSEC:
1231             if (0 == regexec(&time_regex, cp, 0, NULL, 0)) {
1232                 convertOldTime(cp);
1233             }
1234             rv = skStringParseDatetime(&t, cp, NULL);
1235             if (rv) {
1236                 /* FIXME: Allow small integers as epoch times? */
1237                 goto PARSE_ERROR;
1238             }
1239             rwRecSetStartTime(&val->rec, t);
1240             break;
1241 
1242           case RWREC_FIELD_ELAPSED:
1243           case RWREC_FIELD_ELAPSED_MSEC:
1244             {
1245                 double dur;
1246                 rv = skStringParseDouble(&dur, cp, 0.0,
1247                                          ((double)UINT32_MAX / 1e3));
1248                 if (rv) {
1249                     /* FIXME: Clamp value to max instead of rejecting */
1250                     goto PARSE_ERROR;
1251                 }
1252                 /* add a bit of slop since doubles aren't exact */
1253                 rwRecSetElapsed(&val->rec, (uint32_t)(1000 * (dur + 5e-7)));
1254             }
1255             break;
1256 
1257           case RWREC_FIELD_ETIME:
1258           case RWREC_FIELD_ETIME_MSEC:
1259             if (0 == regexec(&time_regex, cp, 0, NULL, 0)) {
1260                 convertOldTime(cp);
1261             }
1262             rv = skStringParseDatetime(&(val->eTime), cp, NULL);
1263             if (rv) {
1264                 /* FIXME: Allow small integers as epoch times? */
1265                 goto PARSE_ERROR;
1266             }
1267             break;
1268 
1269           case RWREC_FIELD_SID:
1270             if (isdigit((int)*cp)) {
1271                 rv = skStringParseUint32(&tmp32, cp, 0, SK_INVALID_SENSOR-1);
1272                 if (rv) {
1273                     goto PARSE_ERROR;
1274                 }
1275                 rwRecSetSensor(&val->rec, (sk_sensor_id_t)tmp32);
1276             } else {
1277                 rwRecSetSensor(&val->rec, sksiteSensorLookup(cp));
1278             }
1279             break;
1280 
1281           case RWREC_FIELD_INPUT:
1282             rv = skStringParseUint32(&tmp32, cp, 0, UINT16_MAX);
1283             if (rv) {
1284                 /* FIXME: Clamp value to max instead of rejecting */
1285                 goto PARSE_ERROR;
1286             }
1287             rwRecSetInput(&val->rec, (uint16_t)tmp32);
1288             break;
1289 
1290           case RWREC_FIELD_OUTPUT:
1291             rv = skStringParseUint32(&tmp32, cp, 0, UINT16_MAX);
1292             if (rv) {
1293                 /* FIXME: Clamp value to max instead of rejecting */
1294                 goto PARSE_ERROR;
1295             }
1296             rwRecSetOutput(&val->rec, (uint16_t)tmp32);
1297             break;
1298 
1299           case RWREC_FIELD_NHIP:
1300             rv = skStringParseIP(&ipaddr, cp);
1301             if (rv) {
1302                 goto PARSE_ERROR;
1303             }
1304             rwRecMemSetNhIP(&val->rec, &ipaddr);
1305             break;
1306 
1307           case RWREC_FIELD_INIT_FLAGS:
1308             rv = skStringParseTCPFlags(&flags, cp);
1309             if (rv) {
1310                 goto PARSE_ERROR;
1311             }
1312             rwRecSetInitFlags(&val->rec, flags);
1313             break;
1314 
1315           case RWREC_FIELD_REST_FLAGS:
1316             rv = skStringParseTCPFlags(&flags, cp);
1317             if (rv) {
1318                 goto PARSE_ERROR;
1319             }
1320             rwRecSetRestFlags(&val->rec, flags);
1321             break;
1322 
1323           case RWREC_FIELD_TCP_STATE:
1324             rv = skStringParseTCPState(&flags, cp);
1325             if (rv) {
1326                 goto PARSE_ERROR;
1327             }
1328             tcp_state |= flags;
1329             break;
1330 
1331           case RWREC_FIELD_APPLICATION:
1332             rv = skStringParseUint32(&tmp32, cp, 0, UINT16_MAX);
1333             if (rv) {
1334                 goto PARSE_ERROR;
1335             }
1336             rwRecSetApplication(&val->rec, (uint16_t)tmp32);
1337             break;
1338 
1339           case RWREC_FIELD_FTYPE_CLASS:
1340             val->class_name = cp;
1341             break;
1342 
1343           case RWREC_FIELD_FTYPE_TYPE:
1344             val->type_name = cp;
1345             break;
1346 
1347           default:
1348             skAbortBadCase(field_type[i]);
1349         }
1350     }
1351 
1352 
1353     proto = rwRecGetProto(&val->rec);
1354 
1355     /* use the ICMP type/code when appropriate */
1356     if (val->have_icmp && (IPPROTO_ICMP == proto || IPPROTO_ICMPV6 == proto)) {
1357         rwRecSetDPort(&val->rec, (uint16_t)((val->itype << 8) | val->icode));
1358     }
1359 
1360     /* handle class and type */
1361     if (val->class_name && val->type_name) {
1362         rwRecSetFlowType(&val->rec,
1363                          sksiteFlowtypeLookupByClassType(val->class_name,
1364                                                          val->type_name));
1365     }
1366 
1367     /* Handle initialFlags, sessionFlags, and ALL-Flags */
1368     if (checking_defaults) {
1369         /* processing the defaults; do not modify anything */
1370     } else if (rwRecGetInitFlags(&val->rec) || rwRecGetRestFlags(&val->rec)) {
1371         if (IPPROTO_TCP == proto) {
1372             /* if either initial-flags or rest-flags is set, set
1373              * overall-flags to their combination */
1374             rwRecSetFlags(&val->rec, (rwRecGetInitFlags(&val->rec)
1375                                       | rwRecGetRestFlags(&val->rec)));
1376         } else {
1377             /* if flow is not TCP, do not record the initial-flags and
1378              * session-flags, and unset the EXPANDED flag. */
1379             rwRecSetInitFlags(&val->rec, 0);
1380             rwRecSetRestFlags(&val->rec, 0);
1381             tcp_state &= ~SK_TCPSTATE_EXPANDED;
1382         }
1383     } else {
1384         /* unset the EXPANDED bit */
1385         tcp_state &= ~SK_TCPSTATE_EXPANDED;
1386     }
1387 
1388     rwRecSetTcpState(&val->rec, tcp_state);
1389 
1390     return 0;
1391 
1392   PARSE_ERROR:
1393     rwAsciiGetFieldName(field_name, sizeof(field_name),
1394                         (rwrec_printable_fields_t)field_type[i]);
1395     if (checking_defaults) {
1396         skAppPrintErr("Error parsing default %s value '%s': %s",
1397                       field_name, cp, skStringParseStrerror(rv));
1398         return -1;
1399     }
1400     badLine("Invalid %s '%s': %s", field_name, cp, skStringParseStrerror(rv));
1401     return -1;
1402 }
1403 
1404 
1405 /*
1406  *  ok = processFile();
1407  *
1408  *    Read each line of text from the stream in the global 'curline'
1409  *    structure, create an rwRec from the fields on the line, and
1410  *    write the records to the global out_stream stream.
1411  *
1412  *    Return 0 on success or -1 on failure.
1413  */
1414 static int
processFile(void)1415 processFile(
1416     void)
1417 {
1418     static char line[RWTUC_LINE_BUFSIZE];
1419     parsed_values_t defaults;
1420     parsed_values_t currents;
1421     uint32_t *field_type = NULL;
1422     char **field_val = NULL;
1423     char *cp;
1424     char *ep;
1425     uint32_t field;
1426     int is_title = -1;
1427     int rv;
1428 
1429     /* read until end of file */
1430     while ((rv = skStreamGetLine(curline->stream, line, sizeof(line),
1431                                  &curline->lineno))
1432            != SKSTREAM_ERR_EOF)
1433     {
1434         if (bad_stream) {
1435             strncpy(curline->text, line, sizeof(curline->text));
1436         }
1437         switch (rv) {
1438           case SKSTREAM_OK:
1439             /* good, we got our line */
1440             break;
1441           case SKSTREAM_ERR_LONG_LINE:
1442             /* bad: line was longer than sizeof(line) */
1443             badLine("Input line too long");
1444             continue;
1445           default:
1446             /* unexpected error */
1447             skStreamPrintLastErr(curline->stream, rv, &skAppPrintErr);
1448             goto END;
1449         }
1450 
1451         /* initialize the field_type array either from the --fields
1452          * switch or based on the first line in the file. */
1453         if (is_title < 0) {
1454             /* fill in the defaults */
1455             is_title = determineFields(&field_type, &field_val,
1456                                        &defaults, line);
1457             if (is_title < 0) {
1458                 /* error */
1459                 return -1;
1460             }
1461             if (is_title > 0) {
1462                 /* goto next line */
1463                 continue;
1464             }
1465         }
1466 
1467         /* We have a line; process it */
1468         cp = line;
1469         field = 0;
1470         memcpy(&currents, &defaults, sizeof(parsed_values_t));
1471 
1472         /* break the line into separate fields */
1473         while (field < num_fields) {
1474             field_val[field] = cp;
1475             ++field;
1476 
1477             /* find end of current field */
1478             ep = strchr(cp, column_separator);
1479             if (NULL == ep) {
1480                 /* at end of line; break out of while() */
1481                 cp += strlen(cp);
1482                 break;
1483             } else {
1484                 *ep = '\0';
1485                 cp = ep + 1;
1486             }
1487         }
1488 
1489         /* check for extra fields at the end */
1490         if ((*cp != '\0') && (strlen(cp) != strspn(cp, RWTUC_WHITESPACE))) {
1491             badLine(("Too many fields on line:"
1492                      " text follows delimiter number %" PRIu32),
1493                     num_fields);
1494             goto NEXT_LINE;
1495         }
1496 
1497         /* check for too few fields */
1498         if (field != num_fields) {
1499             badLine(("Too few fields on line:"
1500                      " found %" PRIu32 " of %" PRIu32 " expected"),
1501                     field, num_fields);
1502             goto NEXT_LINE;
1503         }
1504 
1505         /* process fields */
1506         if (processFields(&currents, num_fields, field_type, field_val, 0)) {
1507             goto NEXT_LINE;
1508         }
1509 
1510         /* verify bytes */
1511         if (currents.bytes_equals_pkts) {
1512             rwRecSetBytes(&currents.rec, rwRecGetPkts(&currents.rec));
1513         }
1514 
1515         /* handle time */
1516         switch (currents.handle_time) {
1517           case CALC_STIME:
1518             rwRecSetStartTime(&currents.rec,
1519                               (currents.eTime
1520                                - rwRecGetElapsed(&currents.rec)));
1521             break;
1522 
1523           case CALC_ELAPSED:
1524             if (rwRecGetStartTime(&currents.rec) > currents.eTime) {
1525                 badLine("End time less than start time");
1526                 goto NEXT_LINE;
1527             }
1528             if (currents.eTime - rwRecGetStartTime(&currents.rec) > UINT32_MAX)
1529             {
1530                 /* FIXME: Clamp value to max instead of rejecting */
1531                 badLine("Computed duration too large");
1532                 goto NEXT_LINE;
1533             }
1534             rwRecSetElapsed(&currents.rec,
1535                             (currents.eTime-rwRecGetStartTime(&currents.rec)));
1536             break;
1537 
1538           case CALC_NONE:
1539             break;
1540         }
1541 
1542         /* output binary rwrec */
1543         rv = skStreamWriteRecord(out_stream, &currents.rec);
1544         if (rv) {
1545             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
1546             if (SKSTREAM_ERROR_IS_FATAL(rv)) {
1547                 return -1;
1548             }
1549         }
1550 
1551       NEXT_LINE:
1552         ; /* empty */
1553     } /* outer loop over lines  */
1554 
1555   END:
1556     if (field_type) {
1557         free(field_type);
1558     }
1559     if (field_val) {
1560         free(field_val);
1561     }
1562 
1563     return 0;
1564 }
1565 
1566 
main(int argc,char ** argv)1567 int main(int argc, char **argv)
1568 {
1569     char *fname;
1570     ssize_t rv = 0;
1571 
1572     appSetup(argc, argv);
1573 
1574     /* process the input file(s) */
1575     while ((rv = skOptionsCtxNextArgument(optctx, &fname)) == 0) {
1576         /* create an input stream and open text file */
1577         if ((rv = skStreamCreate(&curline->stream, SK_IO_READ,SK_CONTENT_TEXT))
1578             || (rv = skStreamBind(curline->stream, fname))
1579             || (rv = skStreamOpen(curline->stream)))
1580         {
1581             skStreamPrintLastErr(curline->stream, rv, &skAppPrintErr);
1582             skStreamDestroy(&curline->stream);
1583             rv = -1;
1584             break;
1585         }
1586         rv = processFile();
1587         skStreamDestroy(&curline->stream);
1588         if (rv != 0) {
1589             break;
1590         }
1591     }
1592 
1593     /* if everything went well, make certain there are headers in our
1594      * output */
1595     if (rv == 1) {
1596         rv = skStreamWriteSilkHeader(out_stream);
1597         if (rv) {
1598             if (rv == SKSTREAM_ERR_PREV_DATA) {
1599                 /* headers already printed */
1600                 rv = 0;
1601             } else {
1602                 skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
1603             }
1604         }
1605 
1606         if (bad_line_count && !verbose) {
1607             if (bad_stream) {
1608                 skAppPrintErr(("Could not parse %u line%s;"
1609                                " invalid input written to '%s'"),
1610                               bad_line_count,
1611                               ((1 == bad_line_count) ? "" : "s"),
1612                               skStreamGetPathname(bad_stream));
1613             } else {
1614                 skAppPrintErr(("Could not parse %u line%s;"
1615                                " try again with --%s or --%s for details"),
1616                               bad_line_count,
1617                               ((1 == bad_line_count) ? "" : "s"),
1618                               appOptions[OPT_STOP_ON_ERROR].name,
1619                               appOptions[OPT_VERBOSE].name);
1620             }
1621         }
1622     }
1623 
1624     return ((rv == -1) ? EXIT_FAILURE : EXIT_SUCCESS);
1625 }
1626 
1627 
1628 /*
1629 ** Local Variables:
1630 ** mode:c
1631 ** indent-tabs-mode:nil
1632 ** c-basic-offset:4
1633 ** End:
1634 */
1635