1 /*
2 ** Copyright (C) 2017-2020 by Carnegie Mellon University.
3 **
4 ** @OPENSOURCE_LICENSE_START@
5 ** See license information in ../../LICENSE.txt
6 ** @OPENSOURCE_LICENSE_END@
7 */
8 
9 /*
10  *  rwaggbagbuild.c
11  *
12  *    Read textual input and create an Aggregate Bag.
13  *
14  *  Mark Thomas
15  *  January 2017
16  *
17  */
18 #define AB_SETBAG  0
19 
20 #include <silk/silk.h>
21 
22 RCSIDENT("$SiLK: rwaggbagbuild.c ef14e54179be 2020-04-14 21:57:45Z mthomas $");
23 
24 #include <silk/rwascii.h>
25 #include <silk/skaggbag.h>
26 #include <silk/skcountry.h>
27 #include <silk/skipaddr.h>
28 #include <silk/sksite.h>
29 #include <silk/skstream.h>
30 #include <silk/skstringmap.h>
31 #include <silk/skvector.h>
32 #include <silk/utils.h>
33 #if AB_SETBAG
34 #include <silk/skbag.h>
35 #include <silk/skipset.h>
36 #endif  /* AB_SETBAG */
37 
38 /* LOCAL DEFINES AND TYPEDEFS */
39 
/* where to write --help output */
#define USAGE_FH stdout

/* size to use for arrays that hold field IDs.  The global
 * 'parsed_value' array and the duplicate-detection bitmap in
 * parseFieldList() are both sized by this value, so any field ID
 * used as an index into them must be smaller than it */
#define AGGBAGBUILD_ARRAY_SIZE      65536

/* the longest input line to accept; lines longer than this size are
 * ignored.  This is also the size of the 'text' buffer in
 * current_line_t */
#define AGGBAGBUILD_LINE_BUFSIZE    2048

/* the ident for the "ignored" field.  The value is outside the range
 * of indexes allowed by AGGBAGBUILD_ARRAY_SIZE; parseFieldList()
 * skips this ID when it checks for duplicate fields */
#define AGGBAGBUILD_FIELD_IGNORED   ((sk_stringmap_id_t)INT32_MAX)

/* whitespace chars used in strspn(); list taken from isspace().
 * Note that newline, which isspace() accepts, is not in this list */
#define AGGBAGBUILD_WHITESPACE      "\t\v\f\r "

/* the default input type; the initial value of the global
 * 'input_type' */
#define AGGBAGBUILD_DEFAULT_INPUT_TYPE  AGGBAGBUILD_INPUT_TEXT
58 
/* parsed_value_t is a structure to hold the unparsed value, an
 * indication as to whether the value is active, and the parsed
 * value.  There is an array of these ('parsed_value') with one slot
 * for every possible field identifier; the field's ID is the index
 * into that array. */
typedef struct parsed_value_st {
    /* The unparsed (textual) value of the field */
    const char     *pv_raw;
    /* True if the field is part of the key or counter */
    unsigned        pv_is_used  : 1;
    /* True if the field was specified by --constant-field and its
     * value only needs to be computed once */
    unsigned        pv_is_const : 1;
    /* True if the value of the field is fixed for this input file
     * because either it was not mentioned in file's title line or
     * because it was mentioned in --constant-field */
    unsigned        pv_is_fixed : 1;
    /* The parsed value.  Which member is meaningful depends on the
     * type of the field; see parseSingleField() */
    union parsed_value_v_un {
        /* Value of integer-valued fields.  parseSingleField() also
         * stores parsed times here, as a number of seconds */
        uint64_t        pv_int;
        /* NOTE(review): no use of this member is visible in the part
         * of the file reviewed; confirm whether it is still needed */
        sktime_t        pv_time;
        /* Value of the IPv4 and IPv6 address fields */
        skipaddr_t      pv_ip;
    }               pv;
} parsed_value_t;
80 
/* current input line: the text of the line together with the stream
 * and line number it came from.  badLine() uses all three members
 * when it reports an invalid line. */
typedef struct current_line_st {
    /* input line (as read from input) */
    char        text[AGGBAGBUILD_LINE_BUFSIZE];
    /* input stream currently being processed */
    skstream_t *stream;
    /* line number in the 'stream' */
    int         lineno;
} current_line_t;
90 
/* the kinds of input the application can read.  These values are the
 * IDs stored in the 'input_types' string-map table.  The IPset and
 * Bag types exist only when AB_SETBAG is non-zero. */
typedef enum input_type_en {
    AGGBAGBUILD_INPUT_TEXT = 1
#if AB_SETBAG
    ,AGGBAGBUILD_INPUT_IPSET,
    AGGBAGBUILD_INPUT_BAG
#endif  /* #if AB_SETBAG */
} input_type_t;
98 
99 
100 /* LOCAL VARIABLES */
101 
/* fields in addition to those provided by the Aggregate Bag library;
 * createStringmap() appends these entries to 'field_map' after it
 * adds the library's key and counter fields.  The only entry is the
 * "ignore" pseudo-field. */
static sk_stringmap_entry_t aggbagbuild_fields[] = {
    {"ignore", AGGBAGBUILD_FIELD_IGNORED,   NULL, NULL},
    SK_STRINGMAP_SENTINEL
};
107 
/* available types of input; appSetup() loads these entries into
 * 'input_type_map'.  The ID of each entry is an input_type_t value.
 * The "ipset" and "bag" entries exist only when AB_SETBAG is
 * non-zero. */
static sk_stringmap_entry_t input_types[] = {
    {"text",    AGGBAGBUILD_INPUT_TEXT,     NULL, NULL},
#if AB_SETBAG
    {"ipset",   AGGBAGBUILD_INPUT_IPSET,    NULL, NULL},
    {"bag",     AGGBAGBUILD_INPUT_BAG,      NULL, NULL},
#endif  /* #if AB_SETBAG */
    SK_STRINGMAP_SENTINEL
};
117 
/* where to send output, set by --output-path; appSetup() binds it to
 * "stdout" when the switch is not given */
static skstream_t *out_stream = NULL;

/* where to copy bad input lines, set by --bad-input-lines; NULL when
 * the switch is not given */
static skstream_t *bad_stream = NULL;

/* number of lines that are bad; incremented by badLine() */
static unsigned int bad_line_count = 0;

/* whether to report parsing errors, set by --verbose */
static int verbose = 0;

/* whether to halt on first error, set by --stop-on-error */
static int stop_on_error = 0;

/* whether to always parse the first line as data, set by --no-titles */
static int no_titles = 0;

/* available fields; created by createStringmap() */
static sk_stringmap_t *field_map = NULL;

/* the argument to the --fields switch; NULL when the switch is not
 * given */
static char *fields = NULL;

/* the fields (columns) to parse in the order to parse them; each
 * value is an ID from field_map; filled by parseFieldList() */
static sk_vector_t *field_vec = NULL;

/* each argument to the --constant-field switch, unparsed; switch may
 * be repeated; vector of char* */
static sk_vector_t *constant_field = NULL;

/* fields that have a constant value for all inputs; vector of IDs */
static sk_vector_t *const_fields = NULL;

/* fields that have been parsed; the index into this array is an
 * sk_aggbag_type_t type ID */
static parsed_value_t parsed_value[AGGBAGBUILD_ARRAY_SIZE];

/* type of input */
static input_type_t input_type = AGGBAGBUILD_DEFAULT_INPUT_TYPE;

/* string-map for parsing the input_type; filled from 'input_types'
 * by appSetup() */
static sk_stringmap_t *input_type_map = NULL;

/* character that separates input fields (the delimiter); changed by
 * --column-separator */
static char column_separator = '|';

/* options context for processing the input files; created by
 * appSetup() */
static sk_options_ctx_t *optctx;

/* current input line and stream from which it was read */
static current_line_t current_line;

/* a pointer to the current input line */
static current_line_t *curline = &current_line;

/* the aggbag to create */
static sk_aggbag_t *ab = NULL;

/* options for writing the AggBag file */
static sk_aggbag_options_t ab_options;
180 
181 
182 /* OPTIONS SETUP */
183 
/* Identifier for each application switch.  The order of the values
 * must match the order of the entries in appOptions[] and appHelp[]:
 * each value is stored as the 'val' member of the matching
 * appOptions[] entry, and the code also uses the values as indexes
 * into appOptions[] to get a switch's name. */
typedef enum {
#if AB_SETBAG
    OPT_INPUT_TYPE,
#endif  /* #if AB_SETBAG */
    OPT_FIELDS,
    OPT_CONSTANT_FIELD,
    OPT_COLUMN_SEPARATOR,
    OPT_OUTPUT_PATH,
    OPT_BAD_INPUT_LINES,
    OPT_VERBOSE,
    OPT_STOP_ON_ERROR,
    OPT_NO_TITLES
} appOptionsEnum;
197 
198 
/* The application's switches: name, whether the switch takes an
 * argument, and the appOptionsEnum value handed to
 * appOptionsHandler().  Entries must stay in the same order as
 * appOptionsEnum and appHelp[]. */
static struct option appOptions[] = {
#if AB_SETBAG
    {"input-type",          REQUIRED_ARG, 0, OPT_INPUT_TYPE},
#endif  /* #if AB_SETBAG */
    {"fields",              REQUIRED_ARG, 0, OPT_FIELDS},
    {"constant-field",      REQUIRED_ARG, 0, OPT_CONSTANT_FIELD},
    {"column-separator",    REQUIRED_ARG, 0, OPT_COLUMN_SEPARATOR},
    {"output-path",         REQUIRED_ARG, 0, OPT_OUTPUT_PATH},
    {"bad-input-lines",     REQUIRED_ARG, 0, OPT_BAD_INPUT_LINES},
    {"verbose",             NO_ARG,       0, OPT_VERBOSE},
    {"stop-on-error",       NO_ARG,       0, OPT_STOP_ON_ERROR},
    {"no-titles",           NO_ARG,       0, OPT_NO_TITLES},
    {0,0,0,0}               /* sentinel entry */
};
213 
/* Help text for each switch, in the same order as appOptions[];
 * appSetup() asserts that the two arrays have the same number of
 * elements.  appUsageLong() prints these strings. */
static const char *appHelp[] = {
#if AB_SETBAG
    /* --input-type */
    ("Specify the type of input to read"),
#endif  /* #if AB_SETBAG */
    /* --fields */
    NULL, /* generated dynamically */
    /* --constant-field */
    ("Given an argument of FIELD=VALUE, add the extra\n"
     "\tfield FIELD to each entry in the Aggregate Bag and give that field\n"
     "\tthe specified value.  May be repeated to set multiple FIELDs"),
    /* --column-separator */
    "Split input fields on this character. Def. '|'",
    /* --output-path */
    "Write the aggregate bag to this stream. Def. stdout",
    /* --bad-input-lines */
    ("Write each bad input line to this file or stream.\n"
     "\tLines will have the file name and line number prepended. Def. none"),
    /* --verbose */
    ("Print an error message for each bad input line to the\n"
     "\tstandard error. Def. Quietly ignore errors"),
    /* --stop-on-error */
    ("Print an error message for a bad input line to stderr\n"
     "\tand exit. Def. Quietly ignore errors and continue processing"),
    /* --no-titles */
    ("Parse the first line as record values. Requires --fields.\n"
     "\tDef. Skip first line if it appears to contain titles"),
    /* sentinel; pairs with the sentinel entry of appOptions[] */
    (char *)NULL
};
234 
235 
236 
237 /* LOCAL FUNCTION PROTOTYPES */
238 
/* callback registered with skOptionsRegister() to handle one switch */
static int  appOptionsHandler(clientData cData, int opt_index, char *opt_arg);
/* create the global 'field_map' */
static int  createStringmap(void);
#if AB_SETBAG
/* set the global 'input_type' from the --input-type argument */
static int  parseInputType(const char *type_string);
#endif  /* #if AB_SETBAG */
/* fill the global 'field_vec' from a list of field names */
static int  parseFieldList(const char *field_string, char **errmsg);
/* the next two functions are called by appSetup(); their definitions
 * are not part of the section of the file reviewed here */
static int  parseConstantFieldValues(void);
static int  setAggBagFields(void);
/* record and optionally report an invalid input line; takes a
 * printf-style format and arguments */
static void badLine(const char *fmt, ...)  SK_CHECK_PRINTF(1, 2);
248 
249 
250 /* FUNCTION DEFINITIONS */
251 
252 /*
253  *  appUsageLong();
254  *
255  *    Print complete usage information to USAGE_FH.  Pass this
256  *    function to skOptionsSetUsageCallback(); skOptionsParse() will
257  *    call this funciton and then exit the program when the --help
258  *    option is given.
259  */
260 static void
appUsageLong(void)261 appUsageLong(
262     void)
263 {
264 #define USAGE_MSG                                                             \
265     ("[SWITCHES] [FILES]\n"                                                   \
266      "\tCreate a binary Aggregate Bag file from textual input and write it\n" \
267      "\tto the standard output or the specified --output-path.  The input\n"  \
268      "\tshould contain delimited field values. The names of the fields may\n" \
269      "\tbe specified in the --fields switch or the first line of the\n"       \
270      "\tinput.  At least one key and one counter field are required.\n")
271 
272     FILE *fh = USAGE_FH;
273     unsigned int i;
274 #if AB_SETBAG
275     unsigned int j;
276 #endif  /* #if AB_SETBAG */
277 
278     fprintf(fh, "%s %s", skAppName(), USAGE_MSG);
279     fprintf(fh, "\nSWITCHES:\n");
280     skOptionsDefaultUsage(fh);
281 
282     for (i = 0; appOptions[i].name; ++i) {
283         fprintf(fh, "--%s %s. ", appOptions[i].name,
284                 SK_OPTION_HAS_ARG(appOptions[i]));
285         switch (appOptions[i].val) {
286 #if AB_SETBAG
287           case OPT_INPUT_TYPE:
288             fprintf(fh, "%s. Def. %s\n\tChoices: %s",
289                     appHelp[i],
290                     skStringMapGetFirstName(input_type_map,
291                                             AGGBAGBUILD_DEFAULT_INPUT_TYPE),
292                     input_types[0].name);
293             for (j = 1; input_types[j].name; ++j) {
294                 fprintf(fh, ", %s", input_types[j].name);
295             }
296             fprintf(fh, "\n");
297             break;
298 #endif  /* #if AB_SETBAG */
299           case OPT_FIELDS:
300             fprintf(fh, ("Parse the input into this comma-separated set of"
301                          " fields and\n\tadd to the Aggregate Bag.\n"));
302             skStringMapPrintUsage(field_map, fh, 4);
303             break;
304           case OPT_OUTPUT_PATH:
305             /* include the help for --notes and --invocation-strip
306              * after --output-path */
307             fprintf(fh, "%s\n", appHelp[i]);
308             skAggBagOptionsUsage(fh);
309             break;
310           default:
311             /* Simple static help text from the appHelp array */
312             fprintf(fh, "%s\n", appHelp[i]);
313             break;
314         }
315     }
316 
317     skOptionsCtxOptionsUsage(optctx, fh);
318     sksiteOptionsUsage(fh);
319 }
320 
321 
322 /*
323  *  appTeardown()
324  *
325  *    Teardown all modules, close all files, and tidy up all
326  *    application state.
327  *
328  *    This function is idempotent.
329  */
330 static void
appTeardown(void)331 appTeardown(
332     void)
333 {
334     static int teardownFlag = 0;
335     int rv;
336 
337     if (teardownFlag) {
338         return;
339     }
340     teardownFlag = 1;
341 
342     if (out_stream) {
343         rv = skStreamClose(out_stream);
344         if (rv && rv != SKSTREAM_ERR_NOT_OPEN) {
345             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
346         }
347         skStreamDestroy(&out_stream);
348     }
349 
350     if (bad_stream) {
351         rv = skStreamClose(bad_stream);
352         if (SKSTREAM_OK == rv) {
353             if (0 == bad_line_count && skStreamIsSeekable(bad_stream)) {
354                 unlink(skStreamGetPathname(bad_stream));
355             }
356         } else if (rv != SKSTREAM_ERR_NOT_OPEN) {
357             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
358         }
359         skStreamDestroy(&bad_stream);
360         bad_stream = NULL;
361     }
362 
363     skVectorDestroy(constant_field);
364     constant_field = NULL;
365     skVectorDestroy(const_fields);
366     const_fields = NULL;
367     skVectorDestroy(field_vec);
368     field_vec = NULL;
369 
370     (void)skStringMapDestroy(input_type_map);
371     input_type_map = NULL;
372     (void)skStringMapDestroy(field_map);
373     field_map = NULL;
374 
375     skAggBagOptionsTeardown();
376     skOptionsCtxDestroy(&optctx);
377     skAppUnregister();
378 }
379 
380 
381 /*
382  *  appSetup(argc, argv);
383  *
384  *    Perform all the setup for this application include setting up
385  *    required modules, parsing options, etc.  This function should be
386  *    passed the same arguments that were passed into main().
387  *
388  *    Returns to the caller if all setup succeeds.  If anything fails,
389  *    this function will cause the application to exit with a FAILURE
390  *    exit status.
391  */
392 static void
appSetup(int argc,char ** argv)393 appSetup(
394     int                 argc,
395     char              **argv)
396 {
397     SILK_FEATURES_DEFINE_STRUCT(features);
398     unsigned int optctx_flags;
399     sk_stringmap_status_t sm_err;
400     int rv;
401 
402     /* verify same number of options and help strings */
403     assert((sizeof(appHelp)/sizeof(char *)) ==
404            (sizeof(appOptions)/sizeof(struct option)));
405 
406     /* register the application */
407     skAppRegister(argv[0]);
408     skAppVerifyFeatures(&features, NULL);
409     skOptionsSetUsageCallback(&appUsageLong);
410 
411     /* initialize globals */
412     memset(parsed_value, 0, sizeof(parsed_value));
413     memset(&ab_options, 0, sizeof(sk_aggbag_options_t));
414     ab_options.argc = argc;
415     ab_options.argv = argv;
416 
417     optctx_flags = (SK_OPTIONS_CTX_ALLOW_STDIN | SK_OPTIONS_CTX_XARGS
418                     | SK_OPTIONS_CTX_INPUT_BINARY);
419 
420     /* register the options */
421     if (skOptionsCtxCreate(&optctx, optctx_flags)
422         || skOptionsCtxOptionsRegister(optctx)
423         || skOptionsRegister(appOptions, &appOptionsHandler, NULL)
424         || skAggBagOptionsRegister(&ab_options)
425         || sksiteOptionsRegister(SK_SITE_FLAG_CONFIG_FILE))
426     {
427         skAppPrintErr("Unable to register options");
428         exit(EXIT_FAILURE);
429     }
430 
431     /* register the teardown handler */
432     if (atexit(appTeardown) < 0) {
433         skAppPrintErr("Unable to register appTeardown() with atexit()");
434         appTeardown();
435         exit(EXIT_FAILURE);
436     }
437 
438     /* initialize the string-map of field identifiers, and add the
439      * locally defined fields. */
440     if (createStringmap()) {
441         skAppPrintErr("Unable to setup fields string map");
442         exit(EXIT_FAILURE);
443     }
444 
445     /* initialize the string-map of input-types */
446     if ((sm_err = skStringMapCreate(&input_type_map))
447         || (sm_err = skStringMapAddEntries(input_type_map, -1, input_types)))
448     {
449         skAppPrintErr("Unable to input-type string map");
450         exit(EXIT_FAILURE);
451     }
452 
453     /* parse the options */
454     rv = skOptionsCtxOptionsParse(optctx, argc, argv);
455     if (rv < 0) {
456         skAppUsage();
457     }
458 
459     /* cannot specify --no-titles unless --fields is given */
460     if (no_titles && !fields) {
461         skAppPrintErr("May only use --%s when --%s is specified",
462                       appOptions[OPT_NO_TITLES].name,
463                       appOptions[OPT_FIELDS].name);
464         skAppUsage();
465     }
466 
467     /* try to load site config file; if it fails, we will not be able
468      * to resolve flowtype and sensor from input file names */
469     sksiteConfigure(0);
470 
471     /* create the aggregate bag */
472     if (skAggBagCreate(&ab)) {
473         exit(EXIT_FAILURE);
474     }
475     skAggBagOptionsBind(ab, &ab_options);
476 
477     /* parse each of the constant field values */
478     parseConstantFieldValues();
479 
480     /* parse the --fields switch if given */
481     if (fields) {
482         char *errmsg;
483         if (parseFieldList(fields, &errmsg)) {
484             skAppPrintErr("Invalid %s: %s",
485                           appOptions[OPT_FIELDS].name, errmsg);
486             exit(EXIT_FAILURE);
487         }
488         if (setAggBagFields()) {
489             exit(EXIT_FAILURE);
490         }
491     }
492 
493     /* use "stdout" as default output path */
494     if (NULL == out_stream) {
495         if ((rv = skStreamCreate(&out_stream, SK_IO_WRITE, SK_CONTENT_SILK))
496             || (rv = skStreamBind(out_stream, "stdout")))
497         {
498             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
499             skAppPrintErr("Could not create output stream");
500             exit(EXIT_FAILURE);
501         }
502     }
503 
504     /* open bad output, but first ensure it is not the same as the
505      * record output */
506     if (bad_stream) {
507         if (0 == strcmp(skStreamGetPathname(out_stream),
508                         skStreamGetPathname(bad_stream)))
509         {
510             skAppPrintErr("Cannot use same stream for bad input and records");
511             exit(EXIT_FAILURE);
512         }
513         rv = skStreamOpen(bad_stream);
514         if (rv) {
515             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
516             exit(EXIT_FAILURE);
517         }
518     }
519 
520     /* open output */
521     rv = skStreamOpen(out_stream);
522     if (rv) {
523         skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
524         exit(EXIT_FAILURE);
525     }
526 
527     return;  /* OK */
528 }
529 
530 
531 /*
532  *  status = appOptionsHandler(cData, opt_index, opt_arg);
533  *
534  *    This function is passed to skOptionsRegister(); it will be called
535  *    by skOptionsParse() for each user-specified switch that the
536  *    application has registered; it should handle the switch as
537  *    required---typically by setting global variables---and return 1
538  *    if the switch processing failed or 0 if it succeeded.  Returning
539  *    a non-zero from from the handler causes skOptionsParse() to return
540  *    a negative value.
541  *
542  *    The clientData in 'cData' is typically ignored; 'opt_index' is
543  *    the index number that was specified as the last value for each
544  *    struct option in appOptions[]; 'opt_arg' is the user's argument
545  *    to the switch for options that have a REQUIRED_ARG or an
546  *    OPTIONAL_ARG.
547  */
548 static int
appOptionsHandler(clientData cData,int opt_index,char * opt_arg)549 appOptionsHandler(
550     clientData          cData,
551     int                 opt_index,
552     char               *opt_arg)
553 {
554     const char *char_name;
555     int rv;
556 
557     SK_UNUSED_PARAM(cData);
558 
559     switch ((appOptionsEnum)opt_index) {
560 #if AB_SETBAG
561       case OPT_INPUT_TYPE:
562         if (parseInputType(opt_arg)) {
563             return 1;
564         }
565         break;
566 #endif  /* #if AB_SETBAG */
567 
568       case OPT_FIELDS:
569         if (fields) {
570             skAppPrintErr("Invalid %s: Switch used multiple times",
571                           appOptions[opt_index].name);
572             return 1;
573         }
574         fields = opt_arg;
575         break;
576 
577       case OPT_CONSTANT_FIELD:
578         if (NULL == constant_field) {
579             constant_field = skVectorNew(sizeof(char *));
580             if (NULL == constant_field) {
581                 skAppPrintOutOfMemory("vector");
582                 return 1;
583             }
584         }
585         if (skVectorAppendValue(constant_field, &opt_arg)) {
586             skAppPrintOutOfMemory("vector entry");
587             return 1;
588         }
589         return 0;
590 
591       case OPT_COLUMN_SEPARATOR:
592         switch (opt_arg[0]) {
593           case '#':
594             char_name = "comment start('#')";
595             break;
596           case '\n':
597             char_name = "newline";
598             break;
599           case '\r':
600             char_name = "carriage return";
601             break;
602           case '\0':
603             char_name = "end-of-string";
604             break;
605           default:
606             char_name = NULL;
607             break;
608         }
609         if (char_name) {
610             skAppPrintErr("Invalid %s: May not be the %s character",
611                           appOptions[opt_index].name, char_name);
612             return 1;
613         }
614         column_separator = opt_arg[0];
615         break;
616 
617       case OPT_OUTPUT_PATH:
618         if (out_stream) {
619             skAppPrintErr("Invalid %s: Switch used multiple times",
620                           appOptions[opt_index].name);
621             return 1;
622         }
623         if ((rv = skStreamCreate(&out_stream, SK_IO_WRITE, SK_CONTENT_SILK))
624             || (rv = skStreamBind(out_stream, opt_arg)))
625         {
626             skStreamPrintLastErr(out_stream, rv, &skAppPrintErr);
627             return 1;
628         }
629         break;
630 
631       case OPT_BAD_INPUT_LINES:
632         if (bad_stream) {
633             skAppPrintErr("Invalid %s: Switch used multiple times",
634                           appOptions[opt_index].name);
635             return 1;
636         }
637         if ((rv = skStreamCreate(&bad_stream, SK_IO_WRITE, SK_CONTENT_TEXT))
638             || (rv = skStreamBind(bad_stream, opt_arg)))
639         {
640             skStreamPrintLastErr(bad_stream, rv, &skAppPrintErr);
641             return 1;
642         }
643         break;
644 
645       case OPT_VERBOSE:
646         verbose = 1;
647         break;
648 
649       case OPT_STOP_ON_ERROR:
650         stop_on_error = 1;
651         break;
652 
653       case OPT_NO_TITLES:
654         no_titles = 1;
655         break;
656     }
657 
658     return 0;  /* OK */
659 }
660 
661 
662 /*
663  *  ok = createStringmap();
664  *
665  *    Create the global 'field_map'.  Return 0 on success, or -1 on
666  *    failure.
667  */
668 static int
createStringmap(void)669 createStringmap(
670     void)
671 {
672     sk_stringmap_status_t sm_err;
673     sk_stringmap_entry_t sm_entry;
674     sk_aggbag_type_iter_t iter;
675     sk_aggbag_type_t type;
676     unsigned int key_counter[] = {SK_AGGBAG_KEY, SK_AGGBAG_COUNTER};
677     unsigned int i;
678 
679     memset(&sm_entry, 0, sizeof(sm_entry));
680 
681     sm_err = skStringMapCreate(&field_map);
682     if (sm_err) {
683         skAppPrintErr("Unable to create string map");
684         return -1;
685     }
686 
687     for (i = 0; i < sizeof(key_counter)/sizeof(key_counter[0]); ++i) {
688         skAggBagFieldTypeIteratorBind(&iter, key_counter[i]);
689         while ((sm_entry.name = skAggBagFieldTypeIteratorNext(&iter, &type))
690                != NULL)
691         {
692             sm_entry.id = type;
693             sm_err = skStringMapAddEntries(field_map, 1, &sm_entry);
694             if (sm_err) {
695                 skAppPrintErr("Unable to add %s field named '%s': %s",
696                               ((SK_AGGBAG_KEY == key_counter[i])
697                                ? "key" : "counter"),
698                               sm_entry.name, skStringMapStrerror(sm_err));
699                 return -1;
700             }
701             if (SKAGGBAG_FIELD_ANY_COUNTRY == type) {
702                 break;
703             }
704         }
705     }
706 
707 #ifndef NDEBUG
708     {
709         sk_stringmap_iter_t *sm_iter = NULL;
710 
711         skStringMapGetByID(field_map, AGGBAGBUILD_FIELD_IGNORED, &sm_iter);
712         if (0 != skStringMapIterCountMatches(sm_iter)) {
713             skStringMapIterDestroy(sm_iter);
714             skAbort();
715         }
716         skStringMapIterDestroy(sm_iter);
717     }
718 #endif  /* NDEBUG */
719 
720     sm_err = skStringMapAddEntries(field_map, -1, aggbagbuild_fields);
721     if (sm_err) {
722         skAppPrintErr("Unable to add fields: %s", skStringMapStrerror(sm_err));
723         return -1;
724     }
725 
726     return 0;
727 }
728 
729 
730 /*
731  *  status = parseFieldList(fields_string, &errmsg);
732  *
733  *    Parse the user's argument to the --fields switch or from the
734  *    first line of the input and fill the global 'field_vec' vector
735  *    with the field IDs.  Return 0 on success; -1 on failure.
736  */
737 static int
parseFieldList(const char * field_string,char ** errmsg)738 parseFieldList(
739     const char         *field_string,
740     char              **errmsg)
741 {
742     static char buf[256];
743     BITMAP_DECLARE(field_dup, AGGBAGBUILD_ARRAY_SIZE);
744     sk_stringmap_iter_t *iter = NULL;
745     sk_stringmap_entry_t *entry;
746     int rv = -1;
747 
748     /* parse the fields; duplicate 'ignore' fields are okay, but any
749      * other duplcate is an error */
750     if (skStringMapParse(field_map, field_string, SKSTRINGMAP_DUPES_KEEP,
751                          &iter, errmsg))
752     {
753         goto END;
754     }
755 
756     /* check for duplicate fields */
757     BITMAP_INIT(field_dup);
758     while (skStringMapIterNext(iter, &entry, NULL) == SK_ITERATOR_OK) {
759         if (AGGBAGBUILD_FIELD_IGNORED != entry->id) {
760             if (BITMAP_GETBIT(field_dup, entry->id)) {
761                 snprintf(buf, sizeof(buf), "Duplicate name '%s'", entry->name);
762                 *errmsg = buf;
763                 goto END;
764             }
765             BITMAP_SETBIT(field_dup, entry->id);
766         }
767     }
768 
769     /* clear or create the vector as necessary */
770     if (field_vec) {
771         skVectorClear(field_vec);
772     } else {
773         field_vec = skVectorNew(sizeof(uint32_t));
774         if (NULL == field_vec) {
775             skAppPrintOutOfMemory("vector");
776             goto END;
777         }
778     }
779 
780     /* fill the vector */
781     skStringMapIterReset(iter);
782     while (skStringMapIterNext(iter, &entry, NULL) == SK_ITERATOR_OK) {
783         if (skVectorAppendValue(field_vec, &entry->id)) {
784             skAppPrintOutOfMemory("vector element");
785             goto END;
786         }
787     }
788 
789     rv = 0;
790 
791   END:
792     skStringMapIterDestroy(iter);
793     return rv;
794 }
795 
796 
797 #if AB_SETBAG
798 /*
799  *    Parse the input-type string specified in 'type_string' and
800  *    update the globlal 'input_type' with the type.
801  */
802 static int
parseInputType(const char * type_string)803 parseInputType(
804     const char         *type_string)
805 {
806     sk_stringmap_status_t sm_err = SKSTRINGMAP_OK;
807     sk_stringmap_entry_t *sm_entry;
808 
809     sm_err = skStringMapGetByName(input_type_map, type_string, &sm_entry);
810     if (sm_err) {
811         skAppPrintErr("Invalid %s '%s': %s",
812                       appOptions[OPT_INPUT_TYPE].name, type_string,
813                       skStringMapStrerror(sm_err));
814         goto END;
815     }
816     input_type = sm_entry->id;
817 
818   END:
819     return (sm_err != SKSTRINGMAP_OK);
820 }
821 #endif  /* #if AB_SETBAG */
822 
823 
824 /*
825  *    If invalid input lines are being written to a stream, write the
826  *    text in 'curline', preceeded by the input file's name and line
827  *    number.
828  *
829  *    If verbose output or stop-on-error is set, format the error
830  *    message given by the arguments and print an error message.  The
831  *    error message includes the current input file and line number.
832  */
833 static void
badLine(const char * fmt,...)834 badLine(
835     const char         *fmt,
836     ...)
837 {
838     char errbuf[2 * PATH_MAX];
839     va_list ap;
840 
841     ++bad_line_count;
842 
843     va_start(ap, fmt);
844     if (bad_stream) {
845         skStreamPrint(bad_stream, "%s:%d:%s\n",
846                       skStreamGetPathname(curline->stream),
847                       curline->lineno, curline->text);
848     }
849     if (verbose || stop_on_error) {
850         vsnprintf(errbuf, sizeof(errbuf), fmt, ap);
851         skAppPrintErr("%s:%d: %s",
852                       skStreamGetPathname(curline->stream), curline->lineno,
853                       errbuf);
854         if (stop_on_error) {
855             va_end(ap);
856             exit(EXIT_FAILURE);
857         }
858     }
859     va_end(ap);
860 }
861 
862 
863 /*
864  *    Parse the string in 'str_value' which is a value for the field
865  *    'id' and set the appropriate entry in the global 'parsed_value'
866  *    array.  The 'is_const_field' parameter is used in error
867  *    reporting.  Report an error message and return -1 if parsing
868  *    fails.
869  */
870 static int
parseSingleField(const char * str_value,uint32_t id,int is_const_field)871 parseSingleField(
872     const char         *str_value,
873     uint32_t            id,
874     int                 is_const_field)
875 {
876     parsed_value_t *pv;
877     sktime_t tmp_time;
878     uint8_t tcp_flags;
879     int rv;
880 
881     assert(id < AGGBAGBUILD_ARRAY_SIZE);
882     pv = &parsed_value[id];
883 
884     assert(1 == pv->pv_is_used);
885     switch (id) {
886       case SKAGGBAG_FIELD_RECORDS:
887       case SKAGGBAG_FIELD_SUM_BYTES:
888       case SKAGGBAG_FIELD_SUM_PACKETS:
889       case SKAGGBAG_FIELD_SUM_ELAPSED:
890       case SKAGGBAG_FIELD_PACKETS:
891       case SKAGGBAG_FIELD_BYTES:
892       case SKAGGBAG_FIELD_ELAPSED:
893       case SKAGGBAG_FIELD_CUSTOM_KEY:
894       case SKAGGBAG_FIELD_CUSTOM_COUNTER:
895         if (NULL == str_value) {
896             pv->pv.pv_int = 0;
897             break;
898         }
899         rv = skStringParseUint64(&pv->pv.pv_int, str_value, 0, UINT64_MAX);
900         if (rv) {
901             goto PARSE_ERROR;
902         }
903         break;
904 
905       case SKAGGBAG_FIELD_SPORT:
906       case SKAGGBAG_FIELD_DPORT:
907       case SKAGGBAG_FIELD_ANY_PORT:
908       case SKAGGBAG_FIELD_INPUT:
909       case SKAGGBAG_FIELD_OUTPUT:
910       case SKAGGBAG_FIELD_ANY_SNMP:
911       case SKAGGBAG_FIELD_APPLICATION:
912         if (NULL == str_value) {
913             pv->pv.pv_int = 0;
914             break;
915         }
916         rv = skStringParseUint64(&pv->pv.pv_int, str_value, 0, UINT16_MAX);
917         if (rv) {
918             goto PARSE_ERROR;
919         }
920         break;
921 
922       case SKAGGBAG_FIELD_PROTO:
923       case SKAGGBAG_FIELD_ICMP_TYPE:
924       case SKAGGBAG_FIELD_ICMP_CODE:
925         if (NULL == str_value) {
926             pv->pv.pv_int = 0;
927             break;
928         }
929         rv = skStringParseUint64(&pv->pv.pv_int, str_value, 0, UINT8_MAX);
930         if (rv) {
931             goto PARSE_ERROR;
932         }
933         break;
934 
935       case SKAGGBAG_FIELD_SIPv4:
936       case SKAGGBAG_FIELD_DIPv4:
937       case SKAGGBAG_FIELD_NHIPv4:
938       case SKAGGBAG_FIELD_ANY_IPv4:
939         if (NULL == str_value) {
940             skipaddrClear(&pv->pv.pv_ip);
941             break;
942         }
943         rv = skStringParseIP(&pv->pv.pv_ip, str_value);
944         if (rv) {
945             goto PARSE_ERROR;
946         }
947 #if SK_ENABLE_IPV6
948         if (skipaddrIsV6(&pv->pv.pv_ip)
949             && skipaddrV6toV4(&pv->pv.pv_ip, &pv->pv.pv_ip))
950         {
951             /* FIXME: Need to produce some error code */
952         }
953 #endif  /* SK_ENABLE_IPV6 */
954         break;
955 
956       case SKAGGBAG_FIELD_SIPv6:
957       case SKAGGBAG_FIELD_DIPv6:
958       case SKAGGBAG_FIELD_NHIPv6:
959       case SKAGGBAG_FIELD_ANY_IPv6:
960         if (NULL == str_value) {
961             skipaddrClear(&pv->pv.pv_ip);
962             skipaddrSetVersion(&pv->pv.pv_ip, 6);
963             break;
964         }
965         rv = skStringParseIP(&pv->pv.pv_ip, str_value);
966         if (rv) {
967             goto PARSE_ERROR;
968         }
969 #if SK_ENABLE_IPV6
970         if (!skipaddrIsV6(&pv->pv.pv_ip)) {
971             skipaddrV4toV6(&pv->pv.pv_ip, &pv->pv.pv_ip);
972         }
973 #endif  /* SK_ENABLE_IPV6 */
974         break;
975 
976       case SKAGGBAG_FIELD_STARTTIME:
977       case SKAGGBAG_FIELD_ENDTIME:
978       case SKAGGBAG_FIELD_ANY_TIME:
979         if (NULL == str_value) {
980             pv->pv.pv_int = 0;
981             break;
982         }
983         rv = skStringParseDatetime(&tmp_time, str_value, NULL);
984         if (rv) {
985             goto PARSE_ERROR;
986         }
987         pv->pv.pv_int = sktimeGetSeconds(tmp_time);
988         break;
989 
990       case SKAGGBAG_FIELD_FLAGS:
991       case SKAGGBAG_FIELD_INIT_FLAGS:
992       case SKAGGBAG_FIELD_REST_FLAGS:
993         if (NULL == str_value) {
994             pv->pv.pv_int = 0;
995             break;
996         }
997         rv = skStringParseTCPFlags(&tcp_flags, str_value);
998         if (rv) {
999             goto PARSE_ERROR;
1000         }
1001         break;
1002 
1003       case SKAGGBAG_FIELD_TCP_STATE:
1004         if (NULL == str_value) {
1005             pv->pv.pv_int = 0;
1006             break;
1007         }
1008         rv = skStringParseTCPState(&tcp_flags, str_value);
1009         if (rv) {
1010             goto PARSE_ERROR;
1011         }
1012         break;
1013 
1014       case SKAGGBAG_FIELD_SID:
1015         if (NULL == str_value) {
1016             pv->pv.pv_int = SK_INVALID_SENSOR;
1017             break;
1018         }
1019         if (isdigit((int)*str_value)) {
1020             rv = skStringParseUint64(&pv->pv.pv_int, str_value, 0,
1021                                      SK_INVALID_SENSOR-1);
1022             if (rv) {
1023                 goto PARSE_ERROR;
1024             }
1025         } else {
1026             pv->pv.pv_int = sksiteSensorLookup(str_value);
1027         }
1028         break;
1029 
1030       case SKAGGBAG_FIELD_FTYPE_CLASS:
1031         if (NULL == str_value) {
1032             pv->pv.pv_int = SK_INVALID_FLOWTYPE;
1033             break;
1034         }
1035         pv->pv.pv_int = sksiteClassLookup(str_value);
1036         break;
1037 
1038       case SKAGGBAG_FIELD_FTYPE_TYPE:
1039         if (NULL == str_value) {
1040             pv->pv.pv_int = SK_INVALID_FLOWTYPE;
1041             break;
1042         }
1043         pv->pv.pv_int = (sksiteFlowtypeLookupByClassIDType(
1044                              parsed_value[SKAGGBAG_FIELD_FTYPE_CLASS].pv.pv_int,
1045                              str_value));
1046         break;
1047 
1048       case SKAGGBAG_FIELD_SIP_COUNTRY:
1049       case SKAGGBAG_FIELD_DIP_COUNTRY:
1050       case SKAGGBAG_FIELD_ANY_COUNTRY:
1051         if (NULL == str_value) {
1052             pv->pv.pv_int = SK_COUNTRYCODE_INVALID;
1053             break;
1054         }
1055         pv->pv.pv_int = skCountryNameToCode(str_value);
1056         break;
1057 
1058       default:
1059         break;
1060     }
1061 
1062     return 0;
1063 
1064   PARSE_ERROR:
1065     if (is_const_field) {
1066         skAppPrintErr("Invalid %s '%s=%s': %s",
1067                       appOptions[OPT_CONSTANT_FIELD].name,
1068                       skAggBagFieldTypeGetName((sk_aggbag_type_t)id),
1069                       str_value, skStringParseStrerror(rv));
1070     } else {
1071         badLine("Invalid %s '%s': %s",
1072                 skAggBagFieldTypeGetName((sk_aggbag_type_t)id), str_value,
1073                 skStringParseStrerror(rv));
1074     }
1075     return -1;
1076 }
1077 
1078 
1079 /*
1080  *    Parse all the NAME=VALUE arguments to the --constant-field
1081  *    switch (the switch may be repeated) which are specified in the
1082  *    'constant_field' vector.  Set the appropriate field(s) in the
1083  *    global 'parsed_value' array to the value(s) and update the
1084  *    'const_fields' vector with the numeric IDs of those fields.
1085  *
1086  *    Return 0 on success or if no constant fields were specified.
1087  *    Return -1 on failure.
1088  */
1089 static int
parseConstantFieldValues(void)1090 parseConstantFieldValues(
1091     void)
1092 {
1093     sk_stringmap_entry_t *sm_entry;
1094     sk_stringmap_status_t sm_err;
1095     parsed_value_t *pv;
1096     char *argument;
1097     char *cp;
1098     char *eq;
1099     size_t i;
1100 
1101     if (NULL == constant_field) {
1102         return 0;
1103     }
1104 
1105     const_fields = skVectorNew(sizeof(uint32_t));
1106     if (NULL == const_fields) {
1107         skAppPrintOutOfMemory("vector");
1108         return -1;
1109     }
1110 
1111     /* parse each of the NAME=VALUE arguments */
1112     for (i = 0; 0 == skVectorGetValue(&argument, constant_field, i); ++i) {
1113         /* find the '=' */
1114         eq = strchr(argument, '=');
1115         if (NULL == eq) {
1116             skAppPrintErr("Invalid %s '%s': Unable to find '=' character",
1117                           appOptions[OPT_CONSTANT_FIELD].name, argument);
1118             return -1;
1119         }
1120 
1121         /* ensure a value is given */
1122         cp = eq + 1;
1123         while (*cp && isspace((int)*cp)) {
1124             ++cp;
1125         }
1126         if ('\0' == *cp) {
1127             skAppPrintErr("Invalid %s '%s': No value specified for field",
1128                           appOptions[OPT_CONSTANT_FIELD].name, argument);
1129             return -1;
1130         }
1131 
1132         /* split into name and value */
1133         *eq = '\0';
1134         cp = eq + 1;
1135 
1136         /* find the field with that name */
1137         sm_err = skStringMapGetByName(field_map, argument, &sm_entry);
1138         if (sm_err) {
1139             skAppPrintErr("Invalid %s: Unable to find a field named '%s': %s",
1140                           appOptions[OPT_CONSTANT_FIELD].name, argument,
1141                           skStringMapStrerror(sm_err));
1142             return -1;
1143         }
1144 
1145         /* ensure the field is ok to use: not ignore, not a duplicate */
1146         if (AGGBAGBUILD_FIELD_IGNORED == sm_entry->id) {
1147             skAppPrintErr("Invalid %s: May not set a default value for '%s'",
1148                           appOptions[OPT_CONSTANT_FIELD].name, sm_entry->name);
1149             return -1;
1150         }
1151         assert(sm_entry->id < AGGBAGBUILD_ARRAY_SIZE);
1152         pv = &parsed_value[sm_entry->id];
1153         if (pv->pv_is_used) {
1154             skAppPrintErr(
1155                 "Invalid %s: A constant value for '%s' is already set",
1156                 appOptions[OPT_CONSTANT_FIELD].name, sm_entry->name);
1157             return -1;
1158         }
1159 
1160         pv->pv_is_used = 1;
1161         if (parseSingleField(cp, sm_entry->id, 1)) {
1162             return -1;
1163         }
1164         pv->pv_is_const = 1;
1165         pv->pv_is_fixed = 1;
1166 
1167         if (skVectorAppendValue(const_fields, &sm_entry->id)) {
1168             skAppPrintOutOfMemory("vector element");
1169             return -1;
1170         }
1171     }
1172 
1173     return 0;
1174 }
1175 
1176 
1177 /*
1178  *    Use the values in 'field_vec' and 'const_fields' to determine
1179  *    fields to parse and the key and counter settings for the AggBag
1180  *    file.
1181  *
1182  *    The function requires that 'field_vec' contains a list of field
1183  *    IDs that were determined from the --fields switch, the first
1184  *    line of a text file, or the key and counter of a Bag file.
1185  *
1186  *    Constant fields---specified in the 'const_fields' vector---are
1187  *    also added to key and/or counter fields.  If a constant field
1188  *    matches a value in 'field_vec', the value in 'field_vec' is
1189  *    changed to AGGBAGBUILD_FIELD_IGNORED to ignore the field.
1190  *
1191  *    The function ensures at least one key and one counter field are
1192  *    specified.  For IPset and Bag inputs, additional checks are
1193  *    performed.
1194  *
1195  *    When this function returns, the number of entries in the
1196  *    'field_vec' vector represents the number of textual columns in
1197  *    the input.
1198  *
1199  *    Return 0 on success and -1 on error.
1200  */
1201 static int
setAggBagFields(void)1202 setAggBagFields(
1203     void)
1204 {
1205     sk_aggbag_type_iter_t iter;
1206     sk_aggbag_type_t field_type;
1207     sk_vector_t *key_vec;
1208     sk_vector_t *counter_vec;
1209     sk_aggbag_type_t *id_array;
1210     unsigned int id_count;
1211     sk_bitmap_t *key_bitmap;
1212     sk_bitmap_t *counter_bitmap;
1213     size_t missing_fields;
1214     sk_aggbag_type_t t;
1215     parsed_value_t *pv;
1216     int have_type;
1217     uint32_t id;
1218     size_t i;
1219     unsigned int j;
1220 
1221     assert(field_vec);
1222 
1223     if (AGGBAGBUILD_INPUT_TEXT == input_type) {
1224         /* nothing to check for yet */
1225 #if AB_SETBAG
1226     } else if (AGGBAGBUILD_INPUT_IPSET == input_type) {
1227         /* for IPset input files, the field list must have a single
1228          * field that is either an IP type or ignore */
1229         if (skVectorGetCount(field_vec) != 1) {
1230             skAppPrintErr(
1231                 "When using --%s=%s, the --%s switch accepts exactly 1 field",
1232                 appOptions[OPT_INPUT_TYPE].name,
1233                 skStringMapGetFirstName(input_type_map, input_type),
1234                 appOptions[OPT_FIELDS].name);
1235             return -1;
1236         }
1237         skVectorGetValue(&id, field_vec, 0);
1238         switch (id) {
1239           case AGGBAGBUILD_FIELD_IGNORED:
1240             break;
1241           case SKAGGBAG_FIELD_SIPv4:
1242           case SKAGGBAG_FIELD_DIPv4:
1243           case SKAGGBAG_FIELD_NHIPv4:
1244           case SKAGGBAG_FIELD_ANY_IPv4:
1245           case SKAGGBAG_FIELD_SIPv6:
1246           case SKAGGBAG_FIELD_DIPv6:
1247           case SKAGGBAG_FIELD_NHIPv6:
1248           case SKAGGBAG_FIELD_ANY_IPv6:
1249             break;
1250           default:
1251             skAppPrintErr(
1252                 "When using --%s=%s, the --%s switch must be an IP type or %s",
1253                 appOptions[OPT_INPUT_TYPE].name,
1254                 skStringMapGetFirstName(input_type_map, input_type),
1255                 appOptions[OPT_FIELDS].name,
1256                 skStringMapGetFirstName(field_map, AGGBAGBUILD_FIELD_IGNORED));
1257             return -1;
1258         }
1259     } else if (AGGBAGBUILD_INPUT_BAG == input_type) {
1260         /* for Bag input files, the field list must have exactly two
1261          * fields */
1262         if (skVectorGetCount(field_vec) != 2) {
1263             skAppPrintErr(
1264                 "When using --%s=%s, the --%s switch accepts exactly 2 fields",
1265                 appOptions[OPT_INPUT_TYPE].name,
1266                 skStringMapGetFirstName(input_type_map, input_type),
1267                 appOptions[OPT_FIELDS].name);
1268             return -1;
1269         }
1270 #endif  /* AB_SETBAG */
1271     } else {
1272         skAbortBadCase(input_type);
1273     }
1274 
1275     key_bitmap = NULL;
1276     counter_bitmap = NULL;
1277 
1278     /* ensure the flowtype type field is the final field */
1279     have_type = 0;
1280 
1281     /* ignore fields that are duplicates of constant fields */
1282     for (i = 0; 0 == skVectorGetValue(&id, field_vec, i); ++i) {
1283         if (AGGBAGBUILD_FIELD_IGNORED != id) {
1284             assert(id < AGGBAGBUILD_ARRAY_SIZE);
1285             pv = &parsed_value[id];
1286             if (pv->pv_is_const) {
1287                 id = AGGBAGBUILD_FIELD_IGNORED;
1288                 skVectorSetValue(field_vec, i, &id);
1289             } else {
1290                 assert(0 == pv->pv_is_used);
1291                 pv->pv_is_used = 1;
1292             }
1293         }
1294     }
1295 
1296     /* we have a list of fields, but do not yet know which are
1297      * considered keys and which are counters.  the following code
1298      * determines that.  FIXME: This code should probably be in
1299      * skaggbag.c. */
1300 
1301     /* create bitmaps to hold key ids and counter ids */
1302     if (skBitmapCreate(&key_bitmap, AGGBAGBUILD_ARRAY_SIZE)) {
1303         skAppPrintOutOfMemory("bitmap");
1304         exit(EXIT_FAILURE);
1305     }
1306     if (skBitmapCreate(&counter_bitmap, AGGBAGBUILD_ARRAY_SIZE)) {
1307         skAppPrintOutOfMemory("bitmap");
1308         skBitmapDestroy(&key_bitmap);
1309         exit(EXIT_FAILURE);
1310     }
1311     skAggBagFieldTypeIteratorBind(&iter, SK_AGGBAG_KEY);
1312     while (skAggBagFieldTypeIteratorNext(&iter, &field_type)) {
1313         assert(AGGBAGBUILD_ARRAY_SIZE > (int)field_type);
1314         skBitmapSetBit(key_bitmap, field_type);
1315     }
1316     skAggBagFieldTypeIteratorBind(&iter, SK_AGGBAG_COUNTER);
1317     while (skAggBagFieldTypeIteratorNext(&iter, &field_type)) {
1318         assert(AGGBAGBUILD_ARRAY_SIZE > (int)field_type);
1319         skBitmapSetBit(counter_bitmap, field_type);
1320     }
1321 
1322     /* create vectors to hold the IDs that are being used */
1323     key_vec = skVectorNew(sizeof(sk_aggbag_type_t));
1324     counter_vec = skVectorNew(sizeof(sk_aggbag_type_t));
1325     if (!key_vec || !counter_vec) {
1326         skAppPrintOutOfMemory("vector");
1327         skVectorDestroy(key_vec);
1328         skVectorDestroy(counter_vec);
1329         skBitmapDestroy(&key_bitmap);
1330         skBitmapDestroy(&counter_bitmap);
1331         exit(EXIT_FAILURE);
1332     }
1333 
1334     /* add any constant fields, then the other fields, to the key or
1335      * counter vectors */
1336     for (j = 0; j < 2; ++j) {
1337         const sk_vector_t *v = (0 == j) ? const_fields : field_vec;
1338         if (NULL == v) {
1339             continue;
1340         }
1341         for (i = 0; 0 == skVectorGetValue(&id, v, i); ++i) {
1342             if (SKAGGBAG_FIELD_FTYPE_TYPE == id) {
1343                 have_type = 1;
1344             } else if (skBitmapGetBit(key_bitmap, id) == 1) {
1345                 t = (sk_aggbag_type_t)id;
1346                 skVectorAppendValue(key_vec, &t);
1347             } else if (skBitmapGetBit(counter_bitmap, id) == 1) {
1348                 t = (sk_aggbag_type_t)id;
1349                 skVectorAppendValue(counter_vec, &t);
1350             } else if (id != AGGBAGBUILD_FIELD_IGNORED || v != field_vec) {
1351                 skAppPrintErr("Unknown field id %u", id);
1352                 skAbort();
1353             }
1354         }
1355     }
1356 
1357     /* no longer need the bitmaps */
1358     skBitmapDestroy(&key_bitmap);
1359     skBitmapDestroy(&counter_bitmap);
1360 
1361     if (have_type) {
1362         t = SKAGGBAG_FIELD_FTYPE_TYPE;
1363         skVectorAppendValue(key_vec, &t);
1364     }
1365 
1366     /* ensure key and counter are defined */
1367     missing_fields = ((0 == skVectorGetCount(key_vec))
1368                       + 2 * (0 == skVectorGetCount(counter_vec)));
1369     if (missing_fields) {
1370         skAppPrintErr(
1371             "Do not have any %s fields; at least one %s field %s required",
1372             ((missing_fields == 3)
1373              ? "key fields or counter"
1374              : ((missing_fields == 1) ? "key" : "counter")),
1375             ((missing_fields == 3)
1376              ? "key field and one counter"
1377              : ((missing_fields == 1) ? "key" : "counter")),
1378             ((missing_fields == 3) ? "are" : "is"));
1379         skVectorDestroy(key_vec);
1380         skVectorDestroy(counter_vec);
1381         return -1;
1382     }
1383 
1384     /* set key and counter */
1385     id_count = skVectorGetCount(key_vec);
1386     id_array = (sk_aggbag_type_t *)skVectorToArrayAlloc(key_vec);
1387     skAggBagSetKeyFields(ab, id_count, id_array);
1388     skVectorDestroy(key_vec);
1389     free(id_array);
1390 
1391     id_count = skVectorGetCount(counter_vec);
1392     id_array = (sk_aggbag_type_t *)skVectorToArrayAlloc(counter_vec);
1393     skAggBagSetCounterFields(ab, id_count, id_array);
1394     skVectorDestroy(counter_vec);
1395     free(id_array);
1396 
1397 #if AB_SETBAG
1398     /* special handling for IPset and Bag input types: number of
1399      * non-constant key and counter fields is restricted */
1400     if (AGGBAGBUILD_INPUT_TEXT != input_type) {
1401         sk_aggbag_field_t field;
1402         const char *limit_msg;
1403         uint32_t count;
1404 
1405         for (i = 0; i < 2; ++i) {
1406             count = 0;
1407             if (0 == i) {
1408                 skAggBagInitializeKey(ab, NULL, &field);
1409                 limit_msg = "Only one key field";
1410             } else {
1411                 skAggBagInitializeCounter(ab, NULL, &field);
1412                 if (AGGBAGBUILD_INPUT_IPSET == input_type) {
1413                     limit_msg = "No counter fields";
1414                     count = 1;
1415                 } else {
1416                     limit_msg = "Only one counter field";
1417                 }
1418             }
1419             do {
1420                 id = skAggBagFieldIterGetType(&field);
1421                 assert(id < AGGBAGBUILD_ARRAY_SIZE);
1422                 pv = &parsed_value[id];
1423                 count += (1 != pv->pv_is_const);
1424             } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
1425 
1426             if (count > 1) {
1427                 skAppPrintErr("Invalid %s: %s may be specified when %s is %s",
1428                               appOptions[OPT_FIELDS].name, limit_msg,
1429                               appOptions[OPT_INPUT_TYPE].name,
1430                               skStringMapGetFirstName(input_type_map,
1431                                                       input_type));
1432                 return -1;
1433             }
1434         }
1435     }
1436 #endif  /* #if AB_SETBAG */
1437 
1438     return 0;
1439 }
1440 
1441 
1442 /*
1443  *    Determine if the input line in 'first_line' is a title line.
1444  *
1445  *    If 'no_titles' is set, simply return 0.  Otherwise, check
1446  *    whether a name in 'first_line' matches a field name.  Return 1
1447  *    if a field name is found to match, 0 otherwise.
1448  */
1449 static int
checkFirstLineIsTitle(char * first_line)1450 checkFirstLineIsTitle(
1451     char               *first_line)
1452 {
1453     sk_stringmap_entry_t *entry;
1454     char *cp;
1455     char *ep;
1456     size_t i;
1457     int is_title = 0;
1458     uint32_t id;
1459 
1460     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1461     assert(first_line);
1462     assert(fields);
1463     assert(field_vec);
1464 
1465     if (no_titles) {
1466         return 0;
1467     }
1468 
1469     /* we have the fields, need to determine if first_line is a
1470      * title line. */
1471     cp = first_line;
1472     for (i = 0; 0 == skVectorGetValue(&id, field_vec, i); ++i) {
1473         ep = strchr(cp, column_separator);
1474         if (!is_title && (id != AGGBAGBUILD_FIELD_IGNORED)) {
1475             if (ep) {
1476                 *ep = '\0';
1477             }
1478             while ((isspace((int)*cp))) {
1479                 ++cp;
1480             }
1481             if ('\0' == *cp) {
1482                 /* ignore */
1483             } else if (!isdigit((int)*cp)) {
1484                 if (skStringMapGetByName(field_map, cp, &entry)
1485                     == SKSTRINGMAP_OK)
1486                 {
1487                     is_title = 1;
1488                 }
1489             }
1490         }
1491         if (ep) {
1492             *ep = column_separator;
1493             cp = ep + 1;
1494         } else {
1495             cp += strlen(cp);
1496             if (is_title && (1 + i != skVectorGetCount(field_vec))) {
1497                 badLine(("Too few fields on title line:"
1498                          " found %" SK_PRIuZ " of %" SK_PRIuZ " expected"),
1499                         i, skVectorGetCount(field_vec));
1500             }
1501             break;
1502         }
1503     }
1504 
1505     if (is_title && (*cp != '\0')
1506         && (strlen(cp) != strspn(cp, AGGBAGBUILD_WHITESPACE)))
1507     {
1508         badLine(("Too many fields on title line:"
1509                  " text follows delimiter number %" SK_PRIuZ),
1510                 skVectorGetCount(field_vec));
1511     }
1512 
1513     return is_title;
1514 }
1515 
1516 
1517 /*
1518  *    Remove all whitespace from 'first_line' and convert the
1519  *    column_separator to a comma.
1520  *
1521  *    FIXME: How should this code handle double column_separators?
1522  */
1523 static void
convertTitleLineToCSV(char * first_line)1524 convertTitleLineToCSV(
1525     char               *first_line)
1526 {
1527     char *cp, *ep;
1528 
1529     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1530     assert(first_line);
1531 
1532     cp = ep = first_line;
1533     while (*cp) {
1534         if (*cp == column_separator) {
1535             /* convert column_separator to comma for parseFieldList() */
1536             *ep++ = ',';
1537             ++cp;
1538         } else if (isspace((int)*cp)) {
1539             /* ignore spaces */
1540             ++cp;
1541         } else {
1542             /* copy character */
1543             *ep++ = *cp++;
1544         }
1545     }
1546     *ep = *cp;
1547 }
1548 
1549 
1550 /*
1551  *    Determine which fields (columns) to parse across all input files
1552  *    based on the title line in given in 'first_line'.
1553  *
1554  *    This function determines a set of fields based on the column
1555  *    titles specified in 'first_line', then invokes setAggBagFields()
1556  *    to initialize the key and counter fields on the AggBag.  See
1557  *    setAggBagFields() for details.
1558  *
1559  *    Return 0 on success and -1 on error.
1560  */
1561 static int
parseFirstLineAsFieldList(char * first_line)1562 parseFirstLineAsFieldList(
1563     char               *first_line)
1564 {
1565     char *errmsg;
1566 
1567     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1568     assert(first_line);
1569     assert(NULL == field_vec);
1570     assert(NULL == fields);
1571     assert(0 == no_titles);
1572 
1573     /* make the title line look like the argument to --fields and then
1574      * parse it as a field list */
1575     convertTitleLineToCSV(first_line);
1576     if (parseFieldList(first_line, &errmsg)) {
1577         skAppPrintErr(
1578             "Unable to guess fields from first line of file '%s': %s",
1579             skStreamGetPathname(curline->stream), errmsg);
1580         return -1;
1581     }
1582 
1583     /* use 'field_vec' to set the key and value fields */
1584     if (setAggBagFields()) {
1585         return -1;
1586     }
1587 
1588     return 0;
1589 }
1590 
1591 
1592 /*
1593  *    Update the global 'field_vec' based on the titles present in
1594  *    'first_line'.
1595  *
1596  *    This function is used when parsing multiple text files and
1597  *    'first_line' contains the first line of the second, third, etc
1598  *    text file.  It updates the 'field_vec' based on the new title
1599  *    line.
1600  *
1601  *    The parseFirstLineAsFieldList() function is used when parsing
1602  *    the first line of the first text file.
1603  */
1604 static int
updateFieldVectorMultipleFiles(char * first_line)1605 updateFieldVectorMultipleFiles(
1606     char               *first_line)
1607 {
1608     parsed_value_t *pv;
1609     uint32_t id;
1610     size_t i;
1611     char *errmsg;
1612 
1613     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1614     assert(first_line);
1615     assert(field_vec);
1616     assert(NULL == fields);
1617     assert(0 == no_titles);
1618 
1619     /* clear all non-const values in current field_vec */
1620     for (i = 0; 0 == skVectorGetValue(&id, field_vec, i); ++i) {
1621         if (AGGBAGBUILD_FIELD_IGNORED != id) {
1622             assert(id < AGGBAGBUILD_ARRAY_SIZE);
1623             pv = &parsed_value[id];
1624             assert(1 == pv->pv_is_used);
1625             assert(0 == pv->pv_is_const);
1626             pv->pv_is_fixed = 1;
1627             pv->pv_raw = NULL;
1628             parseSingleField(NULL, id, 0);
1629         }
1630     }
1631 
1632     /* make the title line look like the argument to --fields and then
1633      * parse it as a field list */
1634     convertTitleLineToCSV(first_line);
1635     if (parseFieldList(first_line, &errmsg)) {
1636         skAppPrintErr(
1637             "Unable to guess fields from first line of file '%s': %s",
1638             skStreamGetPathname(curline->stream), errmsg);
1639         return -1;
1640     }
1641 
1642     /* update the field_vec */
1643     for (i = 0; 0 == skVectorGetValue(&id, field_vec, i); ++i) {
1644         if (AGGBAGBUILD_FIELD_IGNORED != id) {
1645             assert(id < AGGBAGBUILD_ARRAY_SIZE);
1646             pv = &parsed_value[id];
1647             if (pv->pv_is_const || !pv->pv_is_used) {
1648                 id = AGGBAGBUILD_FIELD_IGNORED;
1649                 skVectorSetValue(field_vec, i, &id);
1650             } else {
1651                 pv->pv_is_fixed = 0;
1652             }
1653         }
1654     }
1655 
1656     return 0;
1657 }
1658 
1659 
1660 /*
1661  *    Parse one row of textual values used by the AggBag's key and
1662  *    counter and insert the key/counter into the AggBag.
1663  *
1664  *    This function is used when reading textual data, and it
1665  *    processes a single row of values.  This function expects the
1666  *    string value for each key or counter field to be set in the
1667  *    global 'parsed_value' array.
1668  *
1669  *    Return 0 on success or -1 if a string value cannot be parsed.
1670  */
1671 static int
processFields(void)1672 processFields(
1673     void)
1674 {
1675     sk_aggbag_aggregate_t *agg;
1676     sk_aggbag_aggregate_t key;
1677     sk_aggbag_aggregate_t counter;
1678     sk_aggbag_field_t field;
1679     parsed_value_t *pv;
1680     uint32_t id;
1681     int i;
1682 
1683     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1684 
1685     /* loop twice: once for key and again for counter */
1686     for (i = 0; i < 2; ++i) {
1687         if (0 == i) {
1688             agg = &key;
1689             skAggBagInitializeKey(ab, agg, &field);
1690         } else {
1691             agg = &counter;
1692             skAggBagInitializeCounter(ab, agg, &field);
1693         }
1694         do {
1695             id = skAggBagFieldIterGetType(&field);
1696             assert(id < AGGBAGBUILD_ARRAY_SIZE);
1697             pv = &parsed_value[id];
1698             assert(pv->pv_is_used);
1699             if (!pv->pv_is_fixed) {
1700                 if (parseSingleField(pv->pv_raw, id, 0)) {
1701                     return -1;
1702                 }
1703             }
1704             switch (id) {
1705               case SKAGGBAG_FIELD_SIPv4:
1706               case SKAGGBAG_FIELD_DIPv4:
1707               case SKAGGBAG_FIELD_NHIPv4:
1708               case SKAGGBAG_FIELD_ANY_IPv4:
1709               case SKAGGBAG_FIELD_SIPv6:
1710               case SKAGGBAG_FIELD_DIPv6:
1711               case SKAGGBAG_FIELD_NHIPv6:
1712               case SKAGGBAG_FIELD_ANY_IPv6:
1713                 skAggBagAggregateSetIPAddress(agg, &field, &pv->pv.pv_ip);
1714                 break;
1715 
1716               default:
1717                 skAggBagAggregateSetUnsigned(agg, &field, pv->pv.pv_int);
1718                 break;
1719             }
1720         } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
1721     }
1722 
1723     skAggBagKeyCounterAdd(ab, &key, &counter, NULL);
1724     return 0;
1725 }
1726 
1727 
1728 /*
1729  *  ok = processInputText();
1730  *
1731  *    Read each line of text from the stream in the global 'curline'
1732  *    structure, create a key and a counter from the fields on the
1733  *    line, and add the key and counter to the global aggbag
1734  *    structure.
1735  *
1736  *    Return 0 on success, -1 on failure.
1737  */
1738 static int
processInputText(void)1739 processInputText(
1740     void)
1741 {
1742     static char line[AGGBAGBUILD_LINE_BUFSIZE];
1743     char *cp;
1744     char *ep;
1745     int is_first_line = 1;
1746     size_t i;
1747     uint32_t id;
1748     int rv;
1749 
1750     assert(AGGBAGBUILD_INPUT_TEXT == input_type);
1751 
1752     if (skStreamSetCommentStart(curline->stream, "#")) {
1753         return 1;
1754     }
1755 
1756     /* read until end of file */
1757     while ((rv = skStreamGetLine(curline->stream, line, sizeof(line),
1758                                  &curline->lineno))
1759            != SKSTREAM_ERR_EOF)
1760     {
1761         if (bad_stream) {
1762             /* stash copy; used by badLine() */
1763             strncpy(curline->text, line, sizeof(curline->text));
1764         }
1765         switch (rv) {
1766           case SKSTREAM_OK:
1767             /* good, we got our line */
1768             break;
1769           case SKSTREAM_ERR_LONG_LINE:
1770             /* bad: line was longer than sizeof(line) */
1771             badLine("Input line too long");
1772             continue;
1773           default:
1774             /* unexpected error */
1775             skStreamPrintLastErr(curline->stream, rv, &skAppPrintErr);
1776             return -1;
1777         }
1778 
1779         if (is_first_line) {
1780             /* this is the first line in the file. either initialize
1781              * or update the parsed_value array based on the values in
1782              * the --fields switch or on this line */
1783             is_first_line = 0;
1784 
1785             if (fields) {
1786                 /* user provided the list of fields; only need to
1787                  * determine whether to skip this line */
1788                 if (checkFirstLineIsTitle(line)) {
1789                     continue;
1790                 }
1791             } else {
1792                 assert(0 == no_titles);
1793                 if (field_vec) {
1794                     /* this is a second text file; recompute which
1795                      * fields to ignore */
1796                     if (updateFieldVectorMultipleFiles(line)) {
1797                         return -1;
1798                     }
1799                 } else {
1800                     /* create field_vec based on title line */
1801                     if (parseFirstLineAsFieldList(line)) {
1802                         return -1;
1803                     }
1804                 }
1805                 /* this line must be a title */
1806                 continue;
1807             }
1808         }
1809 
1810         /* We have a line; process it */
1811         cp = line;
1812         i = 0;
1813 
1814         /* break the line into separate fields */
1815         while (0 == skVectorGetValue(&id, field_vec, i)) {
1816             if (id != AGGBAGBUILD_FIELD_IGNORED) {
1817                 assert(1 == parsed_value[id].pv_is_used);
1818                 assert(0 == parsed_value[id].pv_is_const);
1819                 while (isspace((int)*cp)) {
1820                     ++cp;
1821                 }
1822                 if (*cp == column_separator) {
1823                     parsed_value[id].pv_raw = NULL;
1824                 } else {
1825                     parsed_value[id].pv_raw = cp;
1826                 }
1827             }
1828             ++i;
1829 
1830             /* find end of current field */
1831             ep = strchr(cp, column_separator);
1832             if (NULL == ep) {
1833                 /* at end of line; break out of while() */
1834                 cp += strlen(cp);
1835                 break;
1836             } else {
1837                 *ep = '\0';
1838                 cp = ep + 1;
1839             }
1840         }
1841 
1842         if (*cp != '\0') {
1843             if (strlen(cp) != strspn(cp, AGGBAGBUILD_WHITESPACE)) {
1844                 /* there are extra fields at the end */
1845                 badLine(("Too many fields on line:"
1846                          " text follows delimiter number %" SK_PRIuZ),
1847                         skVectorGetCount(field_vec));
1848             }
1849         } else if (i != skVectorGetCount(field_vec)) {
1850             /* there are too few fields */
1851             badLine(("Too few fields on line:"
1852                      " found %" SK_PRIuZ " of %" SK_PRIuZ " expected"),
1853                     i, skVectorGetCount(field_vec));
1854         } else {
1855             processFields();
1856         }
1857     } /* outer loop over lines  */
1858 
1859     return 0;
1860 }
1861 
1862 
1863 #if AB_SETBAG
1864 /*
1865  *  ok = processInputIPSet(stream);
1866  *
1867  *    Read an IPset from 'stream'.  Use each IP and one or more
1868  *    constant fields to create a key and a counter, and add the key
1869  *    and the counter to the global aggbag structure.
1870  *
1871  *    Return 0 on success, -1 on failure.
1872  */
1873 static int
processInputIPSet(skstream_t * stream)1874 processInputIPSet(
1875     skstream_t         *stream)
1876 {
1877     skipset_t *set = NULL;
1878     skipset_iterator_t iter;
1879     sk_ipv6policy_t policy;
1880     sk_aggbag_aggregate_t key;
1881     sk_aggbag_aggregate_t counter;
1882     sk_aggbag_field_t field;
1883     parsed_value_t *pv;
1884     unsigned int key_field_count;
1885     unsigned int nonfixed_count;
1886     skipaddr_t ipaddr;
1887     uint32_t prefix;
1888     uint32_t id;
1889     ssize_t rv;
1890 
1891     assert(AGGBAGBUILD_INPUT_IPSET == input_type);
1892 
1893     /* read the IPset from the stream.  FIXME: change this to use
1894      * skIPSetProcessStream(). */
1895     rv = skIPSetRead(&set, stream);
1896     if (rv) {
1897         if (SKIPSET_ERR_FILEIO == rv) {
1898             char errbuf[2 * PATH_MAX];
1899             skStreamLastErrMessage(stream, skStreamGetLastReturnValue(stream),
1900                                    errbuf, sizeof(errbuf));
1901             skAppPrintErr("Unable to read IPset from '%s': %s",
1902                           skStreamGetPathname(stream), errbuf);
1903         } else {
1904             skAppPrintErr("Unable to read IPset from '%s': %s",
1905                           skStreamGetPathname(stream), skIPSetStrerror(rv));
1906         }
1907         return -1;
1908     }
1909 
1910     if (NULL == field_vec) {
1911         const char *faux_list;
1912         char *errmsg;
1913 
1914         faux_list = skIPSetContainsV6(set) ? "any-IPv6" : "any-IPv4";
1915         if (parseFieldList(faux_list, &errmsg)) {
1916             skAppPrintErr("Error parsing programmer field list '%s': %s",
1917                           faux_list, errmsg);
1918             exit(EXIT_FAILURE);
1919         }
1920         if (setAggBagFields()) {
1921             return -1;
1922         }
1923     }
1924 
1925     /* initialize the counter, which must contain only fixed/constant
1926      * values */
1927     skAggBagInitializeCounter(ab, &counter, &field);
1928     do {
1929         id = skAggBagFieldIterGetType(&field);
1930         assert(id < AGGBAGBUILD_ARRAY_SIZE);
1931         pv = &parsed_value[id];
1932         assert(pv->pv_is_used);
1933         assert(pv->pv_is_fixed);
1934         skAggBagAggregateSetUnsigned(&counter, &field, pv->pv.pv_int);
1935     } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
1936 
1937     /* count the number of fields in the key, and determine whether an
1938      * IPv4 or IPv6 IP is wanted. */
1939     key_field_count = 0;
1940     nonfixed_count = 0;
1941     policy = SK_IPV6POLICY_MIX;
1942     skAggBagInitializeKey(ab, NULL, &field);
1943     do {
1944         id = skAggBagFieldIterGetType(&field);
1945         assert(id < AGGBAGBUILD_ARRAY_SIZE);
1946         pv = &parsed_value[id];
1947         assert(pv->pv_is_used);
1948         if (!pv->pv_is_fixed) {
1949             ++nonfixed_count;
1950             switch (id) {
1951               case SKAGGBAG_FIELD_SIPv4:
1952               case SKAGGBAG_FIELD_DIPv4:
1953               case SKAGGBAG_FIELD_NHIPv4:
1954               case SKAGGBAG_FIELD_ANY_IPv4:
1955                 policy = SK_IPV6POLICY_ASV4;
1956                 break;
1957               case SKAGGBAG_FIELD_SIPv6:
1958               case SKAGGBAG_FIELD_DIPv6:
1959               case SKAGGBAG_FIELD_NHIPv6:
1960               case SKAGGBAG_FIELD_ANY_IPv6:
1961                 policy = SK_IPV6POLICY_FORCE;
1962                 break;
1963               default:
1964                 skAbortBadCase(id);
1965             }
1966         }
1967         ++key_field_count;
1968     } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
1969 
1970     if (SK_IPV6POLICY_MIX == policy) {
1971         if (nonfixed_count) {
1972             skAbort();
1973         }
1974         /* the key is also fixed; fill it in */
1975         skAggBagInitializeKey(ab, &key, &field);
1976         do {
1977             id = skAggBagFieldIterGetType(&field);
1978             assert(id < AGGBAGBUILD_ARRAY_SIZE);
1979             pv = &parsed_value[id];
1980             assert(pv->pv_is_used);
1981             assert(pv->pv_is_fixed);
1982             skAggBagAggregateSetUnsigned(&key, &field, pv->pv.pv_int);
1983         } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
1984     }
1985 
1986     skIPSetIteratorBind(&iter, set, 0, policy);
1987 
1988     if (0 == nonfixed_count) {
1989         while (skIPSetIteratorNext(&iter, &ipaddr, &prefix) == SK_ITERATOR_OK){
1990             skAggBagAggregateSetIPAddress(&key, &field, &ipaddr);
1991             skAggBagKeyCounterAdd(ab, &key, &counter, NULL);
1992         }
1993     } else if (1 == key_field_count) {
1994         /* no need to move the field, just update the IP address in
1995          * the key each time */
1996         skAggBagInitializeKey(ab, &key, &field);
1997         pv = &parsed_value[skAggBagFieldIterGetType(&field)];
1998         while (skIPSetIteratorNext(&iter, &ipaddr, &prefix) == SK_ITERATOR_OK){
1999             skAggBagAggregateSetIPAddress(&key, &field, &ipaddr);
2000             skAggBagKeyCounterAdd(ab, &key, &counter, NULL);
2001         }
2002     } else {
2003         while (skIPSetIteratorNext(&iter, &ipaddr, &prefix) == SK_ITERATOR_OK){
2004             skAggBagInitializeKey(ab, &key, &field);
2005             do {
2006                 id = skAggBagFieldIterGetType(&field);
2007                 assert(id < AGGBAGBUILD_ARRAY_SIZE);
2008                 pv = &parsed_value[id];
2009                 assert(pv->pv_is_used);
2010                 if (!pv->pv_is_fixed) {
2011                     skAggBagAggregateSetIPAddress(&key, &field, &ipaddr);
2012                 } else {
2013                     switch (id) {
2014                       case SKAGGBAG_FIELD_SIPv4:
2015                       case SKAGGBAG_FIELD_DIPv4:
2016                       case SKAGGBAG_FIELD_NHIPv4:
2017                       case SKAGGBAG_FIELD_ANY_IPv4:
2018                       case SKAGGBAG_FIELD_SIPv6:
2019                       case SKAGGBAG_FIELD_DIPv6:
2020                       case SKAGGBAG_FIELD_NHIPv6:
2021                       case SKAGGBAG_FIELD_ANY_IPv6:
2022                         skAggBagAggregateSetIPAddress(
2023                             &key, &field, &pv->pv.pv_ip);
2024                         break;
2025                       default:
2026                         skAggBagAggregateSetUnsigned(
2027                             &key, &field, pv->pv.pv_int);
2028                         break;
2029                     }
2030                 }
2031             } while (skAggBagFieldIterNext(&field) == SK_ITERATOR_OK);
2032             skAggBagKeyCounterAdd(ab, &key, &counter, NULL);
2033         }
2034     }
2035 
2036     skIPSetDestroy(&set);
2037     return 0;
2038 }
2039 
2040 
2041 /*
2042  *  ok = processInputBag(stream);
2043  *
2044  *    Read a Bag from 'stream'.  add the key and the counter to the
2045  *    global aggbag structure.
2046  *
2047  *    Return 0 on success, -1 on failure.
2048  */
2049 static int
processInputBag(skstream_t * stream)2050 processInputBag(
2051     skstream_t         *stream)
2052 {
2053     skBag_t *bag = NULL;
2054     skBagIterator_t *iter = NULL;
2055     skBagTypedKey_t b_key;
2056     skBagTypedCounter_t b_counter;
2057     skBagErr_t b_err;
2058     sk_aggbag_aggregate_t ab_key;
2059     sk_aggbag_aggregate_t ab_counter;
2060     sk_aggbag_field_t k_field;
2061     sk_aggbag_field_t c_field;
2062     parsed_value_t *pv;
2063     uint32_t id;
2064 
2065     assert(AGGBAGBUILD_INPUT_BAG == input_type);
2066 
2067     /* read the bag from the stream; FIXME: change this to use
2068      * skBagProcessStreamTyped(). */
2069     b_err = skBagRead(&bag, stream);
2070     if (b_err) {
2071         if (SKBAG_ERR_READ == b_err) {
2072             char errbuf[2 * PATH_MAX];
2073             skStreamLastErrMessage(stream, skStreamGetLastReturnValue(stream),
2074                                    errbuf, sizeof(errbuf));
2075             skAppPrintErr("Unable to read Bag from '%s': %s",
2076                           skStreamGetPathname(stream), errbuf);
2077         } else {
2078             skAppPrintErr("Unable to read Bag from '%s': %s",
2079                           skStreamGetPathname(stream), skBagStrerror(b_err));
2080         }
2081         skBagDestroy(&bag);
2082         return -1;
2083     }
2084 
2085     if (NULL == field_vec) {
2086         char k_name[SKBAG_MAX_FIELD_BUFLEN];
2087         char c_name[SKBAG_MAX_FIELD_BUFLEN];
2088         char faux_list[3 * SKBAG_MAX_FIELD_BUFLEN];
2089         skBagFieldType_t t;
2090         char *errmsg;
2091 
2092         t = skBagKeyFieldName(bag, k_name, sizeof(k_name));
2093         if (SKBAG_FIELD_CUSTOM == t) {
2094             snprintf(k_name, sizeof(k_name), "%s",
2095                      skAggBagFieldTypeGetName(SKAGGBAG_FIELD_CUSTOM_KEY));
2096         }
2097         t = skBagCounterFieldName(bag, c_name, sizeof(c_name));
2098         if (SKBAG_FIELD_CUSTOM == t) {
2099             snprintf(c_name, sizeof(c_name), "%s",
2100                      skAggBagFieldTypeGetName(SKAGGBAG_FIELD_CUSTOM_COUNTER));
2101         }
2102 
2103         snprintf(faux_list, sizeof(faux_list), "%s,%s", k_name, c_name);
2104         if (parseFieldList(faux_list, &errmsg)) {
2105             skAppPrintErr("Error parsing field list '%s': %s",
2106                           faux_list, errmsg);
2107             exit(EXIT_FAILURE);
2108         }
2109         if (setAggBagFields()) {
2110             return -1;
2111         }
2112     }
2113 
2114     /* initialize 'key' with any contant key fields and determine the
2115      * type of the key that the Bag's key is to fill */
2116     skAggBagInitializeKey(ab, &ab_key, &k_field);
2117     do {
2118         id = skAggBagFieldIterGetType(&k_field);
2119         assert(id < AGGBAGBUILD_ARRAY_SIZE);
2120         pv = &parsed_value[id];
2121         assert(pv->pv_is_used);
2122         switch (id) {
2123           case SKAGGBAG_FIELD_SIPv4:
2124           case SKAGGBAG_FIELD_DIPv4:
2125           case SKAGGBAG_FIELD_NHIPv4:
2126           case SKAGGBAG_FIELD_ANY_IPv4:
2127           case SKAGGBAG_FIELD_SIPv6:
2128           case SKAGGBAG_FIELD_DIPv6:
2129           case SKAGGBAG_FIELD_NHIPv6:
2130           case SKAGGBAG_FIELD_ANY_IPv6:
2131             if (!pv->pv_is_fixed) {
2132                 b_key.type = SKBAG_KEY_IPADDR;
2133             } else {
2134                 skAggBagAggregateSetIPAddress(&ab_key, &k_field, &pv->pv.pv_ip);
2135             }
2136             break;
2137           default:
2138             if (!pv->pv_is_fixed) {
2139                 b_key.type = SKBAG_KEY_U32;
2140             } else {
2141                 skAggBagAggregateSetUnsigned(&ab_key, &k_field, pv->pv.pv_int);
2142             }
2143             break;
2144         }
2145     } while (skAggBagFieldIterNext(&k_field) == SK_ITERATOR_OK);
2146 
2147     /* initialize 'counter' with any contant counter fields */
2148     skAggBagInitializeCounter(ab, &ab_counter, &c_field);
2149     do {
2150         id = skAggBagFieldIterGetType(&c_field);
2151         assert(id < AGGBAGBUILD_ARRAY_SIZE);
2152         pv = &parsed_value[id];
2153         assert(pv->pv_is_used);
2154         if (!pv->pv_is_fixed) {
2155             b_counter.type = SKBAG_COUNTER_U64;
2156         } else {
2157             skAggBagAggregateSetUnsigned(&ab_counter, &c_field, pv->pv.pv_int);
2158         }
2159     } while (skAggBagFieldIterNext(&c_field) == SK_ITERATOR_OK);
2160 
2161     /* Position 'k_field' and 'c_field' on the field to map the bag's
2162      * key and counter into.  Note use of NULL as second parameter */
2163     skAggBagInitializeKey(ab, NULL, &k_field);
2164     do {
2165         pv = &parsed_value[skAggBagFieldIterGetType(&k_field)];
2166     } while (pv->pv_is_fixed
2167              && skAggBagFieldIterNext(&k_field) == SK_ITERATOR_OK);
2168 
2169     skAggBagInitializeCounter(ab, NULL, &c_field);
2170     do {
2171         pv = &parsed_value[skAggBagFieldIterGetType(&c_field)];
2172     } while (pv->pv_is_fixed
2173              && skAggBagFieldIterNext(&c_field) == SK_ITERATOR_OK);
2174 
2175     /* iterate over the entries in the bag  */
2176     skBagIteratorCreate(bag, &iter);
2177     if (SKBAG_KEY_IPADDR == b_key.type) {
2178         while (skBagIteratorNextTyped(iter, &b_key, &b_counter) == SKBAG_OK) {
2179             skAggBagAggregateSetIPAddress(
2180                 &ab_key, &k_field, &b_key.val.addr);
2181             skAggBagAggregateSetUnsigned(
2182                 &ab_counter, &c_field, b_counter.val.u64);
2183             skAggBagKeyCounterAdd(ab, &ab_key, &ab_counter, NULL);
2184         }
2185     } else {
2186         while (skBagIteratorNextTyped(iter, &b_key, &b_counter) == SKBAG_OK) {
2187             skAggBagAggregateSetUnsigned(
2188                 &ab_key, &k_field, b_key.val.u32);
2189             skAggBagAggregateSetUnsigned(
2190                 &ab_counter, &c_field, b_counter.val.u64);
2191             skAggBagKeyCounterAdd(ab, &ab_key, &ab_counter, NULL);
2192         }
2193     }
2194 
2195     skBagIteratorDestroy(iter);
2196     skBagDestroy(&bag);
2197     return 0;
2198 }
2199 #endif  /* #if AB_SETBAG */
2200 
2201 
main(int argc,char ** argv)2202 int main(int argc, char **argv)
2203 {
2204     skcontent_t stream_type;
2205     skstream_t *stream;
2206     char *fname;
2207     ssize_t rv = 0;
2208 
2209     appSetup(argc, argv);
2210 
2211     if (AGGBAGBUILD_INPUT_TEXT == input_type) {
2212         stream_type = SK_CONTENT_TEXT;
2213     } else {
2214         stream_type = SK_CONTENT_SILK;
2215     }
2216 
2217     while ((rv = skOptionsCtxNextArgument(optctx, &fname)) == 0) {
2218         /* create an input stream and open the file */
2219         stream = NULL;
2220         if ((rv = skStreamCreate(&stream, SK_IO_READ, stream_type))
2221             || (rv = skStreamBind(stream, fname))
2222             || (rv = skStreamOpen(stream)))
2223         {
2224             skStreamPrintLastErr(stream, rv, &skAppPrintErr);
2225             skStreamDestroy(&stream);
2226             rv = -1;
2227             break;
2228         }
2229         switch (input_type) {
2230           case AGGBAGBUILD_INPUT_TEXT:
2231             curline->lineno = 0;
2232             curline->stream = stream;
2233             rv = processInputText();
2234             break;
2235 #if AB_SETBAG
2236           case AGGBAGBUILD_INPUT_IPSET:
2237             rv = processInputIPSet(stream);
2238             break;
2239           case AGGBAGBUILD_INPUT_BAG:
2240             rv = processInputBag(stream);
2241             break;
2242 #endif  /* #if AB_SETBAG */
2243         }
2244 
2245         skStreamDestroy(&stream);
2246         if (rv != 0) {
2247             break;
2248         }
2249     }
2250 
2251     if (1 == rv) {
2252         rv = skAggBagWrite(ab, out_stream);
2253         if (rv) {
2254             if (SKAGGBAG_E_WRITE == rv) {
2255                 skStreamPrintLastErr(out_stream,
2256                                      skStreamGetLastReturnValue(out_stream),
2257                                      &skAppPrintErr);
2258             } else {
2259                 skAppPrintErr("Error writing Aggregate Bag to '%s': %s",
2260                               skStreamGetPathname(out_stream),
2261                               skAggBagStrerror(rv));
2262             }
2263             exit(EXIT_FAILURE);
2264         }
2265 
2266         if (bad_line_count && !verbose) {
2267             if (bad_stream) {
2268                 skAppPrintErr(("Could not parse %u line%s;"
2269                                " invalid input written to '%s'"),
2270                               bad_line_count,
2271                               ((1 == bad_line_count) ? "" : "s"),
2272                               skStreamGetPathname(bad_stream));
2273             } else {
2274                 skAppPrintErr(("Could not parse %u line%s;"
2275                                " try again with --%s or --%s for details"),
2276                               bad_line_count,
2277                               ((1 == bad_line_count) ? "" : "s"),
2278                               appOptions[OPT_STOP_ON_ERROR].name,
2279                               appOptions[OPT_VERBOSE].name);
2280             }
2281         }
2282     }
2283 
2284     skAggBagDestroy(&ab);
2285 
2286     appTeardown();
2287 
2288     return 0;
2289 }
2290 
2291 
2292 /*
2293 ** Local Variables:
2294 ** mode:c
2295 ** indent-tabs-mode:nil
2296 ** c-basic-offset:4
2297 ** End:
2298 */
2299