1 /** 2 * @file pdag.h 3 * @brief The parse DAG object. 4 * @class ln_pdag pdag.h 5 *//* 6 * Copyright 2015 by Rainer Gerhards and Adiscon GmbH. 7 * 8 * Released under ASL 2.0. 9 */ 10 #ifndef LIBLOGNORM_PDAG_H_INCLUDED 11 #define LIBLOGNORM_PDAG_H_INCLUDED 12 #include <stdio.h> 13 #include <libestr.h> 14 #include <stdint.h> 15 16 #define META_KEY "metadata" 17 #define ORIGINAL_MSG_KEY "originalmsg" 18 #define UNPARSED_DATA_KEY "unparsed-data" 19 #define EXEC_PATH_KEY "exec-path" 20 #define META_RULE_KEY "rule" 21 #define RULE_MOCKUP_KEY "mockup" 22 #define RULE_LOCATION_KEY "location" 23 24 typedef struct ln_pdag ln_pdag; /**< the parse DAG object */ 25 typedef struct ln_parser_s ln_parser_t; 26 typedef struct npb npb_t; 27 typedef uint8_t prsid_t; 28 29 struct ln_type_pdag; 30 31 /** 32 * parser IDs. 33 * 34 * These identfy a parser. VERY IMPORTANT: they must start at zero 35 * and continously increment. They must exactly match the index 36 * of the respective parser inside the parser lookup table. 37 */ 38 #define PRS_LITERAL 0 39 #define PRS_REPEAT 1 40 #if 0 41 #define PRS_DATE_RFC3164 1 42 #define PRS_DATE_RFC5424 2 43 #define PRS_NUMBER 3 44 #define PRS_FLOAT 4 45 #define PRS_HEXNUMBER 5 46 #define PRS_KERNEL_TIMESTAMP 6 47 #define PRS_WHITESPACE 7 48 #define PRS_IPV4 8 49 #define PRS_IPV6 9 50 #define PRS_WORD 10 51 #define PRS_ALPHA 11 52 #define PRS_REST 12 53 #define PRS_OP_QUOTED_STRING 13 54 #define PRS_QUOTED_STRING 14 55 #define PRS_DATE_ISO 15 56 #define PRS_TIME_24HR 16 57 #define PRS_TIME_12HR 17 58 #define PRS_DURATION 18 59 #define PRS_CISCO_INTERFACE_SPEC 19 60 #define PRS_NAME_VALUE_LIST 20 61 #define PRS_JSON 21 62 #define PRS_CEE_SYSLOG 22 63 #define PRS_MAC48 23 64 #define PRS_CEF 24 65 #define PRS_CHECKPOINT_LEA 25 66 #define PRS_v2_IPTABLES 26 67 #define PRS_STRING_TO 27 68 #define PRS_CHAR_TO 28 69 #define PRS_CHAR_SEP 29 70 #endif 71 72 #define PRS_CUSTOM_TYPE 254 73 #define PRS_INVALID 255 74 /* NOTE: current max limit on parser ID is 255, because we use uint8_t 75 * for the prsid_t type (which gains cache performance). If more parsers 76 * come up, the type must be modified. 77 */ 78 /** 79 * object describing a specific parser instance. 80 */ 81 struct ln_parser_s { 82 prsid_t prsid; /**< parser ID (for lookup table) */ 83 ln_pdag *node; /**< node to branch to if parser succeeded */ 84 void *parser_data; /**< opaque data that the field-parser understands */ 85 size_t custTypeIdx; /**< index to custom type, if such is used */ 86 int prio; /**< priority (combination of user- and parser-specific parts) */ 87 const char *name; /**< field name */ 88 const char *conf; /**< configuration as printable json for comparison reasons */ 89 }; 90 91 struct ln_parser_info { 92 const char *name; /**< parser name as used in rule base */ 93 int prio; /**< parser specific prio in range 0..255 */ 94 int (*construct)(ln_ctx ctx, json_object *const json, void **); 95 int (*parser)(npb_t *npb, size_t*, void *const, 96 size_t*, struct json_object **); /**< parser to use */ 97 void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */ 98 #ifdef ADVANCED_STATS 99 uint64_t called; 100 uint64_t success; 101 #endif 102 }; 103 104 105 /* parse DAG object 106 */ 107 struct ln_pdag { 108 ln_ctx ctx; /**< our context */ // TODO: why do we need it? 109 ln_parser_t *parsers; /* array of parsers to try */ 110 prsid_t nparsers; /**< current table size (prsid_t slighly abused) */ 111 struct { 112 unsigned isTerminal:1; /**< designates this node a terminal sequence */ 113 unsigned visited:1; /**< work var for recursive procedures */ 114 } flags; 115 struct json_object *tags; /**< tags to assign to events of this type */ 116 int refcnt; /**< reference count for deleting tracking */ 117 struct { 118 unsigned called; 119 unsigned backtracked; /**< incremented when backtracking was initiated */ 120 unsigned terminated; 121 } stats; /**< usage statistics */ 122 const char *rb_id; /**< human-readable rulebase identifier, for stats etc */ 123 124 // experimental, move outside later 125 const char *rb_file; 126 unsigned int rb_lineno; 127 }; 128 129 #ifdef ADVANCED_STATS 130 struct advstats { 131 int pathlen; 132 int parser_calls; /**< parser calls in general during path */ 133 int lit_parser_calls; /**< same just for the literal parser */ 134 int backtracked; 135 int recursion_level; 136 es_str_t *exec_path; 137 }; 138 #define ADVSTATS_MAX_ENTITIES 100 139 extern int advstats_max_pathlen; 140 extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES]; 141 extern int advstats_max_backtracked; 142 extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES]; 143 #endif 144 145 /** the "normalization paramater block" (npb) 146 * This structure is passed to all normalization routines including 147 * parsers. It contains data that commonly needs to be passed, 148 * like the to be parsed string and its length, as well as read/write 149 * data which is used to track information over the general 150 * normalization process (like the execution path, if requested). 151 * The main purpose is to save stack writes by eliminating the 152 * need for using multiple function parameters. Note that it 153 * must be carefully considered which items to add to the 154 * npb - those that change from recursion level to recursion 155 * level are NOT to be placed here. 156 */ 157 struct npb { 158 ln_ctx ctx; 159 const char *str; /**< to-be-normalized message */ 160 size_t strLen; /**< length of it */ 161 size_t parsedTo; /**< up to which byte could this be parsed? */ 162 es_str_t *rule; /**< a mock-up of the rule used to parse */ 163 es_str_t *exec_path; 164 #ifdef ADVANCED_STATS 165 int pathlen; 166 int backtracked; 167 int recursion_level; 168 struct advstats astats; 169 #endif 170 }; 171 172 /* Methods */ 173 174 /** 175 * Allocates and initializes a new parse DAG node. 176 * @memberof ln_pdag 177 * 178 * @param[in] ctx current library context. This MUST match the 179 * context of the parent. 180 * @param[in] parent pointer to the new node inside the parent 181 * 182 * @return pointer to new node or NULL on error 183 */ 184 struct ln_pdag* ln_newPDAG(ln_ctx ctx); 185 186 187 /** 188 * Free a parse DAG and destruct all members. 189 * @memberof ln_pdag 190 * 191 * @param[in] DAG pointer to pdag to free 192 */ 193 void ln_pdagDelete(struct ln_pdag *DAG); 194 195 196 /** 197 * Add parser to dag node. 198 * Works on unoptimzed dag. 199 * 200 * @param[in] pdag pointer to pdag to modify 201 * @param[in] parser parser definition 202 * @returns 0 on success, something else otherwise 203 */ 204 int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *); 205 206 207 /** 208 * Display the content of a pdag (debug function). 209 * This is a debug aid that spits out a textual representation 210 * of the provided pdag via multiple calls of the debug callback. 211 * 212 * @param DAG pdag to display 213 */ 214 void ln_displayPDAG(ln_ctx ctx); 215 216 217 /** 218 * Generate a DOT graph. 219 * Well, actually it does not generate the graph itself, but a 220 * control file that is suitable for the GNU DOT tool. Such a file 221 * can be very useful to understand complex sample databases 222 * (not to mention that it is probably fun for those creating 223 * samples). 224 * The dot commands are appended to the provided string. 225 * 226 * @param[in] DAG pdag to display 227 * @param[out] str string which receives the DOT commands. 228 */ 229 void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str); 230 231 232 /** 233 * Build a pdag based on the provided string, but only if necessary. 234 * The passed-in DAG is searched and traversed for str. If a node exactly 235 * matching str is found, that node is returned. If no exact match is found, 236 * a new node is added. Existing nodes may be split, if a so-far common 237 * prefix needs to be split in order to add the new node. 238 * 239 * @param[in] DAG root of the current DAG 240 * @param[in] str string to be added 241 * @param[in] offs offset into str where match needs to start 242 * (this is required for recursive calls to handle 243 * common prefixes) 244 * @return NULL on error, otherwise the pdag leaf that 245 * corresponds to the parameters passed. 246 */ 247 struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs); 248 249 250 prsid_t ln_parserName2ID(const char *const __restrict__ name); 251 int ln_pdagOptimize(ln_ctx ctx); 252 void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int); 253 ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit); 254 ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf); 255 struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd); 256 void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp); 257 258 /* friends */ 259 int 260 ln_normalizeRec(npb_t *const __restrict__ npb, 261 struct ln_pdag *dag, 262 const size_t offs, 263 const int bPartialMatch, 264 struct json_object *json, 265 struct ln_pdag **endNode 266 ); 267 268 #endif /* #ifndef LOGNORM_PDAG_H_INCLUDED */ 269