1 /**
2  * @file pdag.h
3  * @brief The parse DAG object.
4  * @class ln_pdag pdag.h
5  *//*
6  * Copyright 2015 by Rainer Gerhards and Adiscon GmbH.
7  *
8  * Released under ASL 2.0.
9  */
10 #ifndef LIBLOGNORM_PDAG_H_INCLUDED
11 #define	LIBLOGNORM_PDAG_H_INCLUDED
12 #include <stdio.h>
13 #include <libestr.h>
14 #include <stdint.h>
15 
/*
 * Well-known key names (string constants).
 * NOTE(review): presumably these are the field names used in the JSON
 * produced by normalization -- confirm against the users of these macros.
 */
#define META_KEY "metadata"
#define ORIGINAL_MSG_KEY "originalmsg"
#define UNPARSED_DATA_KEY "unparsed-data"
#define EXEC_PATH_KEY "exec-path"
#define META_RULE_KEY "rule"
#define RULE_MOCKUP_KEY "mockup"
#define RULE_LOCATION_KEY "location"
23 
typedef struct ln_pdag ln_pdag;		/**< the parse DAG object */
typedef struct ln_parser_s ln_parser_t;	/**< a specific parser instance */
typedef struct npb npb_t;		/**< the normalization parameter block */
typedef uint8_t prsid_t;		/**< parser ID; 8 bit wide, so IDs are limited to 0..255 */

/* forward declaration only; the type is not defined in this header */
struct ln_type_pdag;
30 
/**
 * parser IDs.
 *
 * These identify a parser. VERY IMPORTANT: they must start at zero
 * and continuously increment. They must exactly match the index
 * of the respective parser inside the parser lookup table.
 */
#define PRS_LITERAL			0
#define PRS_REPEAT			1
/* The list below is disabled via "#if 0" and is therefore not compiled.
 * Note that its values overlap with the active IDs above (1 is already
 * PRS_REPEAT), so it must not simply be re-enabled as is.
 */
#if 0
#define PRS_DATE_RFC3164		1
#define PRS_DATE_RFC5424		2
#define PRS_NUMBER			3
#define PRS_FLOAT			4
#define PRS_HEXNUMBER			5
#define PRS_KERNEL_TIMESTAMP		6
#define PRS_WHITESPACE			7
#define PRS_IPV4			8
#define PRS_IPV6			9
#define PRS_WORD			10
#define PRS_ALPHA			11
#define PRS_REST			12
#define PRS_OP_QUOTED_STRING		13
#define PRS_QUOTED_STRING		14
#define PRS_DATE_ISO			15
#define PRS_TIME_24HR			16
#define PRS_TIME_12HR			17
#define PRS_DURATION			18
#define PRS_CISCO_INTERFACE_SPEC	19
#define PRS_NAME_VALUE_LIST		20
#define PRS_JSON			21
#define PRS_CEE_SYSLOG			22
#define PRS_MAC48			23
#define PRS_CEF				24
#define PRS_CHECKPOINT_LEA		25
#define PRS_v2_IPTABLES			26
#define PRS_STRING_TO			27
#define PRS_CHAR_TO			28
#define PRS_CHAR_SEP			29
#endif

#define PRS_CUSTOM_TYPE			254
#define PRS_INVALID			255
/* NOTE: current max limit on parser ID is 255, because we use uint8_t
 * for the prsid_t type (which gains cache performance). If more parsers
 * come up, the type must be modified.
 */
78 /**
79  * object describing a specific parser instance.
80  */
81 struct ln_parser_s {
82 	prsid_t prsid;		/**< parser ID (for lookup table) */
83 	ln_pdag *node;		/**< node to branch to if parser succeeded */
84 	void *parser_data;	/**< opaque data that the field-parser understands */
85 	size_t custTypeIdx;	/**< index to custom type, if such is used */
86 	int prio;		/**< priority (combination of user- and parser-specific parts) */
87 	const char *name;	/**< field name */
88 	const char *conf;	/**< configuration as printable json for comparison reasons */
89 };
90 
91 struct ln_parser_info {
92 	const char *name;	/**< parser name as used in rule base */
93 	int prio;		/**< parser specific prio in range 0..255 */
94 	int (*construct)(ln_ctx ctx, json_object *const json, void **);
95 	int (*parser)(npb_t *npb, size_t*, void *const,
96 				  size_t*, struct json_object **); /**< parser to use */
97 	void (*destruct)(ln_ctx, void *const); /* note: destructor is only needed if parser data exists */
98 #ifdef ADVANCED_STATS
99 	uint64_t called;
100 	uint64_t success;
101 #endif
102 };
103 
104 
105 /* parse DAG object
106  */
107 struct ln_pdag {
108 	ln_ctx ctx;			/**< our context */ // TODO: why do we need it?
109 	ln_parser_t *parsers;		/* array of parsers to try */
110 	prsid_t nparsers;		/**< current table size (prsid_t slighly abused) */
111 	struct {
112 		unsigned isTerminal:1;	/**< designates this node a terminal sequence */
113 		unsigned visited:1;	/**< work var for recursive procedures */
114 	} flags;
115 	struct json_object *tags;	/**< tags to assign to events of this type */
116 	int refcnt;			/**< reference count for deleting tracking */
117 	struct {
118 		unsigned called;
119 		unsigned backtracked;	/**< incremented when backtracking was initiated */
120 		unsigned terminated;
121 	} stats;	/**< usage statistics */
122 	const char *rb_id;		/**< human-readable rulebase identifier, for stats etc */
123 
124 	// experimental, move outside later
125 	const char *rb_file;
126 	unsigned int rb_lineno;
127 };
128 
#ifdef ADVANCED_STATS
/**
 * Advanced statistics data; only available when ADVANCED_STATS
 * is defined at compile time.
 */
struct advstats {
	int pathlen;
	int parser_calls;		/**< parser calls in general during path */
	int lit_parser_calls;		/**< same just for the literal parser */
	int backtracked;
	int recursion_level;
	es_str_t *exec_path;
};
/** number of entries in the advstats_pathlens and advstats_backtracks arrays */
#define ADVSTATS_MAX_ENTITIES 100
/* global statistics data; declared here, defined elsewhere */
extern int advstats_max_pathlen;
extern int advstats_pathlens[ADVSTATS_MAX_ENTITIES];
extern int advstats_max_backtracked;
extern int advstats_backtracks[ADVSTATS_MAX_ENTITIES];
#endif
144 
145 /** the "normalization paramater block" (npb)
146  * This structure is passed to all normalization routines including
147  * parsers. It contains data that commonly needs to be passed,
148  * like the to be parsed string and its length, as well as read/write
149  * data which is used to track information over the general
150  * normalization process (like the execution path, if requested).
151  * The main purpose is to save stack writes by eliminating the
152  * need for using multiple function parameters. Note that it
153  * must be carefully considered which items to add to the
154  * npb - those that change from recursion level to recursion
155  * level are NOT to be placed here.
156  */
157 struct npb {
158 	ln_ctx ctx;
159 	const char *str;		/**< to-be-normalized message */
160 	size_t strLen;			/**< length of it */
161 	size_t parsedTo;		/**< up to which byte could this be parsed? */
162 	es_str_t *rule;			/**< a mock-up of the rule used to parse */
163 	es_str_t *exec_path;
164 #ifdef ADVANCED_STATS
165 	int pathlen;
166 	int backtracked;
167 	int recursion_level;
168 	struct advstats astats;
169 #endif
170 };
171 
172 /* Methods */
173 
174 /**
175  * Allocates and initializes a new parse DAG node.
176  * @memberof ln_pdag
177  *
178  * @param[in] ctx current library context. This MUST match the
179  * 		context of the parent.
180  * @param[in] parent pointer to the new node inside the parent
181  *
182  * @return pointer to new node or NULL on error
183  */
184 struct ln_pdag* ln_newPDAG(ln_ctx ctx);
185 
186 
/**
 * Free a parse DAG and destruct all members.
 * @memberof ln_pdag
 *
 * @param[in] DAG pointer to the pdag to free
 */
void ln_pdagDelete(struct ln_pdag *DAG);
194 
195 
196 /**
197  * Add parser to dag node.
198  * Works on unoptimzed dag.
199  *
200  * @param[in] pdag pointer to pdag to modify
201  * @param[in] parser parser definition
202  * @returns 0 on success, something else otherwise
203  */
204 int ln_pdagAddParser(ln_ctx ctx, struct ln_pdag **pdag, json_object *);
205 
206 
207 /**
208  * Display the content of a pdag (debug function).
209  * This is a debug aid that spits out a textual representation
210  * of the provided pdag via multiple calls of the debug callback.
211  *
212  * @param DAG pdag to display
213  */
214 void ln_displayPDAG(ln_ctx ctx);
215 
216 
217 /**
218  * Generate a DOT graph.
219  * Well, actually it does not generate the graph itself, but a
220  * control file that is suitable for the GNU DOT tool. Such a file
221  * can be very useful to understand complex sample databases
222  * (not to mention that it is probably fun for those creating
223  * samples).
224  * The dot commands are appended to the provided string.
225  *
226  * @param[in] DAG pdag to display
227  * @param[out] str string which receives the DOT commands.
228  */
229 void ln_genDotPDAGGraph(struct ln_pdag *DAG, es_str_t **str);
230 
231 
232 /**
233  * Build a pdag based on the provided string, but only if necessary.
234  * The passed-in DAG is searched and traversed for str. If a node exactly
235  * matching str is found, that node is returned. If no exact match is found,
236  * a new node is added. Existing nodes may be split, if a so-far common
237  * prefix needs to be split in order to add the new node.
238  *
239  * @param[in] DAG root of the current DAG
240  * @param[in] str string to be added
241  * @param[in] offs offset into str where match needs to start
242  *             (this is required for recursive calls to handle
243  *             common prefixes)
244  * @return NULL on error, otherwise the pdag leaf that
245  *         corresponds to the parameters passed.
246  */
247 struct ln_pdag * ln_buildPDAG(struct ln_pdag *DAG, es_str_t *str, size_t offs);
248 
249 
250 prsid_t ln_parserName2ID(const char *const __restrict__ name);
251 int ln_pdagOptimize(ln_ctx ctx);
252 void ln_fullPdagStats(ln_ctx ctx, FILE *const fp, const int);
253 ln_parser_t * ln_newLiteralParser(ln_ctx ctx, char lit);
254 ln_parser_t* ln_newParser(ln_ctx ctx, json_object *const prscnf);
255 struct ln_type_pdag * ln_pdagFindType(ln_ctx ctx, const char *const __restrict__ name, const int bAdd);
256 void ln_fullPDagStatsDOT(ln_ctx ctx, FILE *const fp);
257 
258 /* friends */
259 int
260 ln_normalizeRec(npb_t *const __restrict__ npb,
261 	struct ln_pdag *dag,
262 	const size_t offs,
263 	const int bPartialMatch,
264 	struct json_object *json,
265 	struct ln_pdag **endNode
266 );
267 
#endif /* #ifndef LIBLOGNORM_PDAG_H_INCLUDED */
269