1 /* samp.c -- code for ln_samp objects.
2  * This code handles rulebase processing. Rulebases have been called
3  * "sample bases" in the early days of liblognorm, thus the name.
4  *
5  * Copyright 2010-2018 by Rainer Gerhards and Adiscon GmbH.
6  *
7  * Modified by Pavel Levshin (pavel@levshin.spb.ru) in 2013
8  *
9  * This file is part of liblognorm.
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with this library; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
24  *
25  * A copy of the LGPL v2.1 can be found in the file "COPYING" in this distribution.
26  */
27 #include "config.h"
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <stdarg.h>
31 #include <string.h>
32 #include <assert.h>
33 #include <ctype.h>
34 #include <errno.h>
35 
36 #include "liblognorm.h"
37 #include "lognorm.h"
38 #include "samp.h"
39 #include "internal.h"
40 #include "parser.h"
41 #include "pdag.h"
42 #include "v1_liblognorm.h"
43 #include "v1_ptree.h"
44 
45 void
ln_sampFree(ln_ctx ctx,struct ln_samp * samp)46 ln_sampFree(ln_ctx __attribute__((unused)) ctx, struct ln_samp *samp)
47 {
48 	free(samp);
49 }
50 
51 static int
ln_parseLegacyFieldDescr(ln_ctx ctx,const char * const buf,const size_t lenBuf,size_t * bufOffs,es_str_t ** str,json_object ** prscnf)52 ln_parseLegacyFieldDescr(ln_ctx ctx,
53 	const char *const buf,
54 	const size_t lenBuf,
55 	size_t *bufOffs,
56 	es_str_t **str,
57 	json_object **prscnf)
58 {
59 	int r = 0;
60 	char *cstr;	/* for debug mode strings */
61 	char *ftype = NULL;
62 	char name[MAX_FIELDNAME_LEN];
63 	size_t iDst;
64 	struct json_object *json = NULL;
65 	char *ed = NULL;
66 	es_size_t i = *bufOffs;
67 	es_str_t *edata = NULL;
68 
69 	for(  iDst = 0
70 	    ; iDst < (MAX_FIELDNAME_LEN - 1) && i < lenBuf && buf[i] != ':'
71 	    ; ++iDst) {
72 		name[iDst] = buf[i++];
73 	}
74 	name[iDst] = '\0';
75 	if(iDst == (MAX_FIELDNAME_LEN - 1)) {
76 		ln_errprintf(ctx, 0, "field name too long in: %s", buf+(*bufOffs));
77 		FAIL(LN_INVLDFDESCR);
78 	}
79 	if(i == lenBuf) {
80 		ln_errprintf(ctx, 0, "field definition wrong in: %s", buf+(*bufOffs));
81 		FAIL(LN_INVLDFDESCR);
82 	}
83 
84 	if(iDst == 0) {
85 		FAIL(LN_INVLDFDESCR);
86 	}
87 
88 	if(ctx->debug) {
89 		ln_dbgprintf(ctx, "parsed field: '%s'", name);
90 	}
91 
92 	if(buf[i] != ':') {
93 		ln_errprintf(ctx, 0, "missing colon in: %s", buf+(*bufOffs));
94 		FAIL(LN_INVLDFDESCR);
95 	}
96 	++i; /* skip ':' */
97 
98 	/* parse and process type (trailing whitespace must be trimmed) */
99 	es_emptyStr(*str);
100 	size_t j = i;
101 	/* scan for terminator */
102 	while(j < lenBuf && buf[j] != ':' && buf[j] != '{' && buf[j] != '%')
103 		++j;
104 	/* now trim trailing space backwards */
105 	size_t next = j;
106 	--j;
107 	while(j >= i && isspace(buf[j]))
108 		--j;
109 	/* now copy */
110 	while(i <= j) {
111 		CHKR(es_addChar(str, buf[i++]));
112 	}
113 	/* finally move i to consumed position */
114 	i = next;
115 
116 	if(i == lenBuf) {
117 		ln_errprintf(ctx, 0, "premature end (missing %%?) in: %s", buf+(*bufOffs));
118 		FAIL(LN_INVLDFDESCR);
119 	}
120 
121 	ftype = es_str2cstr(*str, NULL);
122 	ln_dbgprintf(ctx, "field type '%s', i %d", ftype, i);
123 
124 	if(buf[i] == '{') {
125 		struct json_tokener *tokener = json_tokener_new();
126 		json = json_tokener_parse_ex(tokener, buf+i, (int) (lenBuf - i));
127 		if(json == NULL) {
128 			ln_errprintf(ctx, 0, "invalid json in '%s'", buf+i);
129 		}
130 		i += tokener->char_offset;
131 		json_tokener_free(tokener);
132 	}
133 
134 	if(buf[i] == '%') {
135 		i++;
136 	} else {
137 		/* parse extra data */
138 		CHKN(edata = es_newStr(8));
139 		i++;
140 		while(i < lenBuf) {
141 			if(buf[i] == '%') {
142 				++i;
143 				break; /* end of field */
144 			}
145 			CHKR(es_addChar(&edata, buf[i++]));
146 		}
147 		es_unescapeStr(edata);
148 		if(ctx->debug) {
149 			cstr = es_str2cstr(edata, NULL);
150 			ln_dbgprintf(ctx, "parsed extra data: '%s'", cstr);
151 			free(cstr);
152 		}
153 	}
154 
155 	struct json_object *val;
156 	*prscnf = json_object_new_object();
157 	CHKN(val = json_object_new_string(name));
158 	json_object_object_add(*prscnf, "name", val);
159 	CHKN(val = json_object_new_string(ftype));
160 	json_object_object_add(*prscnf, "type", val);
161 	if(edata != NULL) {
162 		ed = es_str2cstr(edata, " ");
163 		CHKN(val = json_object_new_string(ed));
164 		json_object_object_add(*prscnf, "extradata", val);
165 	}
166 	if(json != NULL) {
167 		/* now we need to merge the json params into the main object */
168 		struct json_object_iterator it = json_object_iter_begin(json);
169 		struct json_object_iterator itEnd = json_object_iter_end(json);
170 		while (!json_object_iter_equal(&it, &itEnd)) {
171 			struct json_object *const v = json_object_iter_peek_value(&it);
172 			json_object_get(v);
173 			json_object_object_add(*prscnf, json_object_iter_peek_name(&it), v);
174 			json_object_iter_next(&it);
175 		}
176 	}
177 
178 	*bufOffs = i;
179 done:
180 	free(ed);
181 	if(edata != NULL)
182 		es_deleteStr(edata);
183 	free(ftype);
184 	if(json != NULL)
185 		json_object_put(json);
186 	return r;
187 }
188 
189 /**
190  * Extract a field description from a sample.
191  * The field description is added to the tail of the current
192  * subtree's field list. The parse buffer must be position on the
193  * leading '%' that starts a field definition. It is a program error
194  * if this condition is not met.
195  *
196  * Note that we break up the object model and access ptree members
197  * directly. Let's consider us a friend of ptree. This is necessary
198  * to optimize the structure for a high-speed parsing process.
199  *
200  * @param[in] str a temporary work string. This is passed in to save the
201  * 		  creation overhead
202  * @returns 0 on success, something else otherwise
203  */
204 static int
addFieldDescr(ln_ctx ctx,struct ln_pdag ** pdag,es_str_t * rule,size_t * bufOffs,es_str_t ** str)205 addFieldDescr(ln_ctx ctx, struct ln_pdag **pdag, es_str_t *rule,
206 	        size_t *bufOffs, es_str_t **str)
207 {
208 	int r = 0;
209 	es_size_t i = *bufOffs;
210 	char *ftype = NULL;
211 	const char *buf;
212 	es_size_t lenBuf;
213 	struct json_object *prs_config = NULL;
214 
215 	buf = (const char*)es_getBufAddr(rule);
216 	lenBuf = es_strlen(rule);
217 	assert(buf[i] == '%');
218 	++i;	/* "eat" ':' */
219 
220 	/* skip leading whitespace in field name */
221 	while(i < lenBuf && isspace(buf[i]))
222 		++i;
223 	/* check if we have new-style json config */
224 	if(buf[i] == '{' || buf[i] == '[') {
225 		struct json_tokener *tokener = json_tokener_new();
226 		prs_config = json_tokener_parse_ex(tokener, buf+i, (int) (lenBuf - i));
227 		i += tokener->char_offset;
228 		json_tokener_free(tokener);
229 		if(prs_config == NULL || i == lenBuf || buf[i] != '%') {
230 			ln_errprintf(ctx, 0, "invalid json in '%s'", buf+i);
231 			r = -1;
232 			goto done;
233 		}
234 		*bufOffs = i+1; /* eat '%' - if above ensures it is present */
235 	} else {
236 		*bufOffs = i;
237 		CHKR(ln_parseLegacyFieldDescr(ctx, buf, lenBuf, bufOffs, str, &prs_config));
238 	}
239 
240 	CHKR(ln_pdagAddParser(ctx, pdag, prs_config));
241 
242 done:
243 	free(ftype);
244 	return r;
245 }
246 
247 
248 /**
249  *  Construct a literal parser json definition.
250  */
251 static json_object *
newLiteralParserJSONConf(char lit)252 newLiteralParserJSONConf(char lit)
253 {
254 	char buf[] = "x";
255 	buf[0] = lit;
256 	struct json_object *val;
257 	struct json_object *prscnf = json_object_new_object();
258 
259 	val = json_object_new_string("literal");
260 	json_object_object_add(prscnf, "type", val);
261 
262 	val = json_object_new_string(buf);
263 	json_object_object_add(prscnf, "text", val);
264 
265 	return prscnf;
266 }
267 
268 /**
269  * Parse a Literal string out of the template and add it to the tree.
270  * This function is used to create the unoptimized tree. So we do
271  * one node for each character. These will be compacted by the optimizer
272  * in a later stage. The advantage is that we do not need to care about
273  * splitting the tree. As such the processing is fairly simple:
274  *
275  *   for each character in literal (left-to-right):
276  *      create literal parser object o
277  *      add new DAG node o, advance to it
278  *
279  * @param[in] ctx the context
280  * @param[in/out] subtree on entry, current subtree, on exist newest
281  *    		deepest subtree
282  * @param[in] rule string with current rule
283  * @param[in/out] bufOffs parse pointer, up to which offset is parsed
284  * 		(is updated so that it points to first char after consumed
285  * 		string on exit).
286  * @param    str a work buffer, provided to prevent creation of a new object
287  * @return 0 on success, something else otherwise
288  */
289 static int
parseLiteral(ln_ctx ctx,struct ln_pdag ** pdag,es_str_t * rule,size_t * const __restrict__ bufOffs,es_str_t ** str)290 parseLiteral(ln_ctx ctx, struct ln_pdag **pdag, es_str_t *rule,
291 	     size_t *const __restrict__ bufOffs, es_str_t **str)
292 {
293 	int r = 0;
294 	size_t i = *bufOffs;
295 	unsigned char *buf = es_getBufAddr(rule);
296 	const size_t lenBuf = es_strlen(rule);
297 	const char *cstr = NULL;
298 
299 	es_emptyStr(*str);
300 	while(i < lenBuf) {
301 		if(buf[i] == '%') {
302 			if(i+1 < lenBuf && buf[i+1] != '%') {
303 				break; /* field start is end of literal */
304 			}
305 			if (++i == lenBuf) break;
306 		}
307 		CHKR(es_addChar(str, buf[i]));
308 		++i;
309 	}
310 
311 	es_unescapeStr(*str);
312 	cstr = es_str2cstr(*str, NULL);
313 	if(ctx->debug) {
314 		ln_dbgprintf(ctx, "parsed literal: '%s'", cstr);
315 	}
316 
317 	*bufOffs = i;
318 
319 	/* we now add the string to the tree */
320 	for(i = 0 ; cstr[i] != '\0' ; ++i) {
321 		struct json_object *const prscnf =
322 			newLiteralParserJSONConf(cstr[i]);
323 		CHKN(prscnf);
324 		CHKR(ln_pdagAddParser(ctx, pdag, prscnf));
325 	}
326 
327 	r = 0;
328 
329 done:
330 	free((void*)cstr);
331 	return r;
332 }
333 
334 
335 /* Implementation note:
336  * We read in the sample, and split it into chunks of literal text and
337  * fields. Each literal text is added as whole to the tree, as is each
338  * field individually. To do so, we keep track of our current subtree
339  * root, which changes whenever a new part of the tree is build. It is
340  * set to the then-lowest part of the tree, where the next step sample
341  * data is to be added.
342  *
343  * This function processes the whole string or returns an error.
344  *
345  * format: literal1%field:type:extra-data%literal2
346  *
347  * @returns the new dag root (or NULL in case of error)
348  */
349 static int
addSampToTree(ln_ctx ctx,es_str_t * rule,ln_pdag * dag,struct json_object * tagBucket)350 addSampToTree(ln_ctx ctx,
351 	es_str_t *rule,
352 	ln_pdag *dag,
353 	struct json_object *tagBucket)
354 {
355 	int r = -1;
356 	es_str_t *str = NULL;
357 	size_t i;
358 
359 	CHKN(str = es_newStr(256));
360 	i = 0;
361 	while(i < es_strlen(rule)) {
362 		LN_DBGPRINTF(ctx, "addSampToTree %zu of %d", i, es_strlen(rule));
363 		CHKR(parseLiteral(ctx, &dag, rule, &i, &str));
364 		/* After the literal there can be field only*/
365 		if (i < es_strlen(rule)) {
366 			CHKR(addFieldDescr(ctx, &dag, rule, &i, &str));
367 			if (i == es_strlen(rule)) {
368 				/* finish the tree with empty literal to avoid false merging*/
369 				CHKR(parseLiteral(ctx, &dag, rule, &i, &str));
370 			}
371 		}
372 	}
373 
374 	LN_DBGPRINTF(ctx, "end addSampToTree %zu of %d", i, es_strlen(rule));
375 	/* we are at the end of rule processing, so this node is a terminal */
376 	dag->flags.isTerminal = 1;
377 	dag->tags = tagBucket;
378 	dag->rb_file = strdup(ctx->conf_file);
379 	dag->rb_lineno = ctx->conf_ln_nbr;
380 
381 done:
382 	if(str != NULL)
383 		es_deleteStr(str);
384 	return r;
385 }
386 
387 
388 
389 /**
390  * get the initial word of a rule line that tells us the type of the
391  * line.
392  * @param[in] buf line buffer
393  * @param[in] len length of buffer
394  * @param[out] offs offset after "="
395  * @param[out] str string with "linetype-word" (newly created)
396  * @returns 0 on success, something else otherwise
397  */
398 static int
getLineType(const char * buf,es_size_t lenBuf,size_t * offs,es_str_t ** str)399 getLineType(const char *buf, es_size_t lenBuf, size_t *offs, es_str_t **str)
400 {
401 	int r = -1;
402 	size_t i;
403 
404 	*str = es_newStr(16);
405 	for(i = 0 ; i < lenBuf && buf[i] != '=' ; ++i) {
406 		CHKR(es_addChar(str, buf[i]));
407 	}
408 
409 	if(i < lenBuf)
410 		++i; /* skip over '=' */
411 	*offs = i;
412 
413 done:	return r;
414 }
415 
416 
417 /**
418  * Get a new common prefix from the config file. That is actually everything from
419  * the current offset to the end of line.
420  *
421  * @param[in] buf line buffer
422  * @param[in] len length of buffer
423  * @param[in] offs offset after "="
424  * @param[in/out] str string to store common offset. If NULL, it is created,
425  * 	 	otherwise it is emptied.
426  * @returns 0 on success, something else otherwise
427  */
428 static int
getPrefix(const char * buf,es_size_t lenBuf,es_size_t offs,es_str_t ** str)429 getPrefix(const char *buf, es_size_t lenBuf, es_size_t offs, es_str_t **str)
430 {
431 	int r;
432 
433 	if(*str == NULL) {
434 		CHKN(*str = es_newStr(lenBuf - offs));
435 	} else {
436 		es_emptyStr(*str);
437 	}
438 
439 	r = es_addBuf(str, (char*)buf + offs, lenBuf - offs);
440 done:	return r;
441 }
442 
443 /**
444  * Extend the common prefix. This means that the line is concatenated
445  * to the prefix. This is useful if the same rulebase is to be used with
446  * different prefixes (well, not strictly necessary, but probably useful).
447  *
448  * @param[in] ctx current context
449  * @param[in] buf line buffer
450  * @param[in] len length of buffer
451  * @param[in] offs offset to-be-added text starts
452  * @returns 0 on success, something else otherwise
453  */
454 static int
extendPrefix(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t offs)455 extendPrefix(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs)
456 {
457 	return es_addBuf(&ctx->rulePrefix, (char*)buf+offs, lenBuf - offs);
458 }
459 
460 
461 /**
462  * Add a tag to the tag bucket. Helper to processTags.
463  * @param[in] ctx current context
464  * @param[in] tagname string with tag name
465  * @param[out] tagBucket tagbucket to which new tags shall be added
466  *                       the tagbucket is created if it is NULL
467  * @returns 0 on success, something else otherwise
468  */
469 static int
addTagStrToBucket(ln_ctx ctx,es_str_t * tagname,struct json_object ** tagBucket)470 addTagStrToBucket(ln_ctx ctx, es_str_t *tagname, struct json_object **tagBucket)
471 {
472 	int r = -1;
473 	char *cstr;
474 	struct json_object *tag;
475 
476 	if(*tagBucket == NULL) {
477 		CHKN(*tagBucket = json_object_new_array());
478 	}
479 	cstr = es_str2cstr(tagname, NULL);
480 	ln_dbgprintf(ctx, "tag found: '%s'", cstr);
481 	CHKN(tag = json_object_new_string(cstr));
482 	json_object_array_add(*tagBucket, tag);
483 	free(cstr);
484 	r = 0;
485 
486 done:	return r;
487 }
488 
489 
490 /**
491  * Extract the tags and create a tag bucket out of them
492  *
493  * @param[in] ctx current context
494  * @param[in] buf line buffer
495  * @param[in] len length of buffer
496  * @param[in,out] poffs offset where tags start, on exit and success
497  *                      offset after tag part (excluding ':')
498  * @param[out] tagBucket tagbucket to which new tags shall be added
499  *                       the tagbucket is created if it is NULL
500  * @returns 0 on success, something else otherwise
501  */
502 static int
processTags(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t * poffs,struct json_object ** tagBucket)503 processTags(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t *poffs, struct json_object **tagBucket)
504 {
505 	int r = -1;
506 	es_str_t *str = NULL;
507 	es_size_t i;
508 
509 	assert(poffs != NULL);
510 	i = *poffs;
511 	while(i < lenBuf && buf[i] != ':') {
512 		if(buf[i] == ',') {
513 			/* end of this tag */
514 			CHKR(addTagStrToBucket(ctx, str, tagBucket));
515 			es_deleteStr(str);
516 			str = NULL;
517 		} else {
518 			if(str == NULL) {
519 				CHKN(str = es_newStr(32));
520 			}
521 			CHKR(es_addChar(&str, buf[i]));
522 		}
523 		++i;
524 	}
525 
526 	if(buf[i] != ':')
527 		goto done;
528 	++i; /* skip ':' */
529 
530 	if(str != NULL) {
531 		CHKR(addTagStrToBucket(ctx, str, tagBucket));
532 		es_deleteStr(str);
533 	}
534 
535 	*poffs = i;
536 	r = 0;
537 
538 done:	return r;
539 }
540 
541 
542 
543 /**
544  * Process a new rule and add it to pdag.
545  *
546  * @param[in] ctx current context
547  * @param[in] buf line buffer
548  * @param[in] len length of buffer
549  * @param[in] offs offset where rule starts
550  * @returns 0 on success, something else otherwise
551  */
552 static int
processRule(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t offs)553 processRule(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs)
554 {
555 	int r = -1;
556 	es_str_t *str;
557 	struct json_object *tagBucket = NULL;
558 
559 	ln_dbgprintf(ctx, "rule line to add: '%s'", buf+offs);
560 	CHKR(processTags(ctx, buf, lenBuf, &offs, &tagBucket));
561 
562 	if(offs == lenBuf) {
563 		ln_errprintf(ctx, 0, "error: actual message sample part is missing");
564 		goto done;
565 	}
566 	if(ctx->rulePrefix == NULL) {
567 		CHKN(str = es_newStr(lenBuf));
568 	} else {
569 		CHKN(str = es_strdup(ctx->rulePrefix));
570 	}
571 	CHKR(es_addBuf(&str, (char*)buf + offs, lenBuf - offs));
572 	addSampToTree(ctx, str, ctx->pdag, tagBucket);
573 	es_deleteStr(str);
574 	r = 0;
575 done:	return r;
576 }
577 
578 
579 static int
getTypeName(ln_ctx ctx,const char * const __restrict__ buf,const size_t lenBuf,size_t * const __restrict__ offs,char * const __restrict__ dstbuf)580 getTypeName(ln_ctx ctx,
581 	const char *const __restrict__ buf,
582 	const size_t lenBuf,
583 	size_t *const __restrict__ offs,
584 	char *const __restrict__ dstbuf)
585 {
586 	int r = -1;
587 	size_t iDst;
588 	size_t i = *offs;
589 
590 	if(buf[i] != '@') {
591 		ln_errprintf(ctx, 0, "user-defined type name must "
592 			"start with '@'");
593 		goto done;
594 	}
595 	for(  iDst = 0
596 	    ; i < lenBuf && buf[i] != ':' && iDst < MAX_TYPENAME_LEN - 1
597 	    ; ++i, ++iDst) {
598 		if(isspace(buf[i])) {
599 			ln_errprintf(ctx, 0, "user-defined type name must "
600 				"not contain whitespace");
601 			goto done;
602 		}
603 		dstbuf[iDst] = buf[i];
604 	}
605 	dstbuf[iDst] = '\0';
606 
607 	if(i < lenBuf && buf[i] == ':') {
608 		r = 0,
609 		*offs = i+1; /* skip ":" */
610 	}
611 done:
612 	return r;
613 }
614 
615 /**
616  * Process a type definition and add it to the PDAG
617  * disconnected components.
618  *
619  * @param[in] ctx current context
620  * @param[in] buf line buffer
621  * @param[in] len length of buffer
622  * @param[in] offs offset where rule starts
623  * @returns 0 on success, something else otherwise
624  */
625 static int
processType(ln_ctx ctx,const char * const __restrict__ buf,const size_t lenBuf,size_t offs)626 processType(ln_ctx ctx,
627 	const char *const __restrict__ buf,
628 	const size_t lenBuf,
629 	size_t offs)
630 {
631 	int r = -1;
632 	es_str_t *str;
633 	char typename[MAX_TYPENAME_LEN];
634 
635 	ln_dbgprintf(ctx, "type line to add: '%s'", buf+offs);
636 	CHKR(getTypeName(ctx, buf, lenBuf, &offs, typename));
637 	ln_dbgprintf(ctx, "type name is '%s'", typename);
638 
639 	ln_dbgprintf(ctx, "type line to add: '%s'", buf+offs);
640 	if(offs == lenBuf) {
641 		ln_errprintf(ctx, 0, "error: actual message sample part is missing in type def");
642 		goto done;
643 	}
644 	// TODO: optimize
645 	CHKN(str = es_newStr(lenBuf));
646 	CHKR(es_addBuf(&str, (char*)buf + offs, lenBuf - offs));
647 	struct ln_type_pdag *const td = ln_pdagFindType(ctx, typename, 1);
648 	CHKN(td);
649 	addSampToTree(ctx, str, td->pdag, NULL);
650 	es_deleteStr(str);
651 	r = 0;
652 done:	return r;
653 }
654 
655 
656 /**
657  * Obtain a field name from a rule base line.
658  *
659  * @param[in] ctx current context
660  * @param[in] buf line buffer
661  * @param[in] len length of buffer
662  * @param[in/out] offs on entry: offset where tag starts,
663  * 		       on exit: updated offset AFTER TAG and (':')
664  * @param [out] strTag obtained tag, if successful
665  * @returns 0 on success, something else otherwise
666  */
667 static int
getFieldName(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t * offs,es_str_t ** strTag)668 getFieldName(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs,
669 es_str_t **strTag)
670 {
671 	int r = -1;
672 	es_size_t i;
673 
674 	i = *offs;
675 	while(i < lenBuf &&
676 	       (isalnum(buf[i]) || buf[i] == '_' || buf[i] == '.')) {
677 		if(*strTag == NULL) {
678 			CHKN(*strTag = es_newStr(32));
679 		}
680 		CHKR(es_addChar(strTag, buf[i]));
681 		++i;
682 	}
683 	*offs = i;
684 	r = 0;
685 done:	return r;
686 }
687 
688 
689 /**
690  * Skip over whitespace.
691  * Skips any whitespace present at the offset.
692  *
693  * @param[in] ctx current context
694  * @param[in] buf line buffer
695  * @param[in] len length of buffer
696  * @param[in/out] offs on entry: offset first unprocessed position
697  */
698 static void
skipWhitespace(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t * offs)699 skipWhitespace(ln_ctx __attribute__((unused)) ctx, const char *buf, es_size_t lenBuf, es_size_t *offs)
700 {
701 	while(*offs < lenBuf && isspace(buf[*offs])) {
702 		(*offs)++;
703 	}
704 }
705 
706 
707 /**
708  * Obtain an annotation (field) operation.
709  * This usually is a plus or minus sign followed by a field name
710  * followed (if plus) by an equal sign and the field value. On entry,
711  * offs must be positioned on the first unprocessed field (after ':' for
712  * the initial field!). Extra whitespace is detected and, if present,
713  * skipped. The obtained operation is added to the annotation set provided.
714  * Note that extracted string objects are passed to the annotation; thus it
715  * is vital NOT to free them (most importantly, this is *not* a memory leak).
716  *
717  * @param[in] ctx current context
718  * @param[in] annot active annotation set to which the operation is to be added
719  * @param[in] buf line buffer
720  * @param[in] len length of buffer
721  * @param[in/out] offs on entry: offset where tag starts,
722  * 		       on exit: updated offset AFTER TAG and (':')
723  * @param [out] strTag obtained tag, if successful
724  * @returns 0 on success, something else otherwise
725  */
726 static int
getAnnotationOp(ln_ctx ctx,ln_annot * annot,const char * buf,es_size_t lenBuf,es_size_t * offs)727 getAnnotationOp(ln_ctx ctx, ln_annot *annot, const char *buf, es_size_t lenBuf, es_size_t *offs)
728 {
729 	int r = -1;
730 	es_size_t i;
731 	es_str_t *fieldName = NULL;
732 	es_str_t *fieldVal = NULL;
733 	ln_annot_opcode opc;
734 
735 	i = *offs;
736 	skipWhitespace(ctx, buf, lenBuf, &i);
737 	if(i == lenBuf) {
738 		r = 0;
739 		goto done; /* nothing left to process (no error!) */
740 	}
741 
742 	switch(buf[i]) {
743 	case '+':
744 		opc = ln_annot_ADD;
745 		break;
746 	case '#':
747 		ln_dbgprintf(ctx, "inline comment in 'annotate' line: %s", buf);
748 		*offs = lenBuf;
749 		r = 0;
750 		goto done;
751 	case '-':
752 		ln_dbgprintf(ctx, "annotate op '-' not yet implemented - failing");
753 		/*FALLTHROUGH*/
754 	default:ln_errprintf(ctx, 0, "invalid annotate operation '%c': %s", buf[i], buf+i);
755 		goto fail;
756 	}
757 	i++;
758 
759 	if(i == lenBuf) goto fail; /* nothing left to process */
760 
761 	CHKR(getFieldName(ctx, buf, lenBuf, &i, &fieldName));
762 	if(i == lenBuf) goto fail; /* nothing left to process */
763 	if(buf[i] != '=') goto fail; /* format error */
764 	i++;
765 
766 	skipWhitespace(ctx, buf, lenBuf, &i);
767 	if(buf[i] != '"') goto fail; /* format error */
768 	++i;
769 
770 	while(i < lenBuf && buf[i] != '"') {
771 		if(fieldVal == NULL) {
772 			CHKN(fieldVal = es_newStr(32));
773 		}
774 		CHKR(es_addChar(&fieldVal, buf[i]));
775 		++i;
776 	}
777 	*offs = (i == lenBuf) ? i : i+1;
778 	CHKR(ln_addAnnotOp(annot, opc, fieldName, fieldVal));
779 	r = 0;
780 done:	return r;
781 fail:	return -1;
782 }
783 
784 
785 /**
786  * Process a new annotation and add it to the annotation set.
787  *
788  * @param[in] ctx current context
789  * @param[in] buf line buffer
790  * @param[in] len length of buffer
791  * @param[in] offs offset where annotation starts
792  * @returns 0 on success, something else otherwise
793  */
794 static int
processAnnotate(ln_ctx ctx,const char * buf,es_size_t lenBuf,es_size_t offs)795 processAnnotate(ln_ctx ctx, const char *buf, es_size_t lenBuf, es_size_t offs)
796 {
797 	int r;
798 	es_str_t *tag = NULL;
799 	ln_annot *annot;
800 
801 	ln_dbgprintf(ctx, "sample annotation to add: '%s'", buf+offs);
802 	CHKR(getFieldName(ctx, buf, lenBuf, &offs, &tag));
803 	skipWhitespace(ctx, buf, lenBuf, &offs);
804 	if(buf[offs] != ':' || tag == NULL) {
805 		ln_dbgprintf(ctx, "invalid tag field in annotation, line is '%s'", buf);
806 		r=-1;
807 		goto done;
808 	}
809 	++offs;
810 
811 	/* we got an annotation! */
812 	CHKN(annot = ln_newAnnot(tag));
813 
814 	while(offs < lenBuf) {
815 		CHKR(getAnnotationOp(ctx, annot, buf, lenBuf, &offs));
816 	}
817 
818 	r = ln_addAnnotToSet(ctx->pas, annot);
819 
820 done:	return r;
821 }
822 
823 /**
824  * Process include directive. This permits to add unlimited layers
825  * of include files.
826  *
827  * @param[in] ctx current context
828  * @param[in] buf line buffer, a C-string
829  * @param[in] offs offset where annotation starts
830  * @returns 0 on success, something else otherwise
831  */
832 static int
processInclude(ln_ctx ctx,const char * buf,const size_t offs)833 processInclude(ln_ctx ctx, const char *buf, const size_t offs)
834 {
835 	int r;
836 	const char *const conf_file_save = ctx->conf_file;
837 	char *const fname = strdup(buf+offs);
838 	size_t lenfname = strlen(fname);
839 	const unsigned conf_ln_nbr_save = ctx->conf_ln_nbr;
840 
841 	/* trim string - not optimized but also no need to */
842 	for(size_t i = lenfname - 1 ; i > 0 ; --i) {
843 		if(isspace(fname[i])) {
844 			fname[i] = '\0';
845 			--lenfname;
846 		}
847 	}
848 
849 	CHKR(ln_loadSamples(ctx, fname));
850 
851 done:
852 	free(fname);
853 	ctx->conf_file = conf_file_save;
854 	ctx->conf_ln_nbr = conf_ln_nbr_save;
855 
856 	return r;
857 }
858 
859 /**
860  * Reads a rule (sample) stored in buffer buf and creates a new ln_samp object
861  * out of it, which it adds to the pdag (if required).
862  *
863  * @param[ctx] ctx current library context
864  * @param[buf] cstr buffer containing the string contents of the sample
865  * @param[lenBuf] length of the sample contained within buf
866  * @return standard error code
867  */
868 static int
ln_processSamp(ln_ctx ctx,const char * buf,const size_t lenBuf)869 ln_processSamp(ln_ctx ctx, const char *buf, const size_t lenBuf)
870 {
871 	int r = 0;
872 	es_str_t *typeStr = NULL;
873 	size_t offs;
874 
875 	if(getLineType(buf, lenBuf, &offs, &typeStr) != 0)
876 		goto done;
877 
878 	if(!es_strconstcmp(typeStr, "prefix")) {
879 		if(getPrefix(buf, lenBuf, offs, &ctx->rulePrefix) != 0) goto done;
880 	} else if(!es_strconstcmp(typeStr, "extendprefix")) {
881 		if(extendPrefix(ctx, buf, lenBuf, offs) != 0) goto done;
882 	} else if(!es_strconstcmp(typeStr, "rule")) {
883 		if(processRule(ctx, buf, lenBuf, offs) != 0) goto done;
884 	} else if(!es_strconstcmp(typeStr, "type")) {
885 		if(processType(ctx, buf, lenBuf, offs) != 0) goto done;
886 	} else if(!es_strconstcmp(typeStr, "annotate")) {
887 		if(processAnnotate(ctx, buf, lenBuf, offs) != 0) goto done;
888 	} else if(!es_strconstcmp(typeStr, "include")) {
889 		CHKR(processInclude(ctx, buf, offs));
890 	} else {
891 		char *str;
892 		str = es_str2cstr(typeStr, NULL);
893 		ln_errprintf(ctx, 0, "invalid record type detected: '%s'", str);
894 		free(str);
895 		goto done;
896 	}
897 
898 done:
899 	if(typeStr != NULL)
900 		es_deleteStr(typeStr);
901 	return r;
902 }
903 
904 
905 /**
906  * Read a character from our sample source.
907  */
908 static int
ln_sampReadChar(const ln_ctx ctx,FILE * const __restrict__ repo,const char ** inpbuf)909 ln_sampReadChar(const ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf)
910 {
911 	int c;
912 	assert((repo != NULL && inpbuf == NULL) || (repo == NULL && inpbuf != NULL));
913 	if(repo == NULL) {
914 		c = (**inpbuf == '\0') ? EOF : *(*inpbuf)++;
915 	} else {
916 		c = fgetc(repo);
917 	}
918 	return c;
919 }
920 
921 /* note: comments are only supported at beginning of line! */
922 /* skip to end of line */
923 void
ln_sampSkipCommentLine(ln_ctx ctx,FILE * const __restrict__ repo,const char ** inpbuf)924 ln_sampSkipCommentLine(ln_ctx ctx, FILE * const __restrict__ repo, const char **inpbuf)
925 {
926 	int c;
927 	do {
928 		c = ln_sampReadChar(ctx, repo, inpbuf);
929 	} while(c != EOF && c != '\n');
930 	++ctx->conf_ln_nbr;
931 }
932 
933 
934 /* this checks if in a multi-line rule, the next line seems to be a new
935  * rule, which would meand we have some unmatched percent signs inside
936  * our rule (what we call a "runaway rule"). This can easily happen and
937  * is otherwise hard to debug, so let's see if it is the case...
938  * @return 1 if this is a runaway rule, 0 if not
939  */
940 int
ln_sampChkRunawayRule(ln_ctx ctx,FILE * const __restrict__ repo,const char ** inpbuf)941 ln_sampChkRunawayRule(ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf)
942 {
943 	int r = 1;
944 	fpos_t fpos;
945 	char buf[6];
946 	int cont = 1;
947 	int read;
948 
949 	fgetpos(repo, &fpos);
950 	while(cont) {
951 		fpos_t inner_fpos;
952 		fgetpos(repo, &inner_fpos);
953 		if((read = fread(buf, sizeof(char), sizeof(buf)-1, repo)) == 0) {
954 			r = 0;
955 			goto done;
956 		}
957 		if(buf[0] == '\n') {
958 			fsetpos(repo, &inner_fpos);
959 			if(fread(buf, sizeof(char), 1, repo)) {}; /* skip '\n' */
960 			continue;
961 		} else if(buf[0] == '#') {
962 			fsetpos(repo, &inner_fpos);
963 			const unsigned conf_ln_nbr_save = ctx->conf_ln_nbr;
964 			ln_sampSkipCommentLine(ctx, repo, inpbuf);
965 			ctx->conf_ln_nbr = conf_ln_nbr_save;
966 			continue;
967 		}
968 		if(read != 5)
969 			goto done; /* cannot be a rule= line! */
970 		cont = 0; /* no comment, so we can decide */
971 		buf[5] = '\0';
972 		if(!strncmp(buf, "rule=", 5)) {
973 			ln_errprintf(ctx, 0, "line has 'rule=' at begin of line, which "
974 				"does look like a typo in the previous lines (unmatched "
975 				"%% character) and is forbidden. If valid, please re-format "
976 				"the rule to start with other characters. Rule ignored.");
977 			goto done;
978 		}
979 	}
980 
981 	r = 0;
982 done:
983 	fsetpos(repo, &fpos);
984 	return r;
985 }
986 
987 /**
988  * Read a rule (sample) from repository (sequentially).
989  *
990  * Reads a sample starting with the current file position and
991  * creates a new ln_samp object out of it, which it adds to the
992  * pdag.
993  *
994  * @param[in] ctx current library context
995  * @param[in] repo repository descriptor if file input is desired
996  * @param[in/out] ptr to ptr of input buffer; this is used if a string is
997  *                provided instead of a file. If so, this pointer is advanced
998  *                as data is consumed.
999  * @param[out] isEof must be set to 0 on entry and is switched to 1 if EOF occured.
1000  * @return standard error code
1001  */
1002 static int
ln_sampRead(ln_ctx ctx,FILE * const __restrict__ repo,const char ** inpbuf,int * const __restrict__ isEof)1003 ln_sampRead(ln_ctx ctx, FILE *const __restrict__ repo, const char **inpbuf,
1004 	int *const __restrict__ isEof)
1005 {
1006 	int r = 0;
1007 	char buf[64*1024]; /**< max size of rule - TODO: make configurable */
1008 
1009 	size_t i = 0;
1010 	int inParser = 0;
1011 	int done = 0;
1012 	while(!done) {
1013 		const int c = ln_sampReadChar(ctx, repo, inpbuf);
1014 		if(c == EOF) {
1015 			*isEof = 1;
1016 			if(i == 0)
1017 				goto done;
1018 			else
1019 				done = 1; /* last line missing LF, still process it! */
1020 		} else if(c == '\n') {
1021 			++ctx->conf_ln_nbr;
1022 			if(inParser) {
1023 				if(ln_sampChkRunawayRule(ctx, repo, inpbuf)) {
1024 					/* ignore previous rule */
1025 					inParser = 0;
1026 					i = 0;
1027 				}
1028 			}
1029 			if(!inParser && i != 0)
1030 				done = 1;
1031 		} else if(c == '#' && i == 0) {
1032 			ln_sampSkipCommentLine(ctx, repo, inpbuf);
1033 			i = 0; /* back to beginning */
1034 		} else {
1035 			if(c == '%')
1036 				inParser = (inParser) ? 0 : 1;
1037 			buf[i++] = c;
1038 			if(i >= sizeof(buf)) {
1039 				ln_errprintf(ctx, 0, "line is too long");
1040 				goto done;
1041 			}
1042 		}
1043 	}
1044 	buf[i] = '\0';
1045 
1046 	ln_dbgprintf(ctx, "read rulebase line[~%d]: '%s'", ctx->conf_ln_nbr, buf);
1047 	CHKR(ln_processSamp(ctx, buf, i));
1048 
1049 done:
1050 	return r;
1051 }
1052 
/* Determine the rulebase format version from the first line of fp.
 * The first line is consumed (up to 63 characters of it).
 * @return 2 if the line is exactly "version=2" followed by a line feed,
 *         1 for anything else, -1 if no line could be read.
 */
static int
checkVersion(FILE *const fp)
{
	char firstLine[64];

	if(fgets(firstLine, sizeof(firstLine), fp) == NULL)
		return -1;

	return (strcmp(firstLine, "version=2\n") == 0) ? 2 : 1;
}
1069 
1070 /* we have a v1 rulebase, so let's do all stuff that we need
1071  * to make that ole piece of ... work.
1072  */
1073 static int
doOldCruft(ln_ctx ctx,const char * file)1074 doOldCruft(ln_ctx ctx, const char *file)
1075 {
1076 	int r = -1;
1077 	if((ctx->ptree = ln_newPTree(ctx, NULL)) == NULL) {
1078 		free(ctx);
1079 		r = -1;
1080 		goto done;
1081 	}
1082 	r = ln_v1_loadSamples(ctx, file);
1083 done:
1084 	return r;
1085 }
1086 
1087 /* try to open a rulebase file. This also tries to see if we need to
1088  * load it from some pre-configured alternative location.
1089  * @returns open file pointer or NULL in case of error
1090  */
1091 static FILE *
tryOpenRBFile(ln_ctx ctx,const char * const file)1092 tryOpenRBFile(ln_ctx ctx, const char *const file)
1093 {
1094 	FILE *repo = NULL;
1095 
1096 	if((repo = fopen(file, "r")) != NULL)
1097 		goto done;
1098 	const int eno1 = errno;
1099 
1100 	const char *const rb_lib = getenv("LIBLOGNORM_RULEBASES");
1101 	if(rb_lib == NULL || *file == '/') {
1102 		ln_errprintf(ctx, eno1, "cannot open rulebase '%s'", file);
1103 		goto done;
1104 	}
1105 
1106 	char *fname = NULL;
1107 	int len;
1108 	len = asprintf(&fname, (rb_lib[strlen(rb_lib)-1] == '/') ? "%s%s" : "%s/%s", rb_lib, file);
1109 	if(len == -1) {
1110 		ln_errprintf(ctx, errno, "alloc error: cannot open rulebase '%s'", file);
1111 		goto done;
1112 	}
1113 	if((repo = fopen(fname, "r")) == NULL) {
1114 		const int eno2 = errno;
1115 		ln_errprintf(ctx, eno1, "cannot open rulebase '%s'", file);
1116 		ln_errprintf(ctx, eno2, "also tried to locate %s via "
1117 			"rulebase directory without success. Expanded "
1118 			"name was '%s'", file, fname);
1119 	}
1120 	free(fname);
1121 
1122 done:
1123 	return repo;
1124 }
1125 
1126 /* @return 0 if all is ok, 1 if an error occured */
1127 int
ln_sampLoad(ln_ctx ctx,const char * file)1128 ln_sampLoad(ln_ctx ctx, const char *file)
1129 {
1130 	int r = 1;
1131 	FILE *repo;
1132 	int isEof = 0;
1133 
1134 	ln_dbgprintf(ctx, "loading rulebase file '%s'", file);
1135 	if(file == NULL) goto done;
1136 	if((repo = tryOpenRBFile(ctx, file)) == NULL)
1137 		goto done;
1138 	const int version = checkVersion(repo);
1139 	ln_dbgprintf(ctx, "rulebase version is %d\n", version);
1140 	if(version == -1) {
1141 		ln_errprintf(ctx, errno, "error determing version of %s", file);
1142 		goto done;
1143 	}
1144 	if(ctx->version != 0 && version != ctx->version) {
1145 		ln_errprintf(ctx, errno, "rulebase '%s' must be version %d, but is version %d "
1146 			" - can not be processed", file, ctx->version, version);
1147 		goto done;
1148 	}
1149 	ctx->version = version;
1150 	if(ctx->version == 1) {
1151 		fclose(repo);
1152 		r = doOldCruft(ctx, file);
1153 		goto done;
1154 	}
1155 
1156 	/* now we are in our native code */
1157 	++ctx->conf_ln_nbr; /* "version=2" is line 1! */
1158 	while(!isEof) {
1159 		CHKR(ln_sampRead(ctx, repo, NULL, &isEof));
1160 	}
1161 	fclose(repo);
1162 	r = 0;
1163 
1164 	if(ctx->include_level == 1)
1165 		ln_pdagOptimize(ctx);
1166 done:
1167 	return r;
1168 }
1169 
1170 /* @return 0 if all is ok, 1 if an error occured */
1171 int
ln_sampLoadFromString(ln_ctx ctx,const char * string)1172 ln_sampLoadFromString(ln_ctx ctx, const char *string)
1173 {
1174 	int r = 1;
1175 	int isEof = 0;
1176 
1177 	if(string == NULL)
1178 		goto done;
1179 
1180 	ln_dbgprintf(ctx, "loading v2 rulebase from string '%s'", string);
1181 	ctx->version = 2;
1182 	while(!isEof) {
1183 		CHKR(ln_sampRead(ctx, NULL, &string, &isEof));
1184 	}
1185 	r = 0;
1186 
1187 	if(ctx->include_level == 1)
1188 		ln_pdagOptimize(ctx);
1189 done:
1190 	return r;
1191 }
1192