1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 
3 /*  Fluent Bit
4  *  ==========
5  *  Copyright (C) 2019-2021 The Fluent Bit Authors
6  *  Copyright (C) 2015-2018 Treasure Data Inc.
7  *
8  *  Licensed under the Apache License, Version 2.0 (the "License");
9  *  you may not use this file except in compliance with the License.
10  *  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  *  Unless required by applicable law or agreed to in writing, software
15  *  distributed under the License is distributed on an "AS IS" BASIS,
16  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  *  See the License for the specific language governing permissions and
18  *  limitations under the License.
19  */
20 
21 #include <fluent-bit/flb_info.h>
22 #include <fluent-bit/flb_mem.h>
23 #include <fluent-bit/flb_env.h>
24 #include <fluent-bit/flb_log.h>
25 #include <fluent-bit/flb_pack.h>
26 #include <fluent-bit/flb_record_accessor.h>
27 #include <fluent-bit/flb_ra_key.h>
28 #include <fluent-bit/record_accessor/flb_ra_parser.h>
29 #include <monkey/mk_core.h>
30 #include <msgpack.h>
31 
32 #include <ctype.h>
33 
/*
 * Create a string parser entry from the bytes of 'buf' located in the range
 * [start, end). The 'ra' context is accepted but not used by this helper.
 *
 * Returns the new parser entry, or NULL if it could not be created.
 */
static struct flb_ra_parser *ra_parse_string(struct flb_record_accessor *ra,
                                             flb_sds_t buf, int start, int end)
{
    return flb_ra_parser_string_create(buf + start, end - start);
}
48 
49 /* Create a parser context for a key map or function definition */
ra_parse_meta(struct flb_record_accessor * ra,flb_sds_t buf,int start,int end)50 static struct flb_ra_parser *ra_parse_meta(struct flb_record_accessor *ra,
51                                            flb_sds_t buf, int start, int end)
52 {
53     int len;
54     struct flb_ra_parser *rp;
55 
56     len = end - start;
57     rp = flb_ra_parser_meta_create(buf + start, len);
58     if (!rp) {
59         return NULL;
60     }
61 
62     return rp;
63 }
64 
65 /*
66  * Supported data
67  *
68  * ${X}                               => environment variable
69  * $key, $key['x'], $key['x'][N]['z'] => record key value or array index
70  * $0, $1,..$9                        => regex id
71  * $X()                               => built-in function
72  */
ra_parse_buffer(struct flb_record_accessor * ra,flb_sds_t buf)73 static int ra_parse_buffer(struct flb_record_accessor *ra, flb_sds_t buf)
74 {
75     int i;
76     int n;
77     int c;
78     int t;
79     int len;
80     int pre = 0;
81     int end = 0;
82     int quote_cnt;
83     struct flb_ra_parser *rp;
84     struct flb_ra_parser *rp_str = NULL;
85 
86     len = flb_sds_len(buf);
87 
88     for (i = 0; i < len; i++) {
89         if (buf[i] != '$') {
90             continue;
91         }
92 
93         /*
94          * Before to add the number entry, add the previous text
95          * before hitting this.
96          */
97         if (i > pre) {
98             rp = ra_parse_string(ra, buf, pre, i);
99             if (!rp) {
100                 return -1;
101             }
102             mk_list_add(&rp->_head, &ra->list);
103         }
104         pre = i;
105 
106 
107         n = i + 1;
108         if (n >= len) {
109             /* Finalize, nothing to do */
110             break;
111         }
112 
113         /*
114          * If the next character is a digit like $0,$1,$2..$9, means the user wants to use
115          * the result of a regex capture.
116          *
117          * We support up to 10 regex ids [0-9]
118          */
119         if (isdigit(buf[n])) {
120             /* Add REGEX_ID entry */
121             c = atoi(buf + n);
122             rp = flb_ra_parser_regex_id_create(c);
123             if (!rp) {
124                 return -1;
125             }
126 
127             mk_list_add(&rp->_head, &ra->list);
128             i++;
129             pre = i + 1;
130             continue;
131         }
132 
133         /*
134          * If the next 3 character are 'TAG', the user might want to include the tag or
135          * part of it (e.g: TAG[n]).
136          */
137         if (n + 2 < len && strncmp(buf + n, "TAG", 3) == 0) {
138             /* Check if some [] was added */
139             if (n + 4 < len) {
140                 end = -1;
141                 if (buf[n + 3] == '[') {
142                     t = n + 3;
143 
144                     /* Look for the ending ']' */
145                     end = mk_string_char_search(buf + t, ']', len - t);
146                     if (end == 0) {
147                         end = -1;
148                     }
149 
150                     /* continue processsing */
151                     c = atoi(buf + t + 1);
152 
153                     rp = flb_ra_parser_tag_part_create(c);
154                     if (!rp) {
155                         return -1;
156                     }
157                     mk_list_add(&rp->_head, &ra->list);
158 
159                     i = t + end + 1;
160                     pre = i;
161                     continue;
162                 }
163             }
164 
165             /* Append full tag */
166             rp = flb_ra_parser_tag_create();
167             if (!rp) {
168                 return -1;
169             }
170             mk_list_add(&rp->_head, &ra->list);
171             i = n + 3;
172             pre = n + 3;
173             continue;
174         }
175 
176         quote_cnt = 0;
177         for (end = i + 1; end < len; end++) {
178             if (buf[end] == '\'') {
179               ++quote_cnt;
180             }
181             else if (buf[end] == '.' && (quote_cnt & 0x01)) {
182                 /* ignore '.' if it is inside a string/subkey */
183                 continue;
184             }
185             else if (buf[end] == '.' || buf[end] == ' ' || buf[end] == ',' || buf[end] == '"') {
186                 break;
187             }
188         }
189         if (end > len) {
190             end = len;
191         }
192 
193         /* Parse the content, we use 'end' as the separator position  */
194         rp = ra_parse_meta(ra, buf, i, end);
195         if (!rp) {
196             return -1;
197         }
198 
199         /* Generate fixed length string */
200         if (pre < i) {
201             rp_str = ra_parse_string(ra, buf, pre, i);
202             if (!rp_str) {
203                 flb_ra_parser_destroy(rp);
204                 return -1;
205             }
206         }
207         else {
208             rp_str = NULL;
209         }
210 
211         if (rp_str) {
212             mk_list_add(&rp_str->_head, &ra->list);
213         }
214         mk_list_add(&rp->_head, &ra->list);
215         pre = end;
216         i = end;
217     }
218 
219     /* Append remaining string */
220     if (i - 1 > end && pre < i) {
221         end = flb_sds_len(buf);
222         rp_str = ra_parse_string(ra, buf, pre, end);
223         if (rp_str) {
224             mk_list_add(&rp_str->_head, &ra->list);
225         }
226     }
227 
228     return 0;
229 }
230 
flb_ra_destroy(struct flb_record_accessor * ra)231 void flb_ra_destroy(struct flb_record_accessor *ra)
232 {
233     struct mk_list *tmp;
234     struct mk_list *head;
235     struct flb_ra_parser *rp;
236 
237     mk_list_foreach_safe(head, tmp, &ra->list) {
238         rp = mk_list_entry(head, struct flb_ra_parser, _head);
239         mk_list_del(&rp->_head);
240         flb_ra_parser_destroy(rp);
241     }
242 
243     if (ra->pattern) {
244         flb_sds_destroy(ra->pattern);
245     }
246     flb_free(ra);
247 }
248 
flb_ra_create(char * str,int translate_env)249 struct flb_record_accessor *flb_ra_create(char *str, int translate_env)
250 {
251     int ret;
252     size_t hint = 0;
253     char *p;
254     flb_sds_t buf = NULL;
255     struct flb_env *env;
256     struct mk_list *head;
257     struct flb_ra_parser *rp;
258     struct flb_record_accessor *ra;
259 
260     p = str;
261     if (translate_env == FLB_TRUE) {
262         /*
263          * Check if some environment variable has been created as part of the
264          * string. Upon running the environment variable will be pre-set in the
265          * string.
266          */
267         env = flb_env_create();
268         if (!env) {
269             flb_error("[record accessor] cannot create environment context");
270             return NULL;
271         }
272 
273         /* Translate string */
274         buf = flb_env_var_translate(env, str);
275         if (!buf) {
276             flb_error("[record accessor] cannot translate string");
277             flb_env_destroy(env);
278             return NULL;
279         }
280         flb_env_destroy(env);
281         p = buf;
282     }
283 
284     /* Allocate context */
285     ra = flb_calloc(1, sizeof(struct flb_record_accessor));
286     if (!ra) {
287         flb_errno();
288         flb_error("[record accessor] cannot create context");
289         if (buf) {
290             flb_sds_destroy(buf);
291         }
292         return NULL;
293     }
294     ra->pattern = flb_sds_create(str);
295     if (!ra->pattern) {
296         flb_error("[record accessor] could not allocate pattern");
297         flb_free(ra);
298         if (buf) {
299             flb_sds_destroy(buf);
300         }
301         return NULL;
302     }
303 
304     mk_list_init(&ra->list);
305 
306     /*
307      * The buffer needs to processed where we create a list of parts, basically
308      * a linked list of sds using 'slist' api.
309      */
310     ret = ra_parse_buffer(ra, p);
311     if (buf) {
312         flb_sds_destroy(buf);
313     }
314     if (ret == -1) {
315         flb_ra_destroy(ra);
316         return NULL;
317     }
318 
319     /* Calculate a hint of an outgoing size buffer */
320     mk_list_foreach(head, &ra->list) {
321         rp = mk_list_entry(head, struct flb_ra_parser, _head);
322         if (rp->key) {
323             if (rp->type == FLB_RA_PARSER_REGEX_ID) {
324                 hint += 32;
325             }
326             else {
327                 hint += flb_sds_len(rp->key->name);
328             }
329         }
330     }
331     ra->size_hint = hint + 128;
332     return ra;
333 }
334 
flb_ra_dump(struct flb_record_accessor * ra)335 void flb_ra_dump(struct flb_record_accessor *ra)
336 {
337     struct mk_list *head;
338     struct flb_ra_parser *rp;
339 
340     mk_list_foreach(head, &ra->list) {
341         rp = mk_list_entry(head, struct flb_ra_parser, _head);
342         printf("\n");
343         flb_ra_parser_dump(rp);
344     }
345 }
346 
/*
 * Append to 'buf' the text of the regex capture identified by rp->id, taken
 * from result->str using the offsets reported by flb_regex_results_get().
 *
 * Returns 'buf' untouched when the capture cannot be retrieved, otherwise
 * the value returned by flb_sds_cat().
 */
static flb_sds_t ra_translate_regex_id(struct flb_ra_parser *rp,
                                       struct flb_regex_search *result,
                                       flb_sds_t buf)
{
    ptrdiff_t from;
    ptrdiff_t to;

    if (flb_regex_results_get(result, rp->id, &from, &to) == -1) {
        /* capture not available: nothing to append */
        return buf;
    }

    return flb_sds_cat(buf, result->str + from, to - from);
}
364 
/*
 * Append the whole tag ('tag_len' bytes of 'tag') to 'buf'. The parser entry
 * 'rp' is not consulted. Returns the value returned by flb_sds_cat().
 */
static flb_sds_t ra_translate_tag(struct flb_ra_parser *rp, flb_sds_t buf,
                                  char *tag, int tag_len)
{
    return flb_sds_cat(buf, tag, tag_len);
}
373 
/*
 * Append to 'buf' the dot-separated part of the tag whose position (starting
 * at zero) equals rp->id.
 *
 * A tag without any dot is treated as a single part that only matches
 * rp->id == 0. When no part matches, 'buf' is returned untouched; otherwise
 * the value returned by flb_sds_cat() is returned.
 */
static flb_sds_t ra_translate_tag_part(struct flb_ra_parser *rp, flb_sds_t buf,
                                       char *tag, int tag_len)
{
    int offset;
    int part = -1;
    int part_len;
    flb_sds_t out = buf;

    for (offset = 0; offset < tag_len; offset += part_len + 1) {
        part_len = mk_string_char_search(tag + offset, '.', tag_len - offset);
        if (part_len == -1) {
            if (offset == 0) {
                /* the tag has no dots at all, handled after the loop */
                break;
            }
            /* last part: it runs up to the end of the tag */
            part_len = tag_len - offset;
        }

        part++;
        if (part == rp->id) {
            out = flb_sds_cat(buf, tag + offset, part_len);
            break;
        }
    }

    /* No dots in the tag */
    if (rp->id == 0 && part == -1 && offset < tag_len) {
        return flb_sds_cat(buf, tag, tag_len);
    }

    return out;
}
407 
/*
 * Append the literal text of a string entry (stored in rp->key->name) to
 * 'buf'. Returns the value returned by flb_sds_cat().
 */
static flb_sds_t ra_translate_string(struct flb_ra_parser *rp, flb_sds_t buf)
{
    flb_sds_t text = rp->key->name;

    return flb_sds_cat(buf, text, flb_sds_len(text));
}
415 
/*
 * Look up the key (and subkeys) referenced by 'rp' in 'map' and append the
 * string representation of its value to 'buf'.
 *
 * 'found' is set to FLB_TRUE when the key exists in the map and to FLB_FALSE
 * otherwise; in the latter case 'buf' is returned untouched.
 *
 * When the key exists the return value is the one given by flb_sds_cat(), or
 * NULL if the value could not be converted to text (value type not handled
 * here, JSON conversion failure or formatting error).
 */
static flb_sds_t ra_translate_keymap(struct flb_ra_parser *rp, flb_sds_t buf,
                                     msgpack_object map, int *found)
{
    int len;
    char *js;
    /*
     * Sized for the longest "%f" rendering of a double (roughly 317 bytes
     * for the largest finite magnitudes); 64 bit integers need far less.
     */
    char str[352];
    flb_sds_t tmp = NULL;
    struct flb_ra_value *v;

    /* Lookup key or subkey value */
    v = flb_ra_key_to_value(rp->key->name, map, rp->key->subkeys);
    if (!v) {
        *found = FLB_FALSE;
        return buf;
    }
    *found = FLB_TRUE;

    /* Based on data type, convert to its string representation */
    if (v->type == FLB_RA_BOOL) {
        /* Check if is a map or a real bool */
        if (v->o.type == MSGPACK_OBJECT_MAP) {
            /* Convert msgpack map to JSON string */
            js = flb_msgpack_to_json_str(1024, &v->o);
            if (js) {
                len = strlen(js);
                tmp = flb_sds_cat(buf, js, len);
                flb_free(js);
            }
        }
        else if (v->o.type == MSGPACK_OBJECT_BOOLEAN) {
            if (v->val.boolean) {
                tmp = flb_sds_cat(buf, "true", 4);
            }
            else {
                tmp = flb_sds_cat(buf, "false", 5);
            }
        }
    }
    else if (v->type == FLB_RA_INT || v->type == FLB_RA_FLOAT) {
        if (v->type == FLB_RA_INT) {
            len = snprintf(str, sizeof(str), "%" PRId64, v->val.i64);
        }
        else {
            len = snprintf(str, sizeof(str), "%f", v->val.f64);
        }

        /*
         * snprintf() returns the length the full text needs: a negative
         * value is an error (tmp stays NULL), and a value that does not fit
         * is clamped to the bytes really stored so the terminating NUL is
         * never appended to the output.
         */
        if (len >= 0) {
            if ((size_t) len >= sizeof(str)) {
                len = sizeof(str) - 1;
            }
            tmp = flb_sds_cat(buf, str, len);
        }
    }
    else if (v->type == FLB_RA_STRING) {
        tmp = flb_sds_cat(buf, v->val.string, flb_sds_len(v->val.string));
    }
    else if (v->type == FLB_RA_NULL) {
        tmp = flb_sds_cat(buf, "null", 4);
    }

    flb_ra_key_value_destroy(v);
    return tmp;
}
479 
480 /*
481  * Translate a record accessor buffer, tag and records are optional
482  * parameters.
483  *
484  * For safety, the function returns a newly created string that needs
485  * to be destroyed by the caller.
486  */
flb_ra_translate(struct flb_record_accessor * ra,char * tag,int tag_len,msgpack_object map,struct flb_regex_search * result)487 flb_sds_t flb_ra_translate(struct flb_record_accessor *ra,
488                            char *tag, int tag_len,
489                            msgpack_object map, struct flb_regex_search *result)
490 {
491     int found;
492     flb_sds_t tmp = NULL;
493     flb_sds_t buf;
494     struct mk_list *head;
495     struct flb_ra_parser *rp;
496 
497     buf = flb_sds_create_size(ra->size_hint);
498     if (!buf) {
499         flb_error("[record accessor] cannot create outgoing buffer");
500         return NULL;
501     }
502 
503     mk_list_foreach(head, &ra->list) {
504         rp = mk_list_entry(head, struct flb_ra_parser, _head);
505         if (rp->type == FLB_RA_PARSER_STRING) {
506             tmp = ra_translate_string(rp, buf);
507         }
508         else if (rp->type == FLB_RA_PARSER_KEYMAP) {
509             tmp = ra_translate_keymap(rp, buf, map, &found);
510         }
511         else if (rp->type == FLB_RA_PARSER_REGEX_ID && result) {
512             tmp = ra_translate_regex_id(rp, result, buf);
513         }
514         else if (rp->type == FLB_RA_PARSER_TAG && tag) {
515             tmp = ra_translate_tag(rp, buf, tag, tag_len);
516         }
517         else if (rp->type == FLB_RA_PARSER_TAG_PART && tag) {
518             tmp = ra_translate_tag_part(rp, buf, tag, tag_len);
519         }
520         else {
521 
522         }
523 
524         //else if (rp->type == FLB_RA_PARSER_FUNC) {
525             //tmp = ra_translate_func(rp, buf, tag, tag_len);
526         //}
527 
528         if (!tmp) {
529             flb_error("[record accessor] translation failed");
530             flb_sds_destroy(buf);
531             return NULL;
532         }
533         if (tmp != buf) {
534             buf = tmp;
535         }
536     }
537 
538     return buf;
539 }
540 
541 /*
542  * If the record accessor rules do not generate content based on a keymap or
543  * regex, it's considered to be 'static', so the value returned will always be
544  * the same.
545  *
546  * If the 'ra' is static, return FLB_TRUE, otherwise FLB_FALSE.
547  */
flb_ra_is_static(struct flb_record_accessor * ra)548 int flb_ra_is_static(struct flb_record_accessor *ra)
549 {
550     struct mk_list *head;
551     struct flb_ra_parser *rp;
552 
553     mk_list_foreach(head, &ra->list) {
554         rp = mk_list_entry(head, struct flb_ra_parser, _head);
555         if (rp->type == FLB_RA_PARSER_STRING) {
556             continue;
557         }
558         else if (rp->type == FLB_RA_PARSER_KEYMAP) {
559             return FLB_FALSE;
560         }
561         else if (rp->type == FLB_RA_PARSER_REGEX_ID) {
562             return FLB_FALSE;
563         }
564         else if (rp->type == FLB_RA_PARSER_TAG) {
565             continue;
566         }
567         else if (rp->type == FLB_RA_PARSER_TAG_PART) {
568             continue;
569         }
570     }
571 
572     return FLB_TRUE;
573 }
574 
575 /*
576  * Compare a string value against the first entry of a record accessor component, used
577  * specifically when the record accessor refers to a single key name.
578  */
flb_ra_strcmp(struct flb_record_accessor * ra,msgpack_object map,char * str,int len)579 int flb_ra_strcmp(struct flb_record_accessor *ra, msgpack_object map,
580                   char *str, int len)
581 {
582     struct flb_ra_parser *rp;
583 
584     rp = mk_list_entry_first(&ra->list, struct flb_ra_parser, _head);
585     return flb_ra_key_strcmp(rp->key->name, map, rp->key->subkeys,
586                              rp->key->name, flb_sds_len(rp->key->name));
587 }
588 
589 /*
590  * Check if a regular expression matches a record accessor key in the
591  * given map
592  */
flb_ra_regex_match(struct flb_record_accessor * ra,msgpack_object map,struct flb_regex * regex,struct flb_regex_search * result)593 int flb_ra_regex_match(struct flb_record_accessor *ra, msgpack_object map,
594                        struct flb_regex *regex, struct flb_regex_search *result)
595 {
596     struct flb_ra_parser *rp;
597 
598     rp = mk_list_entry_first(&ra->list, struct flb_ra_parser, _head);
599     return flb_ra_key_regex_match(rp->key->name, map, rp->key->subkeys,
600                                   regex, result);
601 }
602 
603 /*
604  * If 'record accessor' pattern matches an entry in the 'map', set the
605  * reference in 'out_key' and 'out_val' for the entries in question.
606  *
607  * Returns FLB_TRUE if the pattern matched a kv pair, otherwise it returns
608  * FLB_FALSE.
609  */
flb_ra_get_kv_pair(struct flb_record_accessor * ra,msgpack_object map,msgpack_object ** start_key,msgpack_object ** out_key,msgpack_object ** out_val)610 int flb_ra_get_kv_pair(struct flb_record_accessor *ra, msgpack_object map,
611                        msgpack_object **start_key,
612                        msgpack_object **out_key, msgpack_object **out_val)
613 {
614     struct flb_ra_parser *rp;
615 
616     if (mk_list_size(&ra->list) == 0) {
617         return -1;
618     }
619 
620     rp = mk_list_entry_first(&ra->list, struct flb_ra_parser, _head);
621     if (!rp->key) {
622         return FLB_FALSE;
623     }
624 
625     return flb_ra_key_value_get(rp->key->name, map, rp->key->subkeys,
626                                 start_key, out_key, out_val);
627 }
628 
/*
 * Resolve the key of the first record accessor entry against 'map' and
 * return the value object produced by flb_ra_key_to_value().
 *
 * Returns NULL when the record accessor has no entries or its first entry
 * has no key.
 */
struct flb_ra_value *flb_ra_get_value_object(struct flb_record_accessor *ra,
                                             msgpack_object map)
{
    struct flb_ra_parser *first;

    if (mk_list_size(&ra->list) == 0) {
        return NULL;
    }

    first = mk_list_entry_first(&ra->list, struct flb_ra_parser, _head);
    if (first->key) {
        return flb_ra_key_to_value(first->key->name, map, first->key->subkeys);
    }

    return NULL;
}
645