1 /*
2 * Written by Alexey Tourbin <at@altlinux.org>.
3 *
4 * The author has dedicated the code to the public domain. Anyone is free
5 * to copy, modify, publish, use, compile, sell, or distribute the original
6 * code, either in source code form or as a compiled binary, for any purpose,
7 * commercial or non-commercial, and by any means.
8 */
9
10 #include "config.h"
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include <sqlite3.h>
15
16 #include <unordered_map>
17
18 #include "pcrepp/pcrepp.hh"
19
20 #include "base/humanize.hh"
21 #include "base/lnav.gzip.hh"
22 #include "base/string_util.hh"
23 #include "formats/logfmt/logfmt.parser.hh"
24 #include "yajlpp/yajlpp.hh"
25 #include "yajlpp/json_op.hh"
26 #include "column_namer.hh"
27 #include "yajl/api/yajl_gen.h"
28 #include "sqlite-extension-func.hh"
29 #include "data_scanner.hh"
30 #include "data_parser.hh"
31 #include "elem_to_json.hh"
32 #include "vtab_module.hh"
33 #include "vtab_module_json.hh"
34 #include "safe/safe.h"
35 #include "spookyhash/SpookyV2.h"
36
37 #include "optional.hpp"
38 #include "mapbox/variant.hpp"
39
40 using namespace std;
41 using namespace mapbox;
42
43 typedef struct {
44 shared_ptr<pcrepp> re2;
45 } cache_entry;
46
find_re(const char * re)47 static cache_entry *find_re(const char *re)
48 {
49 using safe_cache = safe::Safe<unordered_map<string, cache_entry>>;
50 static safe_cache CACHE;
51
52 safe::WriteAccess<safe_cache> wcache(CACHE);
53 string re_str = re;
54 auto iter = wcache->find(re_str);
55
56 if (iter == wcache->end()) {
57 cache_entry c;
58
59 c.re2 = make_shared<pcrepp>(re_str);
60 auto pair = wcache->insert(std::make_pair(re_str, c));
61
62 iter = pair.first;
63 }
64
65 return &iter->second;
66 }
67
regexp(const char * re,const char * str)68 static bool regexp(const char *re, const char *str)
69 {
70 cache_entry *reobj = find_re(re);
71 pcre_context_static<30> pc;
72 pcre_input pi(str);
73
74 return reobj->re2->match(pc, pi);
75 }
76
77 static
78 util::variant<int64_t, double, const char*, string_fragment, json_string>
regexp_match(const char * re,const char * str)79 regexp_match(const char *re, const char *str)
80 {
81 cache_entry *reobj = find_re(re);
82 pcre_context_static<30> pc;
83 pcre_input pi(str);
84 pcrepp &extractor = *reobj->re2;
85
86 if (extractor.get_capture_count() == 0) {
87 throw pcrepp::error("regular expression does not have any captures");
88 }
89
90 if (!extractor.match(pc, pi)) {
91 return static_cast<const char *>(nullptr);
92 }
93
94 yajlpp_gen gen;
95 yajl_gen_config(gen, yajl_gen_beautify, false);
96
97 if (extractor.get_capture_count() == 1) {
98 pcre_context::capture_t *cap = pc[0];
99 const char *cap_start = pi.get_substr_start(cap);
100
101 if (!cap->is_valid()) {
102 return static_cast<const char *>(nullptr);
103 }
104 else {
105 char *cap_copy = (char *)alloca(cap->length() + 1);
106 long long int i_value;
107 double d_value;
108 int end_index;
109
110 memcpy(cap_copy, cap_start, cap->length());
111 cap_copy[cap->length()] = '\0';
112
113 if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1 &&
114 (end_index == cap->length())) {
115 return (int64_t)i_value;
116 }
117 else if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1 &&
118 (end_index == cap->length())) {
119 return d_value;
120 }
121 else {
122 return string_fragment(str, cap->c_begin, cap->c_end);
123 }
124 }
125 }
126 else {
127 yajlpp_map root_map(gen);
128 column_namer cn;
129
130 for (int lpc = 0; lpc < extractor.get_capture_count(); lpc++) {
131 string colname = cn.add_column(extractor.name_for_capture(lpc));
132 pcre_context::capture_t *cap = pc[lpc];
133
134 yajl_gen_string(gen, colname);
135
136 if (!cap->is_valid()) {
137 yajl_gen_null(gen);
138 }
139 else {
140 const char *cap_start = pi.get_substr_start(cap);
141 char *cap_copy = (char *) alloca(cap->length() + 1);
142 long long int i_value;
143 double d_value;
144 int end_index;
145
146 memcpy(cap_copy, cap_start, cap->length());
147 cap_copy[cap->length()] = '\0';
148
149 if (sscanf(cap_copy, "%lld%n", &i_value, &end_index) == 1 &&
150 (end_index == cap->length())) {
151 yajl_gen_integer(gen, i_value);
152 }
153 else if (sscanf(cap_copy, "%lf%n", &d_value, &end_index) == 1 &&
154 (end_index == cap->length())) {
155 yajl_gen_number(gen, cap_start, cap->length());
156 }
157 else {
158 yajl_gen_pstring(gen, cap_start, cap->length());
159 }
160 }
161 }
162 }
163
164 return json_string(gen);
165 #if 0
166 sqlite3_result_text(ctx, (const char *) buf, len, SQLITE_TRANSIENT);
167 #ifdef HAVE_SQLITE3_VALUE_SUBTYPE
168 sqlite3_result_subtype(ctx, JSON_SUBTYPE);
169 #endif
170 #endif
171 }
172
extract(const char * str)173 json_string extract(const char *str)
174 {
175 data_scanner ds(str);
176 data_parser dp(&ds);
177
178 dp.parse();
179 // dp.print(stderr, dp.dp_pairs);
180
181 yajlpp_gen gen;
182 yajl_gen_config(gen, yajl_gen_beautify, false);
183
184 elements_to_json(gen, dp, &dp.dp_pairs);
185
186 return json_string(gen);
187 }
188
logfmt2json(string_fragment line)189 json_string logfmt2json(string_fragment line)
190 {
191 logfmt::parser p(line);
192 yajlpp_gen gen;
193 yajl_gen_config(gen, yajl_gen_beautify, false);
194
195 {
196 yajlpp_map root(gen);
197 bool done = false;
198
199 while (!done) {
200 auto pair = p.step();
201
202 done = pair.match(
203 [](const logfmt::parser::end_of_input& eoi) {
204 return true;
205 },
206 [&root, &gen](const logfmt::parser::kvpair& kvp) {
207 root.gen(kvp.first);
208
209 kvp.second.match(
210 [&root](const logfmt::parser::bool_value& bv) {
211 root.gen(bv.bv_value);
212 },
213 [&root](const logfmt::parser::int_value& iv) {
214 root.gen(iv.iv_value);
215 },
216 [&root](const logfmt::parser::float_value& fv) {
217 root.gen(fv.fv_value);
218 },
219 [&root, &gen](const logfmt::parser::quoted_value& qv) {
220 auto_mem<yajl_handle_t> parse_handle(yajl_free);
221 json_ptr jp("");
222 json_op jo(jp);
223
224 jo.jo_ptr_callbacks = json_op::gen_callbacks;
225 jo.jo_ptr_data = gen;
226 parse_handle.reset(yajl_alloc(&json_op::ptr_callbacks, nullptr, &jo));
227
228 auto json_in = (const unsigned char *) qv.qv_value.data();
229 auto json_len = qv.qv_value.length();
230
231 if (yajl_parse(parse_handle.in(), json_in, json_len) != yajl_status_ok ||
232 yajl_complete_parse(parse_handle.in()) != yajl_status_ok) {
233 root.gen(qv.qv_value);
234 }
235 },
236 [&root](const logfmt::parser::unquoted_value& uv) {
237 root.gen(uv.uv_value);
238 }
239 );
240
241 return false;
242 },
243 [](const logfmt::parser::error& e) -> bool {
244 throw sqlite_func_error("Invalid logfmt: {}", e.e_msg);
245 }
246 );
247 }
248 }
249
250 return json_string(gen);
251 }
252
253 static
regexp_replace(const char * str,const char * re,const char * repl)254 string regexp_replace(const char *str, const char *re, const char *repl)
255 {
256 cache_entry *reobj = find_re(re);
257
258 return reobj->re2->replace(str, repl);
259 }
260
261 static
spooky_hash(const vector<const char * > & args)262 string spooky_hash(const vector<const char *> &args)
263 {
264 byte_array<2, uint64> hash;
265 SpookyHash context;
266
267 context.Init(0, 0);
268 for (const auto arg : args) {
269 int64_t len = arg != nullptr ? strlen(arg) : 0;
270
271 context.Update(&len, sizeof(len));
272 if (arg == nullptr) {
273 continue;
274 }
275 context.Update(arg, len);
276 }
277 context.Final(hash.out(0), hash.out(1));
278
279 return hash.to_string();
280 }
281
sql_spooky_hash_step(sqlite3_context * context,int argc,sqlite3_value ** argv)282 static void sql_spooky_hash_step(sqlite3_context *context,
283 int argc,
284 sqlite3_value **argv)
285 {
286 auto *hasher = (SpookyHash *)sqlite3_aggregate_context(context,
287 sizeof(SpookyHash));
288
289 for (int lpc = 0; lpc < argc; lpc++) {
290 auto value = sqlite3_value_text(argv[lpc]);
291 int64_t len = value != nullptr ? strlen((const char *) value) : 0;
292
293 hasher->Update(&len, sizeof(len));
294 if (value == nullptr) {
295 continue;
296 }
297 hasher->Update(value, len);
298 }
299 }
300
sql_spooky_hash_final(sqlite3_context * context)301 static void sql_spooky_hash_final(sqlite3_context *context)
302 {
303 auto *hasher = (SpookyHash *)sqlite3_aggregate_context(
304 context, sizeof(SpookyHash));
305
306 if (hasher == nullptr) {
307 sqlite3_result_null(context);
308 } else {
309 byte_array<2, uint64> hash;
310
311 hasher->Final(hash.out(0), hash.out(1));
312
313 string hex = hash.to_string();
314 sqlite3_result_text(context, hex.c_str(), hex.length(),
315 SQLITE_TRANSIENT);
316 }
317 }
318
319 struct sparkline_context {
320 bool sc_initialized{true};
321 double sc_max_value{0.0};
322 std::vector<double> sc_values;
323 };
324
sparkline_step(sqlite3_context * context,int argc,sqlite3_value ** argv)325 static void sparkline_step(sqlite3_context *context,
326 int argc,
327 sqlite3_value **argv)
328 {
329 auto *sc = (sparkline_context *)
330 sqlite3_aggregate_context(context, sizeof(sparkline_context));
331
332 if (!sc->sc_initialized) {
333 new (sc) sparkline_context;
334 }
335
336 if (argc == 0) {
337 return;
338 }
339
340 sc->sc_values.push_back(sqlite3_value_double(argv[0]));
341 sc->sc_max_value = std::max(sc->sc_max_value, sc->sc_values.back());
342
343 if (argc >= 2) {
344 sc->sc_max_value = std::max(sc->sc_max_value,
345 sqlite3_value_double(argv[1]));
346 }
347 }
348
sparkline_final(sqlite3_context * context)349 static void sparkline_final(sqlite3_context *context)
350 {
351 auto *sc = (sparkline_context *)
352 sqlite3_aggregate_context(context, sizeof(sparkline_context));
353
354 if (!sc->sc_initialized) {
355 sqlite3_result_text(context, "", 0, SQLITE_STATIC);
356 return;
357 }
358
359 auto retval = (char *) malloc(sc->sc_values.size() * 3 + 1);
360 auto start = retval;
361
362 for (const auto& value : sc->sc_values) {
363 auto bar = humanize::sparkline(value, sc->sc_max_value);
364
365 strcpy(start, bar.c_str());
366 start += bar.length();
367 }
368 *start = '\0';
369
370 sqlite3_result_text(context, retval, -1, free);
371
372 sc->~sparkline_context();
373 }
374
375 nonstd::optional<util::variant<blob_auto_buffer, sqlite3_int64, double>>
sql_gunzip(sqlite3_value * val)376 sql_gunzip(sqlite3_value *val)
377 {
378 switch (sqlite3_value_type(val)) {
379 case SQLITE3_TEXT:
380 case SQLITE_BLOB: {
381 auto buffer = sqlite3_value_blob(val);
382 auto len = sqlite3_value_bytes(val);
383
384 if (!lnav::gzip::is_gzipped((const char *) buffer, len)) {
385 auto retval = auto_buffer::alloc(len);
386
387 memcpy(retval.in(), buffer, len);
388 return blob_auto_buffer{ std::move(retval) };
389 }
390
391 auto res = lnav::gzip::uncompress("", buffer, len);
392
393 if (res.isErr()) {
394 throw sqlite_func_error("unable to uncompress -- {}",
395 res.unwrapErr());
396 }
397
398 return blob_auto_buffer{ res.unwrap() };
399 }
400 case SQLITE_INTEGER:
401 return sqlite3_value_int64(val);
402 case SQLITE_FLOAT:
403 return sqlite3_value_double(val);
404 }
405
406 return nonstd::nullopt;
407 }
408
409 nonstd::optional<blob_auto_buffer>
sql_gzip(sqlite3_value * val)410 sql_gzip(sqlite3_value *val)
411 {
412 switch (sqlite3_value_type(val)) {
413 case SQLITE3_TEXT:
414 case SQLITE_BLOB: {
415 auto buffer = sqlite3_value_blob(val);
416 auto len = sqlite3_value_bytes(val);
417 auto res = lnav::gzip::compress(buffer, len);
418
419 if (res.isErr()) {
420 throw sqlite_func_error("unable to compress -- {}",
421 res.unwrapErr());
422 }
423
424 return blob_auto_buffer{ res.unwrap() };
425 }
426 case SQLITE_INTEGER:
427 case SQLITE_FLOAT: {
428 auto buffer = sqlite3_value_text(val);
429 log_debug("buf %s", buffer);
430 auto res = lnav::gzip::compress(buffer, strlen((const char *) buffer));
431
432 if (res.isErr()) {
433 throw sqlite_func_error("unable to compress -- {}",
434 res.unwrapErr());
435 }
436
437 return blob_auto_buffer{ res.unwrap() };
438 }
439 }
440
441 return nonstd::nullopt;
442 }
443
string_extension_functions(struct FuncDef ** basic_funcs,struct FuncDefAgg ** agg_funcs)444 int string_extension_functions(struct FuncDef **basic_funcs,
445 struct FuncDefAgg **agg_funcs)
446 {
447 static struct FuncDef string_funcs[] = {
448 sqlite_func_adapter<decltype(®exp), regexp>::builder(
449 help_text("regexp",
450 "Test if a string matches a regular expression")
451 .sql_function()
452 .with_parameter({"re", "The regular expression to use"})
453 .with_parameter({"str", "The string to test against the regular expression"})
454 ),
455
456 sqlite_func_adapter<decltype(®exp_match), regexp_match>::builder(
457 help_text("regexp_match",
458 "Match a string against a regular expression and return the capture groups as JSON.")
459 .sql_function()
460 .with_parameter({"re", "The regular expression to use"})
461 .with_parameter({"str", "The string to test against the regular expression"})
462 .with_tags({"string", "regex"})
463 .with_example({
464 "To capture the digits from the string '123'",
465 "SELECT regexp_match('(\\d+)', '123')"
466 })
467 .with_example({
468 "To capture a number and word into a JSON object with the properties 'col_0' and 'col_1'",
469 "SELECT regexp_match('(\\d+) (\\w+)', '123 four')"
470 })
471 .with_example({
472 "To capture a number and word into a JSON object with the named properties 'num' and 'str'",
473 "SELECT regexp_match('(?<num>\\d+) (?<str>\\w+)', '123 four')"
474 })
475 ),
476
477 sqlite_func_adapter<decltype(®exp_replace), regexp_replace>::builder(
478 help_text("regexp_replace",
479 "Replace the parts of a string that match a regular expression.")
480 .sql_function()
481 .with_parameter({"str", "The string to perform replacements on"})
482 .with_parameter({"re", "The regular expression to match"})
483 .with_parameter({"repl", "The replacement string. "
484 "You can reference capture groups with a backslash followed by the number of the "
485 "group, starting with 1."})
486 .with_tags({"string", "regex"})
487 .with_example({
488 "To replace the word at the start of the string 'Hello, World!' with 'Goodbye'",
489 "SELECT regexp_replace('Hello, World!', '^(\\w+)', 'Goodbye')"
490 })
491 .with_example({
492 "To wrap alphanumeric words with angle brackets",
493 "SELECT regexp_replace('123 abc', '(\\w+)', '<\\1>')"
494 })
495 ),
496
497 sqlite_func_adapter<decltype(&humanize::file_size), humanize::file_size>::builder(
498 help_text("humanize_file_size",
499 "Format the given file size as a human-friendly string")
500 .sql_function()
501 .with_parameter({"value", "The file size to format"})
502 .with_tags({"string"})
503 .with_example({
504 "To format an amount",
505 "SELECT humanize_file_size(10 * 1024 * 1024)"
506 })
507 ),
508
509 sqlite_func_adapter<decltype(&humanize::sparkline), humanize::sparkline>::builder(
510 help_text("sparkline",
511 "Function used to generate a sparkline bar chart. "
512 "The non-aggregate version converts a single numeric "
513 "value on a range to a bar chart character. The "
514 "aggregate version returns a string with a bar "
515 "character for every numeric input")
516 .sql_function()
517 .with_parameter({"value", "The numeric value to convert"})
518 .with_parameter(help_text(
519 "upper",
520 "The upper bound of the numeric range. The non-aggregate "
521 "version defaults to 100. The aggregate version uses the "
522 "largest value in the inputs.")
523 .optional())
524 .with_tags({"string"})
525 .with_example({
526 "To get the unicode block element for the value 32 in the "
527 "range of 0-128",
528 "SELECT sparkline(32, 128)"
529 })
530 .with_example({
531 "To chart the values in a JSON array",
532 "SELECT sparkline(value) FROM json_each('[0, 1, 2, 3, 4, 5, 6, 7, 8]')"
533 })
534 ),
535
536 sqlite_func_adapter<decltype(&extract), extract>::builder(
537 help_text("extract",
538 "Automatically Parse and extract data from a string")
539 .sql_function()
540 .with_parameter({"str", "The string to parse"})
541 .with_tags({"string"})
542 .with_example({
543 "To extract key/value pairs from a string",
544 "SELECT extract('foo=1 bar=2 name=\"Rolo Tomassi\"')"
545 })
546 .with_example({
547 "To extract columnar data from a string",
548 "SELECT extract('1.0 abc 2.0')"
549 })
550 ),
551
552 sqlite_func_adapter<decltype(&logfmt2json), logfmt2json>::builder(
553 help_text("logfmt2json",
554 "Convert a logfmt-encoded string into JSON")
555 .sql_function()
556 .with_parameter({"str", "The logfmt message to parse"})
557 .with_tags({"string"})
558 .with_example({
559 "To extract key/value pairs from a log message",
560 "SELECT logfmt2json('foo=1 bar=2 name=\"Rolo Tomassi\"')"
561 })
562 ),
563
564 sqlite_func_adapter<decltype(
565 static_cast<bool (*)(const char *, const char *)>(&startswith)),
566 startswith>::builder(
567 help_text("startswith",
568 "Test if a string begins with the given prefix")
569 .sql_function()
570 .with_parameter({"str", "The string to test"})
571 .with_parameter({"prefix", "The prefix to check in the string"})
572 .with_tags({"string"})
573 .with_example({
574 "To test if the string 'foobar' starts with 'foo'",
575 "SELECT startswith('foobar', 'foo')"
576 })
577 .with_example({
578 "To test if the string 'foobar' starts with 'bar'",
579 "SELECT startswith('foobar', 'bar')"
580 })
581 ),
582
583 sqlite_func_adapter<decltype(
584 static_cast<bool (*)(const char *, const char *)>(&endswith)),
585 endswith>::builder(
586 help_text("endswith",
587 "Test if a string ends with the given suffix")
588 .sql_function()
589 .with_parameter({"str", "The string to test"})
590 .with_parameter({"suffix", "The suffix to check in the string"})
591 .with_tags({"string"})
592 .with_example({
593 "To test if the string 'notbad.jpg' ends with '.jpg'",
594 "SELECT endswith('notbad.jpg', '.jpg')"
595 })
596 .with_example({
597 "To test if the string 'notbad.png' starts with '.jpg'",
598 "SELECT endswith('notbad.png', '.jpg')"
599 })
600 ),
601
602 sqlite_func_adapter<decltype(&spooky_hash), spooky_hash>::builder(
603 help_text("spooky_hash",
604 "Compute the hash value for the given arguments.")
605 .sql_function()
606 .with_parameter(help_text("str", "The string to hash")
607 .one_or_more())
608 .with_tags({"string"})
609 .with_example({
610 "To produce a hash for the string 'Hello, World!'",
611 "SELECT spooky_hash('Hello, World!')"
612 })
613 .with_example({
614 "To produce a hash for the parameters where one is NULL",
615 "SELECT spooky_hash('Hello, World!', NULL)"
616 })
617 .with_example({
618 "To produce a hash for the parameters where one is an empty string",
619 "SELECT spooky_hash('Hello, World!', '')"
620 })
621 .with_example({
622 "To produce a hash for the parameters where one is a number",
623 "SELECT spooky_hash('Hello, World!', 123)"
624 })
625 ),
626
627 sqlite_func_adapter<decltype(&sql_gunzip), sql_gunzip>::builder(
628 help_text("gunzip", "Decompress a gzip file")
629 .sql_function()
630 .with_parameter(help_text("b", "The blob to decompress")
631 .one_or_more())
632 .with_tags({"string"})
633 ),
634
635 sqlite_func_adapter<decltype(&sql_gzip), sql_gzip>::builder(
636 help_text("gzip", "Compress a string into a gzip file")
637 .sql_function()
638 .with_parameter(help_text("value", "The value to compress")
639 .one_or_more())
640 .with_tags({"string"})
641 ),
642
643 {nullptr}
644 };
645
646 static struct FuncDefAgg str_agg_funcs[] = {
647 {"group_spooky_hash", -1, 0,
648 sql_spooky_hash_step, sql_spooky_hash_final,
649 help_text("group_spooky_hash",
650 "Compute the hash value for the given arguments")
651 .sql_agg_function()
652 .with_parameter(help_text("str", "The string to hash")
653 .one_or_more())
654 .with_tags({"string"})
655 .with_example({
656 "To produce a hash of all of the values of 'column1'",
657 "SELECT group_spooky_hash(column1) FROM (VALUES ('abc'), ('123'))"
658 })
659 },
660
661 {
662 "sparkline", -1, 0,
663 sparkline_step, sparkline_final,
664 },
665
666 {nullptr}
667 };
668
669 *basic_funcs = string_funcs;
670 *agg_funcs = str_agg_funcs;
671
672 return SQLITE_OK;
673 }
674