1 /*-
2 * Copyright 2019 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lua_common.h"
18 #include "libcryptobox/cryptobox.h"
19 #include "contrib/fastutf8/fastutf8.h"
20 #include "unix-std.h"
21
22 /***
23 * @module rspamd_text
24 * This module provides access to opaque text structures used widely to prevent
25 * copying between Lua and C for various concerns: performance, security etc...
26 *
27 * You can convert rspamd_text into string but it will copy data.
28 */
29
30 /***
31 * @function rspamd_text.fromstring(str)
32 * Creates rspamd_text from Lua string (copied to the text)
33 * @param {string} str string to use
34 * @return {rspamd_text} resulting text
35 */
36 LUA_FUNCTION_DEF (text, fromstring);
37
38 /***
39 * @function rspamd_text.null()
 * Creates rspamd_text with NULL pointer for testing purposes (takes no arguments)
42 * @return {rspamd_text} resulting text
43 */
44 LUA_FUNCTION_DEF (text, null);
45 /***
46 * @function rspamd_text.randombytes(nbytes)
47 * Creates rspamd_text with random bytes inside (raw bytes)
48 * @param {number} nbytes number of random bytes generated
49 * @return {rspamd_text} random bytes text
50 */
51 LUA_FUNCTION_DEF (text, randombytes);
52
53 /***
54 * @function rspamd_text.fromtable(tbl[, delim])
55 * Same as `table.concat` but generates rspamd_text instead of the Lua string
56 * @param {table} tbl table to use
57 * @param {string} delim optional delimiter
58 * @return {rspamd_text} resulting text
59 */
60 LUA_FUNCTION_DEF (text, fromtable);
61 /***
62 * @method rspamd_text:byte(pos[, pos2])
63 * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
64 * @param {integer} pos index
65 * @param {integer} pos2 index
66 * @return {integer} byte at the position `pos` or varargs of bytes
67 */
68 LUA_FUNCTION_DEF (text, byte);
69 /***
70 * @method rspamd_text:len()
71 * Returns length of a string
72 * @return {number} length of string in **bytes**
73 */
74 LUA_FUNCTION_DEF (text, len);
75 /***
76 * @method rspamd_text:str()
77 * Converts text to string by copying its content
78 * @return {string} copy of text as Lua string
79 */
80 LUA_FUNCTION_DEF (text, str);
81 /***
82 * @method rspamd_text:ptr()
83 * Converts text to lightuserdata
84 * @return {lightuserdata} pointer value of rspamd_text
85 */
86 LUA_FUNCTION_DEF (text, ptr);
87 /***
88 * @method rspamd_text:save_in_file(fname[, mode])
89 * Saves text in file
90 * @return {boolean} true if save has been completed
91 */
92 LUA_FUNCTION_DEF (text, save_in_file);
93 /***
94 * @method rspamd_text:span(start[, len])
95 * Returns a span for lua_text starting at pos [start] (1 indexed) and with
96 * length `len` (or to the end of the text)
97 * @param {integer} start start index
98 * @param {integer} len length of span
99 * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
100 */
101 LUA_FUNCTION_DEF (text, span);
102 /***
 * @method rspamd_text:sub(start[, end])
 * Returns a substring of the text, similar to string.sub from Lua
105 * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
106 */
107 LUA_FUNCTION_DEF (text, sub);
108 /***
109 * @method rspamd_text:lines([stringify])
110 * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
111 * @param {boolean} stringify stringify lines
112 * @return {iterator} iterator triplet
113 */
114 LUA_FUNCTION_DEF (text, lines);
115 /***
116 * @method rspamd_text:split(regexp, [stringify])
117 * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
118 * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
119 * @param {boolean} stringify stringify lines
120 * @return {iterator} iterator triplet
121 */
122 LUA_FUNCTION_DEF (text, split);
123 /***
124 * @method rspamd_text:at(pos)
125 * Returns a byte at the position `pos`
126 * @param {integer} pos index
127 * @return {integer} byte at the position `pos` or nil if pos out of bound
128 */
129 LUA_FUNCTION_DEF (text, at);
130 /***
131 * @method rspamd_text:memchr(chr, [reverse])
132 * Returns the first or the last position of the character `chr` in the text or
133 * -1 in case if a character has not been found. Indexes start from `1`
134 * @param {string/number} chr character or a character code to find
135 * @param {boolean} reverse last character if `true`
136 * @return {integer} position of the character or `-1`
137 */
138 LUA_FUNCTION_DEF (text, memchr);
139 /***
140 * @method rspamd_text:bytes()
141 * Converts text to an array of bytes
142 * @return {table|integer} bytes in the array (as unsigned char)
143 */
144 LUA_FUNCTION_DEF (text, bytes);
145 /***
146 * @method rspamd_text:lower([is_utf, [inplace]])
147 * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
148 * @param {boolean} is_utf apply utf8 lowercase
149 * @param {boolean} inplace lowercase the original text
150 * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
151 */
152 LUA_FUNCTION_DEF (text, lower);
153 LUA_FUNCTION_DEF (text, take_ownership);
154 /***
155 * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
156 * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
157 * where all chars from `set_to_exclude` are removed
158 * Patterns supported:
159 *
160 * - %s - all space characters
161 * - %n - all newline characters
162 * - %c - all control characters (it includes 8bit characters and spaces)
163 * - %8 - all 8 bit characters
164 * - %% - just a percent character
165 *
166 * @param {string} set_to_exclude characters to exclude
167 * @param {boolean} always_copy always copy the source text
168 * @return {rspamd_text} modified or copied text
169 */
170 LUA_FUNCTION_DEF (text, exclude_chars);
171 /***
172 * @method rspamd_text:oneline([always_copy])
173 * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
174 * where the following transformations are made:
175 * - All spaces sequences are replaced with a single space
176 * - All newlines sequences are replaced with a single space
177 * - Trailing and leading spaces are removed
178 * - Control characters are excluded
179 * - UTF8 sequences are normalised
180 *
181 * @param {boolean} always_copy always copy the source text
182 * @return {rspamd_text} modified or copied text
183 */
184 LUA_FUNCTION_DEF (text, oneline);
185 /***
186 * @method rspamd_text:base32([b32type])
187 * Returns a text encoded in base32 (new rspamd_text is allocated)
188 *
189 * @param {string} b32type base32 type (default, bleach, rfc)
190 * @return {rspamd_text} new text encoded in base32
191 */
192 LUA_FUNCTION_DEF (text, base32);
193 /***
194 * @method rspamd_text:base64([line_length, [nline, [fold]]])
195 * Returns a text encoded in base64 (new rspamd_text is allocated)
196 *
 * @param {number} line_length return text split with newlines up to this attribute
198 * @param {string} nline newline type: `cr`, `lf`, `crlf`
199 * @param {boolean} fold use folding when splitting into lines (false by default)
200 * @return {rspamd_text} new text encoded in base64
201 */
202 LUA_FUNCTION_DEF (text, base64);
203 /***
204 * @method rspamd_text:hex()
205 * Returns a text encoded in hex (new rspamd_text is allocated)
206 *
207 * @return {rspamd_text} new text encoded in hex
208 */
209 LUA_FUNCTION_DEF (text, hex);
210 /***
211 * @method rspamd_text:find(pattern [, init])
212 * Looks for the first match of pattern in the string s.
213 * If it finds a match, then find returns the indices of s where this occurrence
214 * starts and ends; otherwise, it returns nil. A third,
215 * optional numerical argument init specifies where to start the search;
216 * its default value is 1 and can be negative.
217 * This method currently supports merely a plain search, no patterns.
218 *
219 * @param {string} pattern pattern to find
220 * @param {number} init specifies where to start the search (1 default)
221 * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
222 */
223 LUA_FUNCTION_DEF (text, find);
224 LUA_FUNCTION_DEF (text, gc);
225 LUA_FUNCTION_DEF (text, eq);
226 LUA_FUNCTION_DEF (text, lt);
227 LUA_FUNCTION_DEF (text, concat);
228 LUA_FUNCTION_DEF (text, strtoul);
229
230 static const struct luaL_reg textlib_f[] = {
231 LUA_INTERFACE_DEF (text, fromstring),
232 {"from_string", lua_text_fromstring},
233 LUA_INTERFACE_DEF (text, fromtable),
234 {"from_table", lua_text_fromtable},
235 LUA_INTERFACE_DEF (text, null),
236 LUA_INTERFACE_DEF (text, randombytes),
237 {NULL, NULL}
238 };
239
240 static const struct luaL_reg textlib_m[] = {
241 LUA_INTERFACE_DEF (text, len),
242 LUA_INTERFACE_DEF (text, str),
243 LUA_INTERFACE_DEF (text, ptr),
244 LUA_INTERFACE_DEF (text, take_ownership),
245 LUA_INTERFACE_DEF (text, save_in_file),
246 LUA_INTERFACE_DEF (text, span),
247 LUA_INTERFACE_DEF (text, sub),
248 LUA_INTERFACE_DEF (text, lines),
249 LUA_INTERFACE_DEF (text, split),
250 LUA_INTERFACE_DEF (text, at),
251 LUA_INTERFACE_DEF (text, memchr),
252 LUA_INTERFACE_DEF (text, byte),
253 LUA_INTERFACE_DEF (text, bytes),
254 LUA_INTERFACE_DEF (text, lower),
255 LUA_INTERFACE_DEF (text, exclude_chars),
256 LUA_INTERFACE_DEF (text, oneline),
257 LUA_INTERFACE_DEF (text, base32),
258 LUA_INTERFACE_DEF (text, base64),
259 LUA_INTERFACE_DEF (text, hex),
260 LUA_INTERFACE_DEF (text, find),
261 LUA_INTERFACE_DEF (text, strtoul),
262 {"write", lua_text_save_in_file},
263 {"__len", lua_text_len},
264 {"__tostring", lua_text_str},
265 {"__gc", lua_text_gc},
266 {"__eq", lua_text_eq},
267 {"__lt", lua_text_lt},
268 {"__concat", lua_text_concat},
269 {NULL, NULL}
270 };
271
272 struct rspamd_lua_text *
lua_check_text(lua_State * L,gint pos)273 lua_check_text (lua_State * L, gint pos)
274 {
275 void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
276 luaL_argcheck (L, ud != NULL, pos, "'text' expected");
277 return ud ? (struct rspamd_lua_text *)ud : NULL;
278 }
279
280 struct rspamd_lua_text *
lua_check_text_or_string(lua_State * L,gint pos)281 lua_check_text_or_string (lua_State * L, gint pos)
282 {
283 gint pos_type = lua_type (L, pos);
284
285 if (pos_type == LUA_TUSERDATA) {
286 void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
287 luaL_argcheck (L, ud != NULL, pos, "'text' expected");
288 return ud ? (struct rspamd_lua_text *) ud : NULL;
289 }
290 else if (pos_type == LUA_TSTRING) {
291 /*
292 * Fake static lua_text, we allow to use this function multiple times
293 * by having a small array of static structures.
294 */
295 static int cur_txt_idx = 0;
296 static struct rspamd_lua_text fake_text[4];
297 gsize len;
298 int sel_idx;
299
300 sel_idx = cur_txt_idx++ % G_N_ELEMENTS (fake_text);
301 fake_text[sel_idx].start = lua_tolstring (L, pos, &len);
302
303 if (len >= G_MAXUINT) {
304 return NULL;
305 }
306
307 fake_text[sel_idx].len = len;
308 fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
309
310 return &fake_text[sel_idx];
311 }
312
313 return NULL;
314 }
315
316 struct rspamd_lua_text *
lua_new_text(lua_State * L,const gchar * start,gsize len,gboolean own)317 lua_new_text (lua_State *L, const gchar *start, gsize len, gboolean own)
318 {
319 struct rspamd_lua_text *t;
320
321 t = lua_newuserdata (L, sizeof (*t));
322 t->flags = 0;
323
324 if (own) {
325 gchar *storage;
326
327 if (len > 0) {
328 storage = g_malloc (len);
329
330 if (start != NULL) {
331 memcpy (storage, start, len);
332 }
333
334 t->start = storage;
335 t->flags = RSPAMD_TEXT_FLAG_OWN;
336 }
337 else {
338 t->start = "";
339 }
340 }
341 else {
342 t->start = start;
343 }
344
345 t->len = len;
346 rspamd_lua_setclass (L, "rspamd{text}", -1);
347
348 return t;
349 }
350
351
352 static gint
lua_text_fromstring(lua_State * L)353 lua_text_fromstring (lua_State *L)
354 {
355 LUA_TRACE_POINT;
356 const gchar *str;
357 gsize l = 0;
358 gboolean transparent = FALSE;
359
360 str = luaL_checklstring (L, 1, &l);
361
362 if (str) {
363 if (lua_isboolean (L, 2)) {
364 transparent = lua_toboolean (L, 2);
365 }
366
367 lua_new_text (L, str, l, !transparent);
368 }
369 else {
370 return luaL_error (L, "invalid arguments");
371 }
372
373
374 return 1;
375 }
376
377 static gint
lua_text_null(lua_State * L)378 lua_text_null (lua_State *L)
379 {
380 LUA_TRACE_POINT;
381
382 lua_new_text (L, NULL, 0, false);
383
384 return 1;
385 }
386
387 static gint
lua_text_randombytes(lua_State * L)388 lua_text_randombytes (lua_State *L)
389 {
390 LUA_TRACE_POINT;
391 guint nbytes = luaL_checkinteger (L, 1);
392 struct rspamd_lua_text *out;
393
394 out = lua_new_text (L, NULL, nbytes, TRUE);
395 randombytes_buf ((char *)out->start, nbytes);
396 out->len = nbytes;
397
398 return 1;
399 }
400
401 #define MAX_REC 10
402
403 static void
lua_text_tbl_length(lua_State * L,gsize dlen,gsize * dest,guint rec)404 lua_text_tbl_length (lua_State *L, gsize dlen, gsize *dest, guint rec)
405 {
406 gsize tblen, stlen;
407 struct rspamd_lua_text *elt;
408
409 if (rec > MAX_REC) {
410 luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
411
412 return;
413 }
414
415 tblen = rspamd_lua_table_size (L, -1);
416
417 for (gsize i = 0; i < tblen; i ++) {
418 lua_rawgeti (L, -1, i + 1);
419
420 if (lua_type (L, -1) == LUA_TSTRING) {
421 #if LUA_VERSION_NUM >= 502
422 stlen = lua_rawlen (L, -1);
423 #else
424 stlen = lua_objlen (L, -1);
425 #endif
426 (*dest) += stlen;
427 }
428 else if (lua_type (L, -1) == LUA_TUSERDATA){
429 elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
430
431 if (elt) {
432 (*dest) += elt->len;
433 }
434 }
435 else if (lua_type (L, -1) == LUA_TTABLE) {
436 lua_text_tbl_length (L, dlen, dest, rec + 1);
437 }
438
439 if (i != tblen - 1) {
440 (*dest) += dlen;
441 }
442
443 lua_pop (L, 1);
444 }
445 }
446
447 static void
lua_text_tbl_append(lua_State * L,const gchar * delim,gsize dlen,gchar ** dest,guint rec)448 lua_text_tbl_append (lua_State *L,
449 const gchar *delim,
450 gsize dlen,
451 gchar **dest,
452 guint rec)
453 {
454 const gchar *st;
455 gsize tblen, stlen;
456 struct rspamd_lua_text *elt;
457
458 if (rec > MAX_REC) {
459 luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
460
461 return;
462 }
463
464 tblen = rspamd_lua_table_size (L, -1);
465
466 for (guint i = 0; i < tblen; i ++) {
467 lua_rawgeti (L, -1, i + 1);
468
469 if (lua_type (L, -1) == LUA_TSTRING) {
470 st = lua_tolstring (L, -1, &stlen);
471 memcpy ((*dest), st, stlen);
472 (*dest) += stlen;
473 }
474 else if (lua_type (L, -1) == LUA_TUSERDATA){
475 elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
476
477 if (elt) {
478 memcpy ((*dest), elt->start, elt->len);
479 (*dest) += elt->len;
480 }
481 }
482 else if (lua_type (L, -1) == LUA_TTABLE) {
483 lua_text_tbl_append (L, delim, dlen, dest, rec + 1);
484 }
485
486 if (dlen && i != tblen - 1) {
487 memcpy ((*dest), delim, dlen);
488 (*dest) += dlen;
489 }
490
491 lua_pop (L, 1);
492 }
493 }
494
495 static gint
lua_text_fromtable(lua_State * L)496 lua_text_fromtable (lua_State *L)
497 {
498 LUA_TRACE_POINT;
499 const gchar *delim = "";
500 struct rspamd_lua_text *t;
501 gsize textlen = 0, dlen, oldtop = lua_gettop (L);
502 gchar *dest;
503
504 if (!lua_istable (L, 1)) {
505 return luaL_error (L, "invalid arguments");
506 }
507
508 if (lua_type (L, 2) == LUA_TSTRING) {
509 delim = lua_tolstring (L, 2, &dlen);
510 }
511 else {
512 dlen = 0;
513 }
514
515 /* Calculate length needed */
516 lua_pushvalue (L, 1);
517 lua_text_tbl_length (L, dlen, &textlen, 0);
518 lua_pop (L, 1);
519
520 /* Allocate new text */
521 t = lua_newuserdata (L, sizeof (*t));
522 dest = g_malloc (textlen);
523 t->start = dest;
524 t->len = textlen;
525 t->flags = RSPAMD_TEXT_FLAG_OWN;
526 rspamd_lua_setclass (L, "rspamd{text}", -1);
527
528 lua_pushvalue (L, 1);
529 lua_text_tbl_append (L, delim, dlen, &dest, 0);
530 lua_pop (L, 1); /* Table arg */
531
532 gint newtop = lua_gettop (L);
533 g_assert ( newtop== oldtop + 1);
534
535 return 1;
536 }
537
538 static gint
lua_text_len(lua_State * L)539 lua_text_len (lua_State *L)
540 {
541 LUA_TRACE_POINT;
542 struct rspamd_lua_text *t = lua_check_text (L, 1);
543 gsize l = 0;
544
545 if (t != NULL) {
546 l = t->len;
547 }
548 else {
549 return luaL_error (L, "invalid arguments");
550 }
551
552 lua_pushinteger (L, l);
553
554 return 1;
555 }
556
557 static gint
lua_text_str(lua_State * L)558 lua_text_str (lua_State *L)
559 {
560 LUA_TRACE_POINT;
561 struct rspamd_lua_text *t = lua_check_text (L, 1);
562
563 if (t != NULL) {
564 lua_pushlstring (L, t->start, t->len);
565 }
566 else {
567 return luaL_error (L, "invalid arguments");
568 }
569
570 return 1;
571 }
572
573 static gint
lua_text_ptr(lua_State * L)574 lua_text_ptr (lua_State *L)
575 {
576 LUA_TRACE_POINT;
577 struct rspamd_lua_text *t = lua_check_text (L, 1);
578
579 if (t != NULL) {
580 lua_pushlightuserdata (L, (gpointer)t->start);
581 }
582 else {
583 return luaL_error (L, "invalid arguments");
584 }
585
586 return 1;
587 }
588
589 static gint
lua_text_take_ownership(lua_State * L)590 lua_text_take_ownership (lua_State *L)
591 {
592 LUA_TRACE_POINT;
593 struct rspamd_lua_text *t = lua_check_text (L, 1);
594 gchar *dest;
595
596 if (t != NULL) {
597 if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
598 /* We already own it */
599 lua_pushboolean (L, true);
600 }
601 else {
602 dest = g_malloc (t->len);
603 memcpy (dest, t->start, t->len);
604 t->start = dest;
605 t->flags |= RSPAMD_TEXT_FLAG_OWN;
606 lua_pushboolean (L, true);
607 }
608 }
609 else {
610 return luaL_error (L, "invalid arguments");
611 }
612
613 return 1;
614 }
615
616 static gint
lua_text_span(lua_State * L)617 lua_text_span (lua_State *L)
618 {
619 LUA_TRACE_POINT;
620 struct rspamd_lua_text *t = lua_check_text (L, 1);
621 gint64 start = lua_tointeger (L, 2), len = -1;
622
623 if (t && start >= 1 && start <= t->len) {
624 if (lua_isnumber (L, 3)) {
625 len = lua_tonumber (L, 3);
626 }
627
628 if (len == -1) {
629 len = t->len - (start - 1);
630 }
631
632 if (len < 0 || (len > (t->len - (start - 1)))) {
633 return luaL_error (L, "invalid length");
634 }
635
636 lua_new_text (L, t->start + (start - 1), len, FALSE);
637 }
638 else {
639 if (!t) {
640 return luaL_error (L, "invalid arguments, text required");
641 }
642 else {
643 return luaL_error (L, "invalid arguments: start offset %d "
644 "is larger than text len %d", (int)start, (int)t->len);
645 }
646 }
647
648 return 1;
649 }
650
651 /* Helpers to behave exactly as Lua does */
652 static inline gsize
relative_pos_start(gint pos,gsize len)653 relative_pos_start (gint pos, gsize len)
654 {
655 if (pos > 0) {
656 return pos;
657 }
658 else if (pos == 0) {
659 return 1;
660 }
661 else if (pos < -((gint) len)) {
662 return 1;
663 }
664
665 /* Negative pos inside str */
666 return len + ((gsize)pos) + 1;
667 }
668
669 static inline gsize
relative_pos_end(gint pos,gsize len)670 relative_pos_end (gint pos, gsize len)
671 {
672 if (pos > (gint)len) {
673 return len;
674 }
675 else if (pos >= 0) {
676 return (size_t) pos;
677 }
678 else if (pos < -((gint)len)) {
679 return 0;
680 }
681
682 return len + ((gsize)pos) + 1;
683 }
684
685 static gint
lua_text_sub(lua_State * L)686 lua_text_sub (lua_State *L)
687 {
688 LUA_TRACE_POINT;
689 struct rspamd_lua_text *t = lua_check_text (L, 1);
690
691 if (t) {
692 size_t start = relative_pos_start (luaL_checkinteger (L, 2),
693 t->len);
694 size_t end = relative_pos_end (luaL_optinteger (L, 3, -1),
695 t->len);
696
697
698 if (start <= end) {
699 lua_new_text (L, t->start + (start - 1),
700 (end - start) + 1, FALSE);
701 }
702 else {
703 lua_new_text (L, "", 0, TRUE);
704 }
705 }
706 else {
707 return luaL_error (L, "invalid arguments");
708 }
709
710 return 1;
711 }
712
713 static gint64
rspamd_lua_text_push_line(lua_State * L,struct rspamd_lua_text * t,gint64 start_offset,const gchar * sep_pos,gboolean stringify)714 rspamd_lua_text_push_line (lua_State *L,
715 struct rspamd_lua_text *t,
716 gint64 start_offset,
717 const gchar *sep_pos,
718 gboolean stringify)
719 {
720 const gchar *start;
721 gsize len;
722 gint64 ret;
723
724 start = t->start + start_offset;
725 len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
726 ret = start_offset + len;
727
728 /* Trim line */
729 while (len > 0) {
730 if (start[len - 1] == '\r' || start[len - 1] == '\n') {
731 len --;
732 }
733 else {
734 break;
735 }
736 }
737
738 if (stringify) {
739 lua_pushlstring (L, start, len);
740 }
741 else {
742 struct rspamd_lua_text *ntext;
743
744 ntext = lua_newuserdata (L, sizeof (*ntext));
745 rspamd_lua_setclass (L, "rspamd{text}", -1);
746 ntext->start = start;
747 ntext->len = len;
748 ntext->flags = 0; /* Not own as it must be owned by a top object */
749 }
750
751 return ret;
752 }
753
754 static gint
rspamd_lua_text_readline(lua_State * L)755 rspamd_lua_text_readline (lua_State *L)
756 {
757 struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1));
758 gboolean stringify = lua_toboolean (L, lua_upvalueindex (2));
759 gint64 pos = lua_tointeger (L, lua_upvalueindex (3));
760
761 if (pos < 0) {
762 return luaL_error (L, "invalid pos: %d", (gint)pos);
763 }
764
765 if (pos >= t->len) {
766 /* We are done */
767 return 0;
768 }
769
770 const gchar *sep_pos;
771
772 /* We look just for `\n` ignoring `\r` as it is very rare nowadays */
773 sep_pos = memchr (t->start + pos, '\n', t->len - pos);
774
775 if (sep_pos == NULL) {
776 /* Either last `\n` or `\r` separated text */
777 sep_pos = memchr (t->start + pos, '\r', t->len - pos);
778 }
779
780 pos = rspamd_lua_text_push_line (L, t, pos, sep_pos, stringify);
781
782 /* Skip separators */
783 while (pos < t->len) {
784 if (t->start[pos] == '\n' || t->start[pos] == '\r') {
785 pos ++;
786 }
787 else {
788 break;
789 }
790 }
791
792 /* Update pos */
793 lua_pushinteger (L, pos);
794 lua_replace (L, lua_upvalueindex (3));
795
796 return 1;
797 }
798
799 static gint
lua_text_lines(lua_State * L)800 lua_text_lines (lua_State *L)
801 {
802 LUA_TRACE_POINT;
803 struct rspamd_lua_text *t = lua_check_text (L, 1);
804 gboolean stringify = FALSE;
805
806 if (t) {
807 if (lua_isboolean (L, 2)) {
808 stringify = lua_toboolean (L, 2);
809 }
810
811 lua_pushvalue (L, 1);
812 lua_pushboolean (L, stringify);
813 lua_pushinteger (L, 0); /* Current pos */
814 lua_pushcclosure (L, rspamd_lua_text_readline, 3);
815 }
816 else {
817 return luaL_error (L, "invalid arguments");
818 }
819
820 return 1;
821 }
822
823 static gint
rspamd_lua_text_regexp_split(lua_State * L)824 rspamd_lua_text_regexp_split (lua_State *L) {
825 struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1)),
826 *new_t;
827 struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
828 lua_touserdata (L, lua_upvalueindex (2));
829 gboolean stringify = lua_toboolean (L, lua_upvalueindex (3));
830 gint64 pos = lua_tointeger (L, lua_upvalueindex (4));
831 gboolean matched;
832
833 if (pos < 0) {
834 return luaL_error (L, "invalid pos: %d", (gint) pos);
835 }
836
837 if (pos >= t->len) {
838 /* We are done */
839 return 0;
840 }
841
842 const gchar *start, *end, *old_start;
843
844 end = t->start + pos;
845
846 for (;;) {
847 old_start = end;
848
849 matched = rspamd_regexp_search (re->re, t->start, t->len, &start, &end, FALSE,
850 NULL);
851
852 if (matched) {
853 if (start - old_start > 0) {
854 if (stringify) {
855 lua_pushlstring (L, old_start, start - old_start);
856 }
857 else {
858 new_t = lua_newuserdata (L, sizeof (*t));
859 rspamd_lua_setclass (L, "rspamd{text}", -1);
860 new_t->start = old_start;
861 new_t->len = start - old_start;
862 new_t->flags = 0;
863 }
864
865 break;
866 }
867 else {
868 if (start == end) {
869 matched = FALSE;
870 break;
871 }
872 /*
873 * All match separators (e.g. starting separator,
874 * we need to skip it). Continue iterations.
875 */
876 }
877 }
878 else {
879 /* No match, stop */
880 break;
881 }
882 }
883
884 if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
885 /* No more matches, but we might need to push the last element */
886 if (end == NULL) {
887 end = t->start;
888 }
889 /* No separators, need to push the whole remaining part */
890 if (stringify) {
891 lua_pushlstring (L, end, (t->start + t->len) - end);
892 }
893 else {
894 new_t = lua_newuserdata (L, sizeof (*t));
895 rspamd_lua_setclass (L, "rspamd{text}", -1);
896 new_t->start = end;
897 new_t->len = (t->start + t->len) - end;
898 new_t->flags = 0;
899 }
900
901 pos = t->len;
902 }
903 else {
904
905 pos = end - t->start;
906 }
907
908 /* Update pos */
909 lua_pushinteger (L, pos);
910 lua_replace (L, lua_upvalueindex (4));
911
912 return 1;
913 }
914
915 static gint
lua_text_split(lua_State * L)916 lua_text_split (lua_State *L)
917 {
918 LUA_TRACE_POINT;
919 struct rspamd_lua_text *t = lua_check_text (L, 1);
920 struct rspamd_lua_regexp *re;
921 gboolean stringify = FALSE, own_re = FALSE;
922
923 if (t == NULL) {
924 return luaL_error (L, "invalid arguments");
925 }
926
927 if (lua_type (L, 2) == LUA_TUSERDATA) {
928 re = lua_check_regexp (L, 2);
929 }
930 else {
931 rspamd_regexp_t *c_re;
932 GError *err = NULL;
933
934 c_re = rspamd_regexp_new (lua_tostring (L, 2), NULL, &err);
935 if (c_re == NULL) {
936
937 gint ret = luaL_error (L, "cannot parse regexp: %s, error: %s",
938 lua_tostring (L, 2),
939 err == NULL ? "undefined" : err->message);
940 if (err) {
941 g_error_free (err);
942 }
943
944 return ret;
945 }
946
947 re = g_malloc0 (sizeof (struct rspamd_lua_regexp));
948 re->re = c_re;
949 re->re_pattern = g_strdup (lua_tostring (L, 2));
950 re->module = rspamd_lua_get_module_name (L);
951 own_re = TRUE;
952 }
953
954 if (re) {
955 if (lua_isboolean (L, 3)) {
956 stringify = lua_toboolean (L, 3);
957 }
958
959 /* Upvalues */
960 lua_pushvalue (L, 1); /* text */
961
962 if (own_re) {
963 struct rspamd_lua_regexp **pre;
964 pre = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
965 rspamd_lua_setclass (L, "rspamd{regexp}", -1);
966 *pre = re;
967 }
968 else {
969 lua_pushvalue (L, 2); /* regexp */
970 }
971
972 lua_pushboolean (L, stringify);
973 lua_pushinteger (L, 0); /* Current pos */
974 lua_pushcclosure (L, rspamd_lua_text_regexp_split, 4);
975 }
976 else {
977 return luaL_error (L, "invalid arguments");
978 }
979
980 return 1;
981 }
982
983
984 static gint
lua_text_at(lua_State * L)985 lua_text_at (lua_State *L)
986 {
987 return lua_text_byte(L);
988 }
989
990 static gint
lua_text_byte(lua_State * L)991 lua_text_byte (lua_State *L)
992 {
993 LUA_TRACE_POINT;
994 struct rspamd_lua_text *t = lua_check_text (L, 1);
995 if (!t) {
996 return luaL_error (L, "invalid arguments");
997 }
998
999 gsize start = relative_pos_start (luaL_optinteger (L, 2, 1), t->len);
1000 gsize end = relative_pos_end (luaL_optinteger (L, 3, start), t->len);
1001 start--;
1002
1003 if (start >= end) {
1004 return 0;
1005 }
1006
1007 for (gsize i = start; i < end; i++) {
1008 lua_pushinteger (L, t->start[i]);
1009 }
1010 return end - start;
1011 }
1012
1013 static gint
lua_text_memchr(lua_State * L)1014 lua_text_memchr (lua_State *L)
1015 {
1016 LUA_TRACE_POINT;
1017 struct rspamd_lua_text *t = lua_check_text (L, 1);
1018 int c;
1019 bool reverse = false;
1020
1021 if (lua_isnumber (L, 2)) {
1022 c = lua_tonumber (L, 2);
1023 }
1024 else {
1025 gsize l;
1026 const gchar *str = lua_tolstring (L, 2, &l);
1027
1028 if (str) {
1029 c = str[0];
1030
1031 if (l != 1) {
1032 return luaL_error (L, "need exactly one character to search");
1033 }
1034 }
1035 else {
1036 return luaL_error (L, "invalid arguments");
1037 }
1038 }
1039
1040 if (t) {
1041 void *f;
1042
1043 if (lua_isboolean (L, 3)) {
1044 reverse = lua_toboolean (L, 3);
1045 }
1046
1047 if (reverse) {
1048 f = rspamd_memrchr (t->start, c, t->len);
1049 }
1050 else {
1051 f = memchr (t->start, c, t->len);
1052 }
1053
1054 if (f) {
1055 lua_pushinteger (L, ((const char *)f) - t->start + 1);
1056 }
1057 else {
1058 lua_pushinteger (L, -1);
1059 }
1060 }
1061 else {
1062 return luaL_error (L, "invalid arguments");
1063 }
1064
1065 return 1;
1066 }
1067
1068 static gint
lua_text_bytes(lua_State * L)1069 lua_text_bytes (lua_State *L)
1070 {
1071 LUA_TRACE_POINT;
1072 struct rspamd_lua_text *t = lua_check_text (L, 1);
1073
1074 if (t) {
1075 lua_createtable (L, t->len, 0);
1076
1077 for (gsize i = 0; i < t->len; i ++) {
1078 lua_pushinteger (L, (guchar)t->start[i]);
1079 lua_rawseti (L, -2, i + 1);
1080 }
1081 }
1082 else {
1083 return luaL_error (L, "invalid arguments");
1084 }
1085
1086 return 1;
1087 }
1088
1089 static gint
lua_text_save_in_file(lua_State * L)1090 lua_text_save_in_file (lua_State *L)
1091 {
1092 LUA_TRACE_POINT;
1093 struct rspamd_lua_text *t = lua_check_text (L, 1);
1094 const gchar *fname = NULL;
1095 guint mode = 00644;
1096 gint fd = -1;
1097 gboolean need_close = FALSE;
1098
1099 if (t != NULL) {
1100 if (lua_type (L, 2) == LUA_TSTRING) {
1101 fname = luaL_checkstring (L, 2);
1102
1103 if (lua_type (L, 3) == LUA_TNUMBER) {
1104 mode = lua_tonumber (L, 3);
1105 }
1106 }
1107 else if (lua_type (L, 2) == LUA_TNUMBER) {
1108 /* Created fd */
1109 fd = lua_tonumber (L, 2);
1110 }
1111
1112 if (fd == -1) {
1113 if (fname) {
1114 fd = rspamd_file_xopen (fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
1115
1116 if (fd == -1) {
1117 lua_pushboolean (L, false);
1118 lua_pushstring (L, strerror (errno));
1119
1120 return 2;
1121 }
1122 need_close = TRUE;
1123 }
1124 else {
1125 fd = STDOUT_FILENO;
1126 }
1127 }
1128
1129 if (write (fd, t->start, t->len) == -1) {
1130 if (fd != STDOUT_FILENO) {
1131 close (fd);
1132 }
1133
1134 lua_pushboolean (L, false);
1135 lua_pushstring (L, strerror (errno));
1136
1137 return 2;
1138 }
1139
1140 if (need_close) {
1141 close (fd);
1142 }
1143
1144 lua_pushboolean (L, true);
1145 }
1146 else {
1147 return luaL_error (L, "invalid arguments");
1148 }
1149
1150 return 1;
1151 }
1152
1153 static gint
lua_text_gc(lua_State * L)1154 lua_text_gc (lua_State *L)
1155 {
1156 LUA_TRACE_POINT;
1157 struct rspamd_lua_text *t = lua_check_text (L, 1);
1158
1159 if (t != NULL) {
1160 g_assert (!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
1161
1162 if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1163 if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
1164 rspamd_explicit_memzero ((guchar *)t->start, t->len);
1165 }
1166
1167 if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
1168 munmap ((gpointer)t->start, t->len);
1169 }
1170 else {
1171 if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
1172 free ((gpointer) t->start);
1173 }
1174 else {
1175 g_free ((gpointer) t->start);
1176 }
1177 }
1178 }
1179
1180 }
1181
1182 return 0;
1183 }
1184
1185 static gint
lua_text_eq(lua_State * L)1186 lua_text_eq (lua_State *L)
1187 {
1188 LUA_TRACE_POINT;
1189 struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1190 *t2 = lua_check_text_or_string (L, 2);
1191
1192 if (t1->len == t2->len) {
1193 lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) == 0);
1194 }
1195 else {
1196 lua_pushboolean (L, false);
1197 }
1198
1199 return 1;
1200 }
1201
1202 static gint
lua_text_lt(lua_State * L)1203 lua_text_lt (lua_State *L)
1204 {
1205 LUA_TRACE_POINT;
1206 struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1207 *t2 = lua_check_text_or_string (L, 2);
1208
1209 if (t1 && t2) {
1210 if (t1->len == t2->len) {
1211 lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) < 0);
1212 }
1213 else {
1214 lua_pushboolean (L, t1->len < t2->len);
1215 }
1216 }
1217
1218 return 1;
1219 }
1220
1221 static gint
lua_text_concat(lua_State * L)1222 lua_text_concat (lua_State *L)
1223 {
1224 LUA_TRACE_POINT;
1225 struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1226 *t2 = lua_check_text_or_string (L, 2);
1227
1228 if (t1 && t2) {
1229 struct rspamd_lua_text *final;
1230
1231 final = lua_new_text (L, NULL, t1->len + t2->len, TRUE);
1232 memcpy ((void *)final->start, t1->start, t1->len);
1233 memcpy ((void *)(final->start + t1->len), t2->start, t2->len);
1234 }
1235
1236 return 1;
1237 }
1238
1239 static gint
lua_text_wipe(lua_State * L)1240 lua_text_wipe (lua_State *L)
1241 {
1242 LUA_TRACE_POINT;
1243 struct rspamd_lua_text *t = lua_check_text (L, 1);
1244
1245 if (t != NULL) {
1246 if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1247 rspamd_explicit_memzero ((guchar *)t->start, t->len);
1248 }
1249 else {
1250 return luaL_error (L, "cannot wipe not owned text");
1251 }
1252
1253 }
1254 else {
1255 return luaL_error (L, "invalid arguments");
1256 }
1257
1258 return 0;
1259 }
1260
1261 static gint
lua_text_base32(lua_State * L)1262 lua_text_base32 (lua_State *L)
1263 {
1264 LUA_TRACE_POINT;
1265 struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1266 enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
1267
1268 if (t != NULL) {
1269 if (lua_type (L, 2) == LUA_TSTRING) {
1270 btype = rspamd_base32_decode_type_from_str (lua_tostring (L, 2));
1271
1272 if (btype == RSPAMD_BASE32_INVALID) {
1273 return luaL_error (L, "invalid b32 type: %s", lua_tostring (L, 2));
1274 }
1275 }
1276
1277 out = lua_new_text (L, NULL, t->len * 8 / 5 + 2, TRUE);
1278 out->len = rspamd_encode_base32_buf (t->start, t->len, (gchar *)out->start,
1279 out->len, btype);
1280 }
1281 else {
1282 return luaL_error (L, "invalid arguments");
1283 }
1284
1285 return 1;
1286 }
1287
1288 static gint
lua_text_base64(lua_State * L)1289 lua_text_base64 (lua_State *L)
1290 {
1291 LUA_TRACE_POINT;
1292 struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1293 gsize line_len = 0;
1294 gboolean fold = FALSE;
1295
1296 if (t != NULL) {
1297 if (lua_type (L, 2) == LUA_TNUMBER) {
1298 line_len = lua_tointeger (L, 2);
1299
1300 if (line_len <= 8) {
1301 return luaL_error (L, "too small line length (at least 8 is required)");
1302 }
1303 }
1304
1305 enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
1306
1307 if (lua_type (L, 3) == LUA_TSTRING) {
1308 const gchar *how_str = lua_tostring (L, 3);
1309
1310 if (g_ascii_strcasecmp (how_str, "cr") == 0) {
1311 how = RSPAMD_TASK_NEWLINES_CR;
1312 }
1313 else if (g_ascii_strcasecmp (how_str, "lf") == 0) {
1314 how = RSPAMD_TASK_NEWLINES_LF;
1315 }
1316 else if (g_ascii_strcasecmp (how_str, "crlf") != 0) {
1317 return luaL_error (L, "invalid newline style: %s", how_str);
1318 }
1319 }
1320
1321 if (lua_type (L, 4) == LUA_TBOOLEAN) {
1322 fold = lua_toboolean (L, 4);
1323 }
1324
1325 gsize sz_len;
1326
1327 out = lua_newuserdata (L, sizeof (*t));
1328 out->flags = RSPAMD_TEXT_FLAG_OWN;
1329 out->start = rspamd_encode_base64_common (t->start, t->len,
1330 line_len, &sz_len, fold, how);
1331 out->len = sz_len;
1332 rspamd_lua_setclass (L, "rspamd{text}", -1);
1333 }
1334 else {
1335 return luaL_error (L, "invalid arguments");
1336 }
1337
1338 return 1;
1339 }
1340
1341 static gint
lua_text_hex(lua_State * L)1342 lua_text_hex (lua_State *L)
1343 {
1344 LUA_TRACE_POINT;
1345 struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1346
1347 if (t != NULL) {
1348
1349 out = lua_new_text (L, NULL, t->len * 2, TRUE);
1350 out->len = rspamd_encode_hex_buf (t->start, t->len, (gchar *)out->start,
1351 out->len);
1352 }
1353 else {
1354 return luaL_error (L, "invalid arguments");
1355 }
1356
1357 return 1;
1358 }
1359
1360 static gint
lua_text_find(lua_State * L)1361 lua_text_find (lua_State *L)
1362 {
1363 LUA_TRACE_POINT;
1364 struct rspamd_lua_text *t = lua_check_text (L, 1);
1365 gsize patlen, init = 1;
1366 const gchar *pat = luaL_checklstring (L, 2, &patlen);
1367
1368 if (t != NULL && pat != NULL) {
1369
1370 if (lua_isnumber (L, 3)) {
1371 init = relative_pos_start (lua_tointeger (L, 3), t->len);
1372 }
1373
1374 init --;
1375
1376 if (init > t->len) {
1377 return luaL_error (L, "invalid arguments to find: init too large");
1378 }
1379
1380 goffset pos = rspamd_substring_search (t->start + init,
1381 t->len - init,
1382 pat, patlen);
1383
1384 if (pos == -1) {
1385 lua_pushnil (L);
1386
1387 return 1;
1388 }
1389
1390 lua_pushinteger (L, pos + 1);
1391 lua_pushinteger (L, pos + patlen);
1392 }
1393 else {
1394 return luaL_error (L, "invalid arguments");
1395 }
1396
1397 return 2;
1398 }
1399
/*
 * Applies `op` between the element of bitset `a` that holds bit number `b`
 * and the single-bit mask of that bit: `|=` sets the bit, `&` tests it.
 * The number of bits per element is taken from the element type of `a`.
 */
#define BITOP(a,b,op) \
		((a)[(guint64)(b)/(8u*sizeof *(a))] op (guint64)1<<((guint64)(b)%(8u*sizeof *(a))))
1402
1403 static gint
lua_text_exclude_chars(lua_State * L)1404 lua_text_exclude_chars (lua_State *L)
1405 {
1406 LUA_TRACE_POINT;
1407 struct rspamd_lua_text *t = lua_check_text (L, 1);
1408 gssize patlen;
1409 const gchar *pat = lua_tolstring (L, 2, &patlen), *p, *end;
1410 gchar *dest, *d;
1411 guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
1412 gboolean copy = TRUE;
1413 guint *plen;
1414
1415 if (t != NULL && pat && patlen > 0) {
1416 if (lua_isboolean (L, 3)) {
1417 copy = lua_toboolean (L, 3);
1418 }
1419 else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1420 copy = FALSE;
1421 }
1422
1423 if (!copy) {
1424 dest = (gchar *)t->start;
1425 plen = &t->len;
1426 lua_pushvalue (L, 1); /* Push text as a result */
1427 }
1428 else {
1429 /* We need to copy read only text */
1430 struct rspamd_lua_text *nt;
1431
1432 dest = g_malloc (t->len);
1433 nt = lua_newuserdata (L, sizeof (*nt));
1434 rspamd_lua_setclass (L, "rspamd{text}", -1);
1435 nt->len = t->len;
1436 nt->flags = RSPAMD_TEXT_FLAG_OWN;
1437 memcpy (dest, t->start, t->len);
1438 nt->start = dest;
1439 plen = &nt->len;
1440 }
1441
1442 /* Fill pattern bitset */
1443 memset (byteset, 0, sizeof byteset);
1444
1445 while (patlen > 0) {
1446 if (*pat == '%') {
1447 pat ++;
1448 patlen --;
1449
1450 if (patlen > 0) {
1451 /*
1452 * This stuff assumes little endian, but GUINT64_FROM_LE should
1453 * deal with proper conversion
1454 */
1455 switch (*pat) {
1456 case '%':
1457 BITOP (byteset, *(guchar *) pat, |=);
1458 break;
1459 case 's':
1460 /* "\r\n\t\f " */
1461 byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
1462 break;
1463 case 'n':
1464 /* newlines: "\r\n" */
1465 byteset[0] |= GUINT64_FROM_LE (0x2400LLU);
1466 break;
1467 case '8':
1468 /* 8 bit characters */
1469 byteset[2] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1470 byteset[3] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1471 break;
1472 case 'c':
1473 /* Non printable (control) characters */
1474 byteset[0] |= GUINT64_FROM_LE (0xffffffffLLU);
1475 /* Del character */
1476 byteset[1] |= GUINT64_FROM_LE (0x8000000000000000LLU);
1477 break;
1478 }
1479 }
1480 else {
1481 /* Last '%' */
1482 BITOP (byteset, (guchar)'%', |=);
1483 }
1484 }
1485 else {
1486 BITOP (byteset, *(guchar *)pat, |=);
1487 }
1488
1489 pat ++;
1490 patlen --;
1491 }
1492 for (; patlen > 0 && BITOP (byteset, *(guchar *)pat, |=); pat++, patlen --);
1493
1494 p = t->start;
1495 end = t->start + t->len;
1496 d = dest;
1497
1498 while (p < end) {
1499 if (!BITOP (byteset, *(guchar *)p, &)) {
1500 *d++ = *p;
1501 }
1502
1503 p ++;
1504 }
1505
1506 *(plen) = d - dest;
1507 }
1508 else {
1509 return luaL_error (L, "invalid arguments");
1510 }
1511
1512 return 1;
1513 }
1514
1515 static gint
lua_text_oneline(lua_State * L)1516 lua_text_oneline (lua_State *L)
1517 {
1518 LUA_TRACE_POINT;
1519 struct rspamd_lua_text *t = lua_check_text (L, 1);
1520 const gchar *p, *end;
1521 gchar *dest, *d;
1522 guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
1523 gboolean copy = TRUE, seen_8bit = FALSE;
1524 guint *plen;
1525
1526 if (t != NULL) {
1527 if (lua_isboolean (L, 2)) {
1528 copy = lua_toboolean (L, 2);
1529 }
1530 else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1531 copy = FALSE;
1532 }
1533
1534 if (!copy) {
1535 dest = (gchar *)t->start;
1536 plen = &t->len;
1537 lua_pushvalue (L, 1); /* Push text as a result */
1538 }
1539 else {
1540 /* We need to copy read only text */
1541 struct rspamd_lua_text *nt;
1542
1543 dest = g_malloc (t->len);
1544 nt = lua_newuserdata (L, sizeof (*nt));
1545 rspamd_lua_setclass (L, "rspamd{text}", -1);
1546 nt->len = t->len;
1547 nt->flags = RSPAMD_TEXT_FLAG_OWN;
1548 memcpy (dest, t->start, t->len);
1549 nt->start = dest;
1550 plen = &nt->len;
1551 }
1552
1553 /* Fill pattern bitset */
1554 memset (byteset, 0, sizeof byteset);
1555 /* All spaces */
1556 byteset[0] |= GUINT64_FROM_LE (0x100003600LLU);
1557 /* Control characters */
1558 byteset[0] |= GUINT64_FROM_LE (0xffffffffLLU);
1559 /* Del character */
1560 byteset[1] |= GUINT64_FROM_LE (0x8000000000000000LLU);
1561 /* 8 bit characters */
1562 byteset[2] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1563 byteset[3] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1564
1565 p = t->start;
1566 end = t->start + t->len;
1567 d = dest;
1568
1569 while (p < end) {
1570 if (!BITOP (byteset, *(guchar *)p, &)) {
1571 *d++ = *p;
1572 }
1573 else {
1574 if ((*(guchar *)p) & 0x80) {
1575 seen_8bit = TRUE;
1576 *d++ = *p;
1577 }
1578 else {
1579 if (*p == ' ') {
1580 if (d != dest) {
1581 *d++ = *p++;
1582 }
1583
1584 while (p < end && g_ascii_isspace (*p)) {
1585 p ++;
1586 }
1587
1588 continue; /* To avoid p++ */
1589 }
1590 else if (*p == '\r' || *p == '\n') {
1591 if (d != dest) {
1592 *d++ = ' ';
1593 p ++;
1594 }
1595
1596 while (p < end && g_ascii_isspace (*p)) {
1597 p ++;
1598 }
1599
1600 continue; /* To avoid p++ */
1601 }
1602 }
1603 }
1604
1605 p ++;
1606 }
1607
1608 while (d > dest && g_ascii_isspace (*(d - 1))) {
1609 d --;
1610 }
1611
1612 if (seen_8bit) {
1613 if (rspamd_fast_utf8_validate (dest, d - dest) != 0) {
1614 /* Need to make it valid :( */
1615 UChar32 uc;
1616 goffset err_offset;
1617 gsize remain = d - dest;
1618 gchar *nd = dest;
1619
1620 while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (nd, remain)) > 0) {
1621 gint i = 0;
1622
1623 err_offset --; /* As it returns it 1 indexed */
1624 nd += err_offset;
1625 remain -= err_offset;
1626
1627 /* Each invalid character of input requires 3 bytes of output (+2 bytes) */
1628 while (i < remain) {
1629 gint old_pos = i;
1630 U8_NEXT (nd, i, remain, uc);
1631
1632 if (uc < 0) {
1633 nd[old_pos] = '?';
1634 }
1635 else {
1636 break;
1637 }
1638 }
1639
1640 nd += i;
1641 remain -= i;
1642 }
1643 }
1644 }
1645
1646 *(plen) = d - dest;
1647 }
1648 else {
1649 return luaL_error (L, "invalid arguments");
1650 }
1651
1652 return 1;
1653 }
1654
1655 static gint
lua_text_lower(lua_State * L)1656 lua_text_lower (lua_State *L)
1657 {
1658 LUA_TRACE_POINT;
1659 struct rspamd_lua_text *t = lua_check_text (L, 1), *nt;
1660 gboolean is_utf8 = FALSE, is_inplace = FALSE;
1661
1662 if (t != NULL) {
1663 if (lua_isboolean (L, 2)) {
1664 is_utf8 = lua_toboolean (L, 2);
1665 }
1666 if (lua_isboolean (L, 3)) {
1667 is_inplace = lua_toboolean (L, 3);
1668 }
1669
1670 if (is_inplace) {
1671 nt = t;
1672 lua_pushvalue (L, 1);
1673 }
1674 else {
1675 nt = lua_new_text (L, t->start, t->len, TRUE);
1676 }
1677
1678 if (!is_utf8) {
1679 rspamd_str_lc ((gchar *) nt->start, nt->len);
1680 }
1681 else {
1682 rspamd_str_lc_utf8 ((gchar *) nt->start, nt->len);
1683 }
1684 }
1685 else {
1686 return luaL_error (L, "invalid arguments");
1687 }
1688
1689 return 1;
1690 }
1691
1692 static gint
lua_text_strtoul(lua_State * L)1693 lua_text_strtoul (lua_State *L)
1694 {
1695 LUA_TRACE_POINT;
1696 struct rspamd_lua_text *t = lua_check_text (L, 1);
1697
1698 if (t) {
1699 unsigned long ll;
1700
1701 if (rspamd_strtoul (t->start, t->len, &ll)) {
1702 lua_pushinteger (L, ll);
1703 }
1704 else {
1705 lua_pushnil (L);
1706 }
1707 }
1708 else {
1709 return luaL_error (L, "invalid arguments");
1710 }
1711
1712 return 1;
1713 }
1714
/*
 * Used to distinguish lua text metatable: the value is stored under the
 * "cookie" key by both lua_load_text and luaopen_text.
 */
static const guint rspamd_lua_text_cookie = 0x2b21ef6fU;
1717
1718 static gint
lua_load_text(lua_State * L)1719 lua_load_text (lua_State * L)
1720 {
1721 lua_newtable (L);
1722 lua_pushstring (L, "cookie");
1723 lua_pushnumber (L, rspamd_lua_text_cookie);
1724 lua_settable (L, -3);
1725 luaL_register (L, NULL, textlib_f);
1726
1727 return 1;
1728 }
1729
1730 void
luaopen_text(lua_State * L)1731 luaopen_text (lua_State *L)
1732 {
1733 rspamd_lua_new_class (L, "rspamd{text}", textlib_m);
1734 lua_pushstring (L, "cookie");
1735 lua_pushnumber (L, rspamd_lua_text_cookie);
1736 lua_settable (L, -3);
1737 lua_pop (L, 1);
1738
1739 rspamd_lua_add_preload (L, "rspamd_text", lua_load_text);
1740 }
1741