1 /*-
2  * Copyright 2019 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lua_common.h"
18 #include "libcryptobox/cryptobox.h"
19 #include "contrib/fastutf8/fastutf8.h"
20 #include "unix-std.h"
21 
22 /***
23  * @module rspamd_text
24  * This module provides access to opaque text structures used widely to prevent
25  * copying between Lua and C for various concerns: performance, security etc...
26  *
27  * You can convert rspamd_text into string but it will copy data.
28  */
29 
30 /***
31  * @function rspamd_text.fromstring(str)
32  * Creates rspamd_text from Lua string (copied to the text)
33  * @param {string} str string to use
34  * @return {rspamd_text} resulting text
35  */
36 LUA_FUNCTION_DEF (text, fromstring);
37 
38 /***
39  * @function rspamd_text.null()
40  * Creates rspamd_text with NULL pointer for testing purposes
41  * @param {string} str string to use
42  * @return {rspamd_text} resulting text
43  */
44 LUA_FUNCTION_DEF (text, null);
45 /***
46  * @function rspamd_text.randombytes(nbytes)
47  * Creates rspamd_text with random bytes inside (raw bytes)
48  * @param {number} nbytes number of random bytes generated
49  * @return {rspamd_text} random bytes text
50  */
51 LUA_FUNCTION_DEF (text, randombytes);
52 
53 /***
54  * @function rspamd_text.fromtable(tbl[, delim])
55  * Same as `table.concat` but generates rspamd_text instead of the Lua string
56  * @param {table} tbl table to use
57  * @param {string} delim optional delimiter
58  * @return {rspamd_text} resulting text
59  */
60 LUA_FUNCTION_DEF (text, fromtable);
61 /***
62  * @method rspamd_text:byte(pos[, pos2])
63  * Returns a byte at the position `pos` or bytes from `pos` to `pos2` if specified
64  * @param {integer} pos index
65  * @param {integer} pos2 index
66  * @return {integer} byte at the position `pos` or varargs of bytes
67  */
68 LUA_FUNCTION_DEF (text, byte);
69 /***
70  * @method rspamd_text:len()
71  * Returns length of a string
72  * @return {number} length of string in **bytes**
73  */
74 LUA_FUNCTION_DEF (text, len);
75 /***
76  * @method rspamd_text:str()
77  * Converts text to string by copying its content
78  * @return {string} copy of text as Lua string
79  */
80 LUA_FUNCTION_DEF (text, str);
81 /***
82  * @method rspamd_text:ptr()
83  * Converts text to lightuserdata
84  * @return {lightuserdata} pointer value of rspamd_text
85  */
86 LUA_FUNCTION_DEF (text, ptr);
87 /***
88  * @method rspamd_text:save_in_file(fname[, mode])
89  * Saves text in file
90  * @return {boolean} true if save has been completed
91  */
92 LUA_FUNCTION_DEF (text, save_in_file);
93 /***
94  * @method rspamd_text:span(start[, len])
95  * Returns a span for lua_text starting at pos [start] (1 indexed) and with
96  * length `len` (or to the end of the text)
97  * @param {integer} start start index
98  * @param {integer} len length of span
99  * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
100  */
101 LUA_FUNCTION_DEF (text, span);
102 /***
103  * @method rspamd_text:sub(start[, len])
104  * Returns a substring of lua_text, similar to string.sub from Lua
105  * @return {rspamd_text} new rspamd_text with span (must be careful when using with owned texts...)
106  */
107 LUA_FUNCTION_DEF (text, sub);
108 /***
109  * @method rspamd_text:lines([stringify])
110  * Returns an iter over all lines as rspamd_text objects or as strings if `stringify` is true
111  * @param {boolean} stringify stringify lines
112  * @return {iterator} iterator triplet
113  */
114 LUA_FUNCTION_DEF (text, lines);
115 /***
116  * @method rspamd_text:split(regexp, [stringify])
117  * Returns an iter over all encounters of the specific regexp as rspamd_text objects or as strings if `stringify` is true
118  * @param {rspamd_regexp} regexp regexp (pcre syntax) used for splitting
119  * @param {boolean} stringify stringify lines
120  * @return {iterator} iterator triplet
121  */
122 LUA_FUNCTION_DEF (text, split);
123 /***
124  * @method rspamd_text:at(pos)
125  * Returns a byte at the position `pos`
126  * @param {integer} pos index
127  * @return {integer} byte at the position `pos` or nil if pos out of bound
128  */
129 LUA_FUNCTION_DEF (text, at);
130 /***
131  * @method rspamd_text:memchr(chr, [reverse])
132  * Returns the first or the last position of the character `chr` in the text or
133  * -1 in case if a character has not been found. Indexes start from `1`
134  * @param {string/number} chr character or a character code to find
135  * @param {boolean} reverse last character if `true`
136  * @return {integer} position of the character or `-1`
137  */
138 LUA_FUNCTION_DEF (text, memchr);
139 /***
140  * @method rspamd_text:bytes()
141  * Converts text to an array of bytes
142  * @return {table|integer} bytes in the array (as unsigned char)
143  */
144 LUA_FUNCTION_DEF (text, bytes);
145 /***
146  * @method rspamd_text:lower([is_utf, [inplace]])
147  * Return a new text with lowercased characters, if is_utf is true then Rspamd applies utf8 lowercase
148  * @param {boolean} is_utf apply utf8 lowercase
149  * @param {boolean} inplace lowercase the original text
150  * @return {rspamd_text} new rspamd_text (or the original text if inplace) with lowercased letters
151  */
152 LUA_FUNCTION_DEF (text, lower);
153 LUA_FUNCTION_DEF (text, take_ownership);
154 /***
155  * @method rspamd_text:exclude_chars(set_to_exclude, [always_copy])
156  * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
157  * where all chars from `set_to_exclude` are removed
158  * Patterns supported:
159  *
160  * - %s - all space characters
161  * - %n - all newline characters
162  * - %c - all control characters (it includes 8bit characters and spaces)
163  * - %8 - all 8 bit characters
164  * - %% - just a percent character
165  *
166  * @param {string} set_to_exclude characters to exclude
167  * @param {boolean} always_copy always copy the source text
168  * @return {rspamd_text} modified or copied text
169  */
170 LUA_FUNCTION_DEF (text, exclude_chars);
171 /***
172  * @method rspamd_text:oneline([always_copy])
173  * Returns a text (if owned, then the original text is modified, if not, then it is copied and owned)
174  * where the following transformations are made:
175  * - All spaces sequences are replaced with a single space
176  * - All newlines sequences are replaced with a single space
177  * - Trailing and leading spaces are removed
178  * - Control characters are excluded
179  * - UTF8 sequences are normalised
180  *
181  * @param {boolean} always_copy always copy the source text
182  * @return {rspamd_text} modified or copied text
183  */
184 LUA_FUNCTION_DEF (text, oneline);
185 /***
186  * @method rspamd_text:base32([b32type])
187  * Returns a text encoded in base32 (new rspamd_text is allocated)
188  *
189  * @param {string} b32type base32 type (default, bleach, rfc)
190  * @return {rspamd_text} new text encoded in base32
191  */
192 LUA_FUNCTION_DEF (text, base32);
193 /***
194  * @method rspamd_text:base64([line_length, [nline, [fold]]])
195  * Returns a text encoded in base64 (new rspamd_text is allocated)
196  *
197  * @param {number} line_length split the returned text with newlines so that no line exceeds this length
198  * @param {string} nline newline type: `cr`, `lf`, `crlf`
199  * @param {boolean} fold use folding when splitting into lines (false by default)
200  * @return {rspamd_text} new text encoded in base64
201  */
202 LUA_FUNCTION_DEF (text, base64);
203 /***
204  * @method rspamd_text:hex()
205  * Returns a text encoded in hex (new rspamd_text is allocated)
206  *
207  * @return {rspamd_text} new text encoded in hex
208  */
209 LUA_FUNCTION_DEF (text, hex);
210 /***
211  * @method rspamd_text:find(pattern [, init])
212  * Looks for the first match of pattern in the string s.
213  * If it finds a match, then find returns the indices of s where this occurrence
214  * starts and ends; otherwise, it returns nil. A third,
215  * optional numerical argument init specifies where to start the search;
216  * its default value is 1 and can be negative.
217  * This method currently supports merely a plain search, no patterns.
218  *
219  * @param {string} pattern pattern to find
220  * @param {number} init specifies where to start the search (1 default)
221  * @return {number,number/nil} If it finds a match, then find returns the indices of s where this occurrence starts and ends; otherwise, it returns nil
222  */
223 LUA_FUNCTION_DEF (text, find);
224 LUA_FUNCTION_DEF (text, gc);
225 LUA_FUNCTION_DEF (text, eq);
226 LUA_FUNCTION_DEF (text, lt);
227 LUA_FUNCTION_DEF (text, concat);
228 LUA_FUNCTION_DEF (text, strtoul);
229 
230 static const struct luaL_reg textlib_f[] = {
231 		LUA_INTERFACE_DEF (text, fromstring),
232 		{"from_string", lua_text_fromstring},
233 		LUA_INTERFACE_DEF (text, fromtable),
234 		{"from_table", lua_text_fromtable},
235 		LUA_INTERFACE_DEF (text, null),
236 		LUA_INTERFACE_DEF (text, randombytes),
237 		{NULL, NULL}
238 };
239 
240 static const struct luaL_reg textlib_m[] = {
241 		LUA_INTERFACE_DEF (text, len),
242 		LUA_INTERFACE_DEF (text, str),
243 		LUA_INTERFACE_DEF (text, ptr),
244 		LUA_INTERFACE_DEF (text, take_ownership),
245 		LUA_INTERFACE_DEF (text, save_in_file),
246 		LUA_INTERFACE_DEF (text, span),
247 		LUA_INTERFACE_DEF (text, sub),
248 		LUA_INTERFACE_DEF (text, lines),
249 		LUA_INTERFACE_DEF (text, split),
250 		LUA_INTERFACE_DEF (text, at),
251 		LUA_INTERFACE_DEF (text, memchr),
252 		LUA_INTERFACE_DEF (text, byte),
253 		LUA_INTERFACE_DEF (text, bytes),
254 		LUA_INTERFACE_DEF (text, lower),
255 		LUA_INTERFACE_DEF (text, exclude_chars),
256 		LUA_INTERFACE_DEF (text, oneline),
257 		LUA_INTERFACE_DEF (text, base32),
258 		LUA_INTERFACE_DEF (text, base64),
259 		LUA_INTERFACE_DEF (text, hex),
260 		LUA_INTERFACE_DEF (text, find),
261 		LUA_INTERFACE_DEF (text, strtoul),
262 		{"write", lua_text_save_in_file},
263 		{"__len", lua_text_len},
264 		{"__tostring", lua_text_str},
265 		{"__gc", lua_text_gc},
266 		{"__eq", lua_text_eq},
267 		{"__lt", lua_text_lt},
268 		{"__concat", lua_text_concat},
269 		{NULL, NULL}
270 };
271 
272 struct rspamd_lua_text *
lua_check_text(lua_State * L,gint pos)273 lua_check_text (lua_State * L, gint pos)
274 {
275 	void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
276 	luaL_argcheck (L, ud != NULL, pos, "'text' expected");
277 	return ud ? (struct rspamd_lua_text *)ud : NULL;
278 }
279 
280 struct rspamd_lua_text *
lua_check_text_or_string(lua_State * L,gint pos)281 lua_check_text_or_string (lua_State * L, gint pos)
282 {
283 	gint pos_type = lua_type (L, pos);
284 
285 	if (pos_type == LUA_TUSERDATA) {
286 		void *ud = rspamd_lua_check_udata (L, pos, "rspamd{text}");
287 		luaL_argcheck (L, ud != NULL, pos, "'text' expected");
288 		return ud ? (struct rspamd_lua_text *) ud : NULL;
289 	}
290 	else if (pos_type == LUA_TSTRING) {
291 		/*
292 		 * Fake static lua_text, we allow to use this function multiple times
293 		 * by having a small array of static structures.
294 		 */
295 		static int cur_txt_idx = 0;
296 		static struct rspamd_lua_text fake_text[4];
297 		gsize len;
298 		int sel_idx;
299 
300 		sel_idx = cur_txt_idx++ % G_N_ELEMENTS (fake_text);
301 		fake_text[sel_idx].start = lua_tolstring (L, pos, &len);
302 
303 		if (len >= G_MAXUINT) {
304 			return NULL;
305 		}
306 
307 		fake_text[sel_idx].len = len;
308 		fake_text[sel_idx].flags = RSPAMD_TEXT_FLAG_FAKE;
309 
310 		return &fake_text[sel_idx];
311 	}
312 
313 	return NULL;
314 }
315 
316 struct rspamd_lua_text *
lua_new_text(lua_State * L,const gchar * start,gsize len,gboolean own)317 lua_new_text (lua_State *L, const gchar *start, gsize len, gboolean own)
318 {
319 	struct rspamd_lua_text *t;
320 
321 	t = lua_newuserdata (L, sizeof (*t));
322 	t->flags = 0;
323 
324 	if (own) {
325 		gchar *storage;
326 
327 		if (len > 0) {
328 			storage = g_malloc (len);
329 
330 			if (start != NULL) {
331 				memcpy (storage, start, len);
332 			}
333 
334 			t->start = storage;
335 			t->flags = RSPAMD_TEXT_FLAG_OWN;
336 		}
337 		else {
338 			t->start = "";
339 		}
340 	}
341 	else {
342 		t->start = start;
343 	}
344 
345 	t->len = len;
346 	rspamd_lua_setclass (L, "rspamd{text}", -1);
347 
348 	return t;
349 }
350 
351 
352 static gint
lua_text_fromstring(lua_State * L)353 lua_text_fromstring (lua_State *L)
354 {
355 	LUA_TRACE_POINT;
356 	const gchar *str;
357 	gsize l = 0;
358 	gboolean transparent = FALSE;
359 
360 	str = luaL_checklstring (L, 1, &l);
361 
362 	if (str) {
363 		if (lua_isboolean (L, 2)) {
364 			transparent = lua_toboolean (L, 2);
365 		}
366 
367 		lua_new_text (L, str, l, !transparent);
368 	}
369 	else {
370 		return luaL_error (L, "invalid arguments");
371 	}
372 
373 
374 	return 1;
375 }
376 
377 static gint
lua_text_null(lua_State * L)378 lua_text_null (lua_State *L)
379 {
380 	LUA_TRACE_POINT;
381 
382 	lua_new_text (L, NULL, 0, false);
383 
384 	return 1;
385 }
386 
387 static gint
lua_text_randombytes(lua_State * L)388 lua_text_randombytes (lua_State *L)
389 {
390 	LUA_TRACE_POINT;
391 	guint nbytes = luaL_checkinteger (L, 1);
392 	struct rspamd_lua_text *out;
393 
394 	out = lua_new_text (L, NULL, nbytes, TRUE);
395 	randombytes_buf ((char *)out->start, nbytes);
396 	out->len = nbytes;
397 
398 	return 1;
399 }
400 
401 #define MAX_REC 10
402 
403 static void
lua_text_tbl_length(lua_State * L,gsize dlen,gsize * dest,guint rec)404 lua_text_tbl_length (lua_State *L, gsize dlen, gsize *dest, guint rec)
405 {
406 	gsize tblen, stlen;
407 	struct rspamd_lua_text *elt;
408 
409 	if (rec > MAX_REC) {
410 		luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
411 
412 		return;
413 	}
414 
415 	tblen = rspamd_lua_table_size (L, -1);
416 
417 	for (gsize i = 0; i < tblen; i ++) {
418 		lua_rawgeti (L, -1, i + 1);
419 
420 		if (lua_type (L, -1) == LUA_TSTRING) {
421 #if LUA_VERSION_NUM >= 502
422 			stlen = lua_rawlen (L, -1);
423 #else
424 			stlen = lua_objlen (L, -1);
425 #endif
426 			(*dest) += stlen;
427 		}
428 		else if (lua_type (L, -1) == LUA_TUSERDATA){
429 			elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
430 
431 			if (elt) {
432 				(*dest) += elt->len;
433 			}
434 		}
435 		else if (lua_type (L, -1) == LUA_TTABLE) {
436 			lua_text_tbl_length (L, dlen, dest, rec + 1);
437 		}
438 
439 		if (i != tblen - 1) {
440 			(*dest) += dlen;
441 		}
442 
443 		lua_pop (L, 1);
444 	}
445 }
446 
447 static void
lua_text_tbl_append(lua_State * L,const gchar * delim,gsize dlen,gchar ** dest,guint rec)448 lua_text_tbl_append (lua_State *L,
449 					 const gchar *delim,
450 					 gsize dlen,
451 					 gchar **dest,
452 					 guint rec)
453 {
454 	const gchar *st;
455 	gsize tblen, stlen;
456 	struct rspamd_lua_text *elt;
457 
458 	if (rec > MAX_REC) {
459 		luaL_error (L, "lua_text_tbl_length: recursion limit exceeded");
460 
461 		return;
462 	}
463 
464 	tblen = rspamd_lua_table_size (L, -1);
465 
466 	for (guint i = 0; i < tblen; i ++) {
467 		lua_rawgeti (L, -1, i + 1);
468 
469 		if (lua_type (L, -1) == LUA_TSTRING) {
470 			st = lua_tolstring (L, -1, &stlen);
471 			memcpy ((*dest), st, stlen);
472 			(*dest) += stlen;
473 		}
474 		else if (lua_type (L, -1) == LUA_TUSERDATA){
475 			elt = (struct rspamd_lua_text *)lua_touserdata (L, -1);
476 
477 			if (elt) {
478 				memcpy ((*dest), elt->start, elt->len);
479 				(*dest) += elt->len;
480 			}
481 		}
482 		else if (lua_type (L, -1) == LUA_TTABLE) {
483 			lua_text_tbl_append (L, delim, dlen, dest, rec + 1);
484 		}
485 
486 		if (dlen && i != tblen - 1) {
487 			memcpy ((*dest), delim, dlen);
488 			(*dest) += dlen;
489 		}
490 
491 		lua_pop (L, 1);
492 	}
493 }
494 
495 static gint
lua_text_fromtable(lua_State * L)496 lua_text_fromtable (lua_State *L)
497 {
498 	LUA_TRACE_POINT;
499 	const gchar *delim = "";
500 	struct rspamd_lua_text *t;
501 	gsize textlen = 0, dlen, oldtop = lua_gettop (L);
502 	gchar *dest;
503 
504 	if (!lua_istable (L, 1)) {
505 		return luaL_error (L, "invalid arguments");
506 	}
507 
508 	if (lua_type (L, 2) == LUA_TSTRING) {
509 		delim = lua_tolstring (L, 2, &dlen);
510 	}
511 	else {
512 		dlen = 0;
513 	}
514 
515 	/* Calculate length needed */
516 	lua_pushvalue (L, 1);
517 	lua_text_tbl_length (L, dlen, &textlen, 0);
518 	lua_pop (L, 1);
519 
520 	/* Allocate new text */
521 	t = lua_newuserdata (L, sizeof (*t));
522 	dest = g_malloc (textlen);
523 	t->start = dest;
524 	t->len = textlen;
525 	t->flags = RSPAMD_TEXT_FLAG_OWN;
526 	rspamd_lua_setclass (L, "rspamd{text}", -1);
527 
528 	lua_pushvalue (L, 1);
529 	lua_text_tbl_append (L, delim, dlen, &dest, 0);
530 	lua_pop (L, 1); /* Table arg */
531 
532 	gint newtop = lua_gettop (L);
533 	g_assert ( newtop== oldtop + 1);
534 
535 	return 1;
536 }
537 
538 static gint
lua_text_len(lua_State * L)539 lua_text_len (lua_State *L)
540 {
541 	LUA_TRACE_POINT;
542 	struct rspamd_lua_text *t = lua_check_text (L, 1);
543 	gsize l = 0;
544 
545 	if (t != NULL) {
546 		l = t->len;
547 	}
548 	else {
549 		return luaL_error (L, "invalid arguments");
550 	}
551 
552 	lua_pushinteger (L, l);
553 
554 	return 1;
555 }
556 
557 static gint
lua_text_str(lua_State * L)558 lua_text_str (lua_State *L)
559 {
560 	LUA_TRACE_POINT;
561 	struct rspamd_lua_text *t = lua_check_text (L, 1);
562 
563 	if (t != NULL) {
564 		lua_pushlstring (L, t->start, t->len);
565 	}
566 	else {
567 		return luaL_error (L, "invalid arguments");
568 	}
569 
570 	return 1;
571 }
572 
573 static gint
lua_text_ptr(lua_State * L)574 lua_text_ptr (lua_State *L)
575 {
576 	LUA_TRACE_POINT;
577 	struct rspamd_lua_text *t = lua_check_text (L, 1);
578 
579 	if (t != NULL) {
580 		lua_pushlightuserdata (L, (gpointer)t->start);
581 	}
582 	else {
583 		return luaL_error (L, "invalid arguments");
584 	}
585 
586 	return 1;
587 }
588 
589 static gint
lua_text_take_ownership(lua_State * L)590 lua_text_take_ownership (lua_State *L)
591 {
592 	LUA_TRACE_POINT;
593 	struct rspamd_lua_text *t = lua_check_text (L, 1);
594 	gchar *dest;
595 
596 	if (t != NULL) {
597 		if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
598 			/* We already own it */
599 			lua_pushboolean (L, true);
600 		}
601 		else {
602 			dest = g_malloc (t->len);
603 			memcpy (dest, t->start, t->len);
604 			t->start = dest;
605 			t->flags |= RSPAMD_TEXT_FLAG_OWN;
606 			lua_pushboolean (L, true);
607 		}
608 	}
609 	else {
610 		return luaL_error (L, "invalid arguments");
611 	}
612 
613 	return 1;
614 }
615 
616 static gint
lua_text_span(lua_State * L)617 lua_text_span (lua_State *L)
618 {
619 	LUA_TRACE_POINT;
620 	struct rspamd_lua_text *t = lua_check_text (L, 1);
621 	gint64 start = lua_tointeger (L, 2), len = -1;
622 
623 	if (t && start >= 1 && start <= t->len) {
624 		if (lua_isnumber (L, 3)) {
625 			len = lua_tonumber (L, 3);
626 		}
627 
628 		if (len == -1) {
629 			len = t->len - (start - 1);
630 		}
631 
632 		if (len < 0 || (len > (t->len - (start - 1)))) {
633 			return luaL_error (L, "invalid length");
634 		}
635 
636 		lua_new_text (L, t->start + (start - 1), len, FALSE);
637 	}
638 	else {
639 		if (!t) {
640 			return luaL_error (L, "invalid arguments, text required");
641 		}
642 		else {
643 			return luaL_error (L, "invalid arguments: start offset %d "
644 						 "is larger than text len %d", (int)start, (int)t->len);
645 		}
646 	}
647 
648 	return 1;
649 }
650 
651 /* Helpers to behave exactly as Lua does */
652 static inline gsize
relative_pos_start(gint pos,gsize len)653 relative_pos_start (gint pos, gsize len)
654 {
655 	if (pos > 0) {
656 		return pos;
657 	}
658 	else if (pos == 0) {
659 		return 1;
660 	}
661 	else if (pos < -((gint) len)) {
662 		return 1;
663 	}
664 
665 	/* Negative pos inside str */
666 	return len + ((gsize)pos) + 1;
667 }
668 
669 static inline gsize
relative_pos_end(gint pos,gsize len)670 relative_pos_end (gint pos, gsize len)
671 {
672 	if (pos > (gint)len) {
673 		return len;
674 	}
675 	else if (pos >= 0) {
676 		return (size_t) pos;
677 	}
678 	else if (pos < -((gint)len)) {
679 		return 0;
680 	}
681 
682 	return len + ((gsize)pos) + 1;
683 }
684 
685 static gint
lua_text_sub(lua_State * L)686 lua_text_sub (lua_State *L)
687 {
688 	LUA_TRACE_POINT;
689 	struct rspamd_lua_text *t = lua_check_text (L, 1);
690 
691 	if (t) {
692 		size_t start = relative_pos_start (luaL_checkinteger (L, 2),
693 				t->len);
694 		size_t end = relative_pos_end (luaL_optinteger (L, 3, -1),
695 				t->len);
696 
697 
698 		if (start <= end) {
699 			lua_new_text (L, t->start + (start - 1),
700 					(end - start) + 1, FALSE);
701 		}
702 		else {
703 			lua_new_text (L, "", 0, TRUE);
704 		}
705 	}
706 	else {
707 		return luaL_error (L, "invalid arguments");
708 	}
709 
710 	return 1;
711 }
712 
713 static gint64
rspamd_lua_text_push_line(lua_State * L,struct rspamd_lua_text * t,gint64 start_offset,const gchar * sep_pos,gboolean stringify)714 rspamd_lua_text_push_line (lua_State *L,
715 						   struct rspamd_lua_text *t,
716 						   gint64 start_offset,
717 						   const gchar *sep_pos,
718 						   gboolean stringify)
719 {
720 	const gchar *start;
721 	gsize len;
722 	gint64 ret;
723 
724 	start = t->start + start_offset;
725 	len = sep_pos ? (sep_pos - start) : (t->len - start_offset);
726 	ret = start_offset + len;
727 
728 	/* Trim line */
729 	while (len > 0) {
730 		if (start[len - 1] == '\r' || start[len - 1] == '\n') {
731 			len --;
732 		}
733 		else {
734 			break;
735 		}
736 	}
737 
738 	if (stringify) {
739 		lua_pushlstring (L, start, len);
740 	}
741 	else {
742 		struct rspamd_lua_text *ntext;
743 
744 		ntext = lua_newuserdata (L, sizeof (*ntext));
745 		rspamd_lua_setclass (L, "rspamd{text}", -1);
746 		ntext->start = start;
747 		ntext->len = len;
748 		ntext->flags = 0; /* Not own as it must be owned by a top object */
749 	}
750 
751 	return ret;
752 }
753 
754 static gint
rspamd_lua_text_readline(lua_State * L)755 rspamd_lua_text_readline (lua_State *L)
756 {
757 	struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1));
758 	gboolean stringify = lua_toboolean (L, lua_upvalueindex (2));
759 	gint64 pos = lua_tointeger (L, lua_upvalueindex (3));
760 
761 	if (pos < 0) {
762 		return luaL_error (L, "invalid pos: %d", (gint)pos);
763 	}
764 
765 	if (pos >= t->len) {
766 		/* We are done */
767 		return 0;
768 	}
769 
770 	const gchar *sep_pos;
771 
772 	/* We look just for `\n` ignoring `\r` as it is very rare nowadays */
773 	sep_pos = memchr (t->start + pos, '\n', t->len - pos);
774 
775 	if (sep_pos == NULL) {
776 		/* Either last `\n` or `\r` separated text */
777 		sep_pos = memchr (t->start + pos, '\r', t->len - pos);
778 	}
779 
780 	pos = rspamd_lua_text_push_line (L, t, pos, sep_pos, stringify);
781 
782 	/* Skip separators */
783 	while (pos < t->len) {
784 		if (t->start[pos] == '\n' || t->start[pos] == '\r') {
785 			pos ++;
786 		}
787 		else {
788 			break;
789 		}
790 	}
791 
792 	/* Update pos */
793 	lua_pushinteger (L, pos);
794 	lua_replace (L, lua_upvalueindex (3));
795 
796 	return 1;
797 }
798 
799 static gint
lua_text_lines(lua_State * L)800 lua_text_lines (lua_State *L)
801 {
802 	LUA_TRACE_POINT;
803 	struct rspamd_lua_text *t = lua_check_text (L, 1);
804 	gboolean stringify = FALSE;
805 
806 	if (t) {
807 		if (lua_isboolean (L, 2)) {
808 			stringify = lua_toboolean (L, 2);
809 		}
810 
811 		lua_pushvalue (L, 1);
812 		lua_pushboolean (L, stringify);
813 		lua_pushinteger (L, 0); /* Current pos */
814 		lua_pushcclosure (L, rspamd_lua_text_readline, 3);
815 	}
816 	else {
817 		return luaL_error (L, "invalid arguments");
818 	}
819 
820 	return 1;
821 }
822 
823 static gint
rspamd_lua_text_regexp_split(lua_State * L)824 rspamd_lua_text_regexp_split (lua_State *L) {
825 	struct rspamd_lua_text *t = lua_touserdata (L, lua_upvalueindex (1)),
826 			*new_t;
827 	struct rspamd_lua_regexp *re = *(struct rspamd_lua_regexp **)
828 			lua_touserdata (L, lua_upvalueindex (2));
829 	gboolean stringify = lua_toboolean (L, lua_upvalueindex (3));
830 	gint64 pos = lua_tointeger (L, lua_upvalueindex (4));
831 	gboolean matched;
832 
833 	if (pos < 0) {
834 		return luaL_error (L, "invalid pos: %d", (gint) pos);
835 	}
836 
837 	if (pos >= t->len) {
838 		/* We are done */
839 		return 0;
840 	}
841 
842 	const gchar *start, *end, *old_start;
843 
844 	end = t->start + pos;
845 
846 	for (;;) {
847 		old_start = end;
848 
849 		matched = rspamd_regexp_search (re->re, t->start, t->len, &start, &end, FALSE,
850 				NULL);
851 
852 		if (matched) {
853 			if (start - old_start > 0) {
854 				if (stringify) {
855 					lua_pushlstring (L, old_start, start - old_start);
856 				}
857 				else {
858 					new_t = lua_newuserdata (L, sizeof (*t));
859 					rspamd_lua_setclass (L, "rspamd{text}", -1);
860 					new_t->start = old_start;
861 					new_t->len = start - old_start;
862 					new_t->flags = 0;
863 				}
864 
865 				break;
866 			}
867 			else {
868 				if (start == end) {
869 					matched = FALSE;
870 					break;
871 				}
872 				/*
873 				 * All match separators (e.g. starting separator,
874 				 * we need to skip it). Continue iterations.
875 				 */
876 			}
877 		}
878 		else {
879 			/* No match, stop */
880 			break;
881 		}
882 	}
883 
884 	if (!matched && (t->len > 0 && (end == NULL || end < t->start + t->len))) {
885 		/* No more matches, but we might need to push the last element */
886 		if (end == NULL) {
887 			end = t->start;
888 		}
889 		/* No separators, need to push the whole remaining part */
890 		if (stringify) {
891 			lua_pushlstring (L, end, (t->start + t->len) - end);
892 		}
893 		else {
894 			new_t = lua_newuserdata (L, sizeof (*t));
895 			rspamd_lua_setclass (L, "rspamd{text}", -1);
896 			new_t->start = end;
897 			new_t->len = (t->start + t->len) - end;
898 			new_t->flags = 0;
899 		}
900 
901 		pos = t->len;
902 	}
903 	else {
904 
905 		pos = end - t->start;
906 	}
907 
908 	/* Update pos */
909 	lua_pushinteger (L, pos);
910 	lua_replace (L, lua_upvalueindex (4));
911 
912 	return 1;
913 }
914 
915 static gint
lua_text_split(lua_State * L)916 lua_text_split (lua_State *L)
917 {
918 	LUA_TRACE_POINT;
919 	struct rspamd_lua_text *t = lua_check_text (L, 1);
920 	struct rspamd_lua_regexp *re;
921 	gboolean stringify = FALSE, own_re = FALSE;
922 
923 	if (t == NULL) {
924 		return luaL_error (L, "invalid arguments");
925 	}
926 
927 	if (lua_type (L, 2) == LUA_TUSERDATA) {
928 		re = lua_check_regexp (L, 2);
929 	}
930 	else {
931 		rspamd_regexp_t *c_re;
932 		GError *err = NULL;
933 
934 		c_re = rspamd_regexp_new (lua_tostring (L, 2), NULL, &err);
935 		if (c_re == NULL) {
936 
937 			gint ret = luaL_error (L, "cannot parse regexp: %s, error: %s",
938 					lua_tostring (L, 2),
939 					err == NULL ? "undefined" : err->message);
940 			if (err) {
941 				g_error_free (err);
942 			}
943 
944 			return ret;
945 		}
946 
947 		re = g_malloc0 (sizeof (struct rspamd_lua_regexp));
948 		re->re = c_re;
949 		re->re_pattern = g_strdup (lua_tostring (L, 2));
950 		re->module = rspamd_lua_get_module_name (L);
951 		own_re = TRUE;
952 	}
953 
954 	if (re) {
955 		if (lua_isboolean (L, 3)) {
956 			stringify = lua_toboolean (L, 3);
957 		}
958 
959 		/* Upvalues */
960 		lua_pushvalue (L, 1); /* text */
961 
962 		if (own_re) {
963 			struct rspamd_lua_regexp **pre;
964 			pre = lua_newuserdata (L, sizeof (struct rspamd_lua_regexp *));
965 			rspamd_lua_setclass (L, "rspamd{regexp}", -1);
966 			*pre = re;
967 		}
968 		else {
969 			lua_pushvalue (L, 2); /* regexp */
970 		}
971 
972 		lua_pushboolean (L, stringify);
973 		lua_pushinteger (L, 0); /* Current pos */
974 		lua_pushcclosure (L, rspamd_lua_text_regexp_split, 4);
975 	}
976 	else {
977 		return luaL_error (L, "invalid arguments");
978 	}
979 
980 	return 1;
981 }
982 
983 
984 static gint
lua_text_at(lua_State * L)985 lua_text_at (lua_State *L)
986 {
987 	return lua_text_byte(L);
988 }
989 
990 static gint
lua_text_byte(lua_State * L)991 lua_text_byte (lua_State *L)
992 {
993 	LUA_TRACE_POINT;
994 	struct rspamd_lua_text *t = lua_check_text (L, 1);
995 	if (!t) {
996 		return luaL_error (L, "invalid arguments");
997 	}
998 
999 	gsize start = relative_pos_start (luaL_optinteger (L, 2, 1), t->len);
1000 	gsize end = relative_pos_end (luaL_optinteger (L, 3, start), t->len);
1001 	start--;
1002 
1003 	if (start >= end) {
1004 		return 0;
1005 	}
1006 
1007 	for (gsize i = start; i < end; i++) {
1008 		lua_pushinteger (L, t->start[i]);
1009 	}
1010 	return end - start;
1011 }
1012 
1013 static gint
lua_text_memchr(lua_State * L)1014 lua_text_memchr (lua_State *L)
1015 {
1016 	LUA_TRACE_POINT;
1017 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1018 	int c;
1019 	bool reverse = false;
1020 
1021 	if (lua_isnumber (L, 2)) {
1022 		c = lua_tonumber (L, 2);
1023 	}
1024 	else {
1025 		gsize l;
1026 		const gchar *str = lua_tolstring (L, 2, &l);
1027 
1028 		if (str) {
1029 			c = str[0];
1030 
1031 			if (l != 1) {
1032 				return luaL_error (L, "need exactly one character to search");
1033 			}
1034 		}
1035 		else {
1036 			return luaL_error (L, "invalid arguments");
1037 		}
1038 	}
1039 
1040 	if (t) {
1041 		void *f;
1042 
1043 		if (lua_isboolean (L, 3)) {
1044 			reverse = lua_toboolean (L, 3);
1045 		}
1046 
1047 		if (reverse) {
1048 			f = rspamd_memrchr (t->start, c, t->len);
1049 		}
1050 		else {
1051 			f = memchr (t->start, c, t->len);
1052 		}
1053 
1054 		if (f) {
1055 			lua_pushinteger (L, ((const char *)f) - t->start + 1);
1056 		}
1057 		else {
1058 			lua_pushinteger (L, -1);
1059 		}
1060 	}
1061 	else {
1062 		return luaL_error (L, "invalid arguments");
1063 	}
1064 
1065 	return 1;
1066 }
1067 
1068 static gint
lua_text_bytes(lua_State * L)1069 lua_text_bytes (lua_State *L)
1070 {
1071 	LUA_TRACE_POINT;
1072 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1073 
1074 	if (t) {
1075 		lua_createtable (L, t->len, 0);
1076 
1077 		for (gsize i = 0; i < t->len; i ++) {
1078 			lua_pushinteger (L, (guchar)t->start[i]);
1079 			lua_rawseti (L, -2, i + 1);
1080 		}
1081 	}
1082 	else {
1083 		return luaL_error (L, "invalid arguments");
1084 	}
1085 
1086 	return 1;
1087 }
1088 
1089 static gint
lua_text_save_in_file(lua_State * L)1090 lua_text_save_in_file (lua_State *L)
1091 {
1092 	LUA_TRACE_POINT;
1093 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1094 	const gchar *fname = NULL;
1095 	guint mode = 00644;
1096 	gint fd = -1;
1097 	gboolean need_close = FALSE;
1098 
1099 	if (t != NULL) {
1100 		if (lua_type (L, 2) == LUA_TSTRING) {
1101 			fname = luaL_checkstring (L, 2);
1102 
1103 			if (lua_type (L, 3) == LUA_TNUMBER) {
1104 				mode = lua_tonumber (L, 3);
1105 			}
1106 		}
1107 		else if (lua_type (L, 2) == LUA_TNUMBER) {
1108 			/* Created fd */
1109 			fd = lua_tonumber (L, 2);
1110 		}
1111 
1112 		if (fd == -1) {
1113 			if (fname) {
1114 				fd = rspamd_file_xopen (fname, O_CREAT | O_WRONLY | O_EXCL, mode, 0);
1115 
1116 				if (fd == -1) {
1117 					lua_pushboolean (L, false);
1118 					lua_pushstring (L, strerror (errno));
1119 
1120 					return 2;
1121 				}
1122 				need_close = TRUE;
1123 			}
1124 			else {
1125 				fd = STDOUT_FILENO;
1126 			}
1127 		}
1128 
1129 		if (write (fd, t->start, t->len) == -1) {
1130 			if (fd != STDOUT_FILENO) {
1131 				close (fd);
1132 			}
1133 
1134 			lua_pushboolean (L, false);
1135 			lua_pushstring (L, strerror (errno));
1136 
1137 			return 2;
1138 		}
1139 
1140 		if (need_close) {
1141 			close (fd);
1142 		}
1143 
1144 		lua_pushboolean (L, true);
1145 	}
1146 	else {
1147 		return luaL_error (L, "invalid arguments");
1148 	}
1149 
1150 	return 1;
1151 }
1152 
1153 static gint
lua_text_gc(lua_State * L)1154 lua_text_gc (lua_State *L)
1155 {
1156 	LUA_TRACE_POINT;
1157 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1158 
1159 	if (t != NULL) {
1160 		g_assert (!(t->flags & RSPAMD_TEXT_FLAG_FAKE));
1161 
1162 		if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1163 			if (t->flags & RSPAMD_TEXT_FLAG_WIPE) {
1164 				rspamd_explicit_memzero ((guchar *)t->start, t->len);
1165 			}
1166 
1167 			if (t->flags & RSPAMD_TEXT_FLAG_MMAPED) {
1168 				munmap ((gpointer)t->start, t->len);
1169 			}
1170 			else {
1171 				if (t->flags & RSPAMD_TEXT_FLAG_SYSMALLOC) {
1172 					free ((gpointer) t->start);
1173 				}
1174 				else {
1175 					g_free ((gpointer) t->start);
1176 				}
1177 			}
1178 		}
1179 
1180 	}
1181 
1182 	return 0;
1183 }
1184 
1185 static gint
lua_text_eq(lua_State * L)1186 lua_text_eq (lua_State *L)
1187 {
1188 	LUA_TRACE_POINT;
1189 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1190 			*t2 = lua_check_text_or_string (L, 2);
1191 
1192 	if (t1->len == t2->len) {
1193 		lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) == 0);
1194 	}
1195 	else {
1196 		lua_pushboolean (L, false);
1197 	}
1198 
1199 	return 1;
1200 }
1201 
1202 static gint
lua_text_lt(lua_State * L)1203 lua_text_lt (lua_State *L)
1204 {
1205 	LUA_TRACE_POINT;
1206 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1207 			*t2 = lua_check_text_or_string (L, 2);
1208 
1209 	if (t1 && t2) {
1210 		if (t1->len == t2->len) {
1211 			lua_pushboolean (L, memcmp (t1->start, t2->start, t1->len) < 0);
1212 		}
1213 		else {
1214 			lua_pushboolean (L, t1->len < t2->len);
1215 		}
1216 	}
1217 
1218 	return 1;
1219 }
1220 
1221 static gint
lua_text_concat(lua_State * L)1222 lua_text_concat (lua_State *L)
1223 {
1224 	LUA_TRACE_POINT;
1225 	struct rspamd_lua_text *t1 = lua_check_text_or_string (L, 1),
1226 			*t2 = lua_check_text_or_string (L, 2);
1227 
1228 	if (t1 && t2) {
1229 		struct rspamd_lua_text *final;
1230 
1231 		final = lua_new_text (L, NULL, t1->len + t2->len, TRUE);
1232 		memcpy ((void *)final->start, t1->start, t1->len);
1233 		memcpy ((void *)(final->start + t1->len), t2->start, t2->len);
1234 	}
1235 
1236 	return 1;
1237 }
1238 
1239 static gint
lua_text_wipe(lua_State * L)1240 lua_text_wipe (lua_State *L)
1241 {
1242 	LUA_TRACE_POINT;
1243 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1244 
1245 	if (t != NULL) {
1246 		if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1247 			rspamd_explicit_memzero ((guchar *)t->start, t->len);
1248 		}
1249 		else {
1250 			return luaL_error (L, "cannot wipe not owned text");
1251 		}
1252 
1253 	}
1254 	else {
1255 		return luaL_error (L, "invalid arguments");
1256 	}
1257 
1258 	return 0;
1259 }
1260 
1261 static gint
lua_text_base32(lua_State * L)1262 lua_text_base32 (lua_State *L)
1263 {
1264 	LUA_TRACE_POINT;
1265 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1266 	enum rspamd_base32_type btype = RSPAMD_BASE32_DEFAULT;
1267 
1268 	if (t != NULL) {
1269 		if (lua_type (L, 2) == LUA_TSTRING) {
1270 			btype = rspamd_base32_decode_type_from_str (lua_tostring (L, 2));
1271 
1272 			if (btype == RSPAMD_BASE32_INVALID) {
1273 				return luaL_error (L, "invalid b32 type: %s", lua_tostring (L, 2));
1274 			}
1275 		}
1276 
1277 		out = lua_new_text (L, NULL, t->len * 8 / 5 + 2, TRUE);
1278 		out->len = rspamd_encode_base32_buf (t->start, t->len, (gchar *)out->start,
1279 				out->len, btype);
1280 	}
1281 	else {
1282 		return luaL_error (L, "invalid arguments");
1283 	}
1284 
1285 	return 1;
1286 }
1287 
1288 static gint
lua_text_base64(lua_State * L)1289 lua_text_base64 (lua_State *L)
1290 {
1291 	LUA_TRACE_POINT;
1292 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1293 	gsize line_len = 0;
1294 	gboolean fold = FALSE;
1295 
1296 	if (t != NULL) {
1297 		if (lua_type (L, 2) == LUA_TNUMBER) {
1298 			line_len = lua_tointeger (L, 2);
1299 
1300 			if (line_len <= 8) {
1301 				return luaL_error (L, "too small line length (at least 8 is required)");
1302 			}
1303 		}
1304 
1305 		enum rspamd_newlines_type how = RSPAMD_TASK_NEWLINES_CRLF;
1306 
1307 		if (lua_type (L, 3) == LUA_TSTRING) {
1308 			const gchar *how_str = lua_tostring (L, 3);
1309 
1310 			if (g_ascii_strcasecmp (how_str, "cr") == 0) {
1311 				how = RSPAMD_TASK_NEWLINES_CR;
1312 			}
1313 			else if (g_ascii_strcasecmp (how_str, "lf") == 0) {
1314 				how = RSPAMD_TASK_NEWLINES_LF;
1315 			}
1316 			else if (g_ascii_strcasecmp (how_str, "crlf") != 0) {
1317 				return luaL_error (L, "invalid newline style: %s", how_str);
1318 			}
1319 		}
1320 
1321 		if (lua_type (L, 4) == LUA_TBOOLEAN) {
1322 			fold = lua_toboolean (L, 4);
1323 		}
1324 
1325 		gsize sz_len;
1326 
1327 		out = lua_newuserdata (L, sizeof (*t));
1328 		out->flags = RSPAMD_TEXT_FLAG_OWN;
1329 		out->start = rspamd_encode_base64_common (t->start, t->len,
1330 				line_len, &sz_len, fold, how);
1331 		out->len = sz_len;
1332 		rspamd_lua_setclass (L, "rspamd{text}", -1);
1333 	}
1334 	else {
1335 		return luaL_error (L, "invalid arguments");
1336 	}
1337 
1338 	return 1;
1339 }
1340 
1341 static gint
lua_text_hex(lua_State * L)1342 lua_text_hex (lua_State *L)
1343 {
1344 	LUA_TRACE_POINT;
1345 	struct rspamd_lua_text *t = lua_check_text (L, 1), *out;
1346 
1347 	if (t != NULL) {
1348 
1349 		out = lua_new_text (L, NULL, t->len * 2, TRUE);
1350 		out->len = rspamd_encode_hex_buf (t->start, t->len, (gchar *)out->start,
1351 				out->len);
1352 	}
1353 	else {
1354 		return luaL_error (L, "invalid arguments");
1355 	}
1356 
1357 	return 1;
1358 }
1359 
1360 static gint
lua_text_find(lua_State * L)1361 lua_text_find (lua_State *L)
1362 {
1363 	LUA_TRACE_POINT;
1364 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1365 	gsize patlen, init = 1;
1366 	const gchar *pat = luaL_checklstring (L, 2, &patlen);
1367 
1368 	if (t != NULL && pat != NULL) {
1369 
1370 		if (lua_isnumber (L, 3)) {
1371 			init = relative_pos_start (lua_tointeger (L, 3), t->len);
1372 		}
1373 
1374 		init --;
1375 
1376 		if (init > t->len) {
1377 			return luaL_error (L, "invalid arguments to find: init too large");
1378 		}
1379 
1380 		goffset pos = rspamd_substring_search (t->start + init,
1381 				t->len - init,
1382 				pat, patlen);
1383 
1384 		if (pos == -1) {
1385 			lua_pushnil (L);
1386 
1387 			return 1;
1388 		}
1389 
1390 		lua_pushinteger (L, pos + 1);
1391 		lua_pushinteger (L, pos + patlen);
1392 	}
1393 	else {
1394 		return luaL_error (L, "invalid arguments");
1395 	}
1396 
1397 	return 2;
1398 }
1399 
/*
 * Applies `op` between the word of the bitset `a` that holds bit number `b`
 * and a mask in which only that bit is set. The word index is b divided by
 * the number of bits in one element of `a`, the bit position is the
 * remainder. Use `|=` to set a bit and `&` to test it.
 */
#define BITOP(a,b,op) \
		((a)[(guint64)(b) / (8u * sizeof *(a))] op \
		((guint64)1 << ((guint64)(b) % (8u * sizeof *(a)))))
1402 
1403 static gint
lua_text_exclude_chars(lua_State * L)1404 lua_text_exclude_chars (lua_State *L)
1405 {
1406 	LUA_TRACE_POINT;
1407 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1408 	gssize patlen;
1409 	const gchar *pat = lua_tolstring (L, 2, &patlen), *p, *end;
1410 	gchar *dest, *d;
1411 	guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
1412 	gboolean copy = TRUE;
1413 	guint *plen;
1414 
1415 	if (t != NULL && pat && patlen > 0) {
1416 		if (lua_isboolean (L, 3)) {
1417 			copy = lua_toboolean (L, 3);
1418 		}
1419 		else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1420 			copy = FALSE;
1421 		}
1422 
1423 		if (!copy) {
1424 			dest = (gchar *)t->start;
1425 			plen = &t->len;
1426 			lua_pushvalue (L, 1); /* Push text as a result */
1427 		}
1428 		else {
1429 			/* We need to copy read only text */
1430 			struct rspamd_lua_text *nt;
1431 
1432 			dest = g_malloc (t->len);
1433 			nt = lua_newuserdata (L, sizeof (*nt));
1434 			rspamd_lua_setclass (L, "rspamd{text}", -1);
1435 			nt->len = t->len;
1436 			nt->flags = RSPAMD_TEXT_FLAG_OWN;
1437 			memcpy (dest, t->start, t->len);
1438 			nt->start = dest;
1439 			plen = &nt->len;
1440 		}
1441 
1442 		/* Fill pattern bitset */
1443 		memset (byteset, 0, sizeof byteset);
1444 
1445 		while (patlen > 0) {
1446 			if (*pat == '%') {
1447 				pat ++;
1448 				patlen --;
1449 
1450 				if (patlen > 0) {
1451 					/*
1452 					 * This stuff assumes little endian, but GUINT64_FROM_LE should
1453 					 * deal with proper conversion
1454 					 */
1455 					switch (*pat) {
1456 					case '%':
1457 						BITOP (byteset, *(guchar *) pat, |=);
1458 						break;
1459 					case 's':
1460 						/* "\r\n\t\f " */
1461 						byteset[0] |= GUINT64_FROM_LE(0x100003600LLU);
1462 						break;
1463 					case 'n':
1464 						/* newlines: "\r\n" */
1465 						byteset[0] |= GUINT64_FROM_LE (0x2400LLU);
1466 						break;
1467 					case '8':
1468 						/* 8 bit characters */
1469 						byteset[2] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1470 						byteset[3] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1471 						break;
1472 					case 'c':
1473 						/* Non printable (control) characters */
1474 						byteset[0] |= GUINT64_FROM_LE (0xffffffffLLU);
1475 						/* Del character */
1476 						byteset[1] |= GUINT64_FROM_LE (0x8000000000000000LLU);
1477 						break;
1478 					}
1479 				}
1480 				else {
1481 					/* Last '%' */
1482 					BITOP (byteset, (guchar)'%', |=);
1483 				}
1484 			}
1485 			else {
1486 				BITOP (byteset, *(guchar *)pat, |=);
1487 			}
1488 
1489 			pat ++;
1490 			patlen --;
1491 		}
1492 		for (; patlen > 0 && BITOP (byteset, *(guchar *)pat, |=); pat++, patlen --);
1493 
1494 		p = t->start;
1495 		end = t->start + t->len;
1496 		d = dest;
1497 
1498 		while (p < end) {
1499 			if (!BITOP (byteset, *(guchar *)p, &)) {
1500 				*d++ = *p;
1501 			}
1502 
1503 			p ++;
1504 		}
1505 
1506 		*(plen) = d - dest;
1507 	}
1508 	else {
1509 		return luaL_error (L, "invalid arguments");
1510 	}
1511 
1512 	return 1;
1513 }
1514 
1515 static gint
lua_text_oneline(lua_State * L)1516 lua_text_oneline (lua_State *L)
1517 {
1518 	LUA_TRACE_POINT;
1519 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1520 	const gchar *p, *end;
1521 	gchar *dest, *d;
1522 	guint64 byteset[32 / sizeof(guint64)]; /* Bitset for ascii */
1523 	gboolean copy = TRUE, seen_8bit = FALSE;
1524 	guint *plen;
1525 
1526 	if (t != NULL) {
1527 		if (lua_isboolean (L, 2)) {
1528 			copy = lua_toboolean (L, 2);
1529 		}
1530 		else if (t->flags & RSPAMD_TEXT_FLAG_OWN) {
1531 			copy = FALSE;
1532 		}
1533 
1534 		if (!copy) {
1535 			dest = (gchar *)t->start;
1536 			plen = &t->len;
1537 			lua_pushvalue (L, 1); /* Push text as a result */
1538 		}
1539 		else {
1540 			/* We need to copy read only text */
1541 			struct rspamd_lua_text *nt;
1542 
1543 			dest = g_malloc (t->len);
1544 			nt = lua_newuserdata (L, sizeof (*nt));
1545 			rspamd_lua_setclass (L, "rspamd{text}", -1);
1546 			nt->len = t->len;
1547 			nt->flags = RSPAMD_TEXT_FLAG_OWN;
1548 			memcpy (dest, t->start, t->len);
1549 			nt->start = dest;
1550 			plen = &nt->len;
1551 		}
1552 
1553 		/* Fill pattern bitset */
1554 		memset (byteset, 0, sizeof byteset);
1555 		/* All spaces */
1556 		byteset[0] |= GUINT64_FROM_LE (0x100003600LLU);
1557 		/* Control characters */
1558 		byteset[0] |= GUINT64_FROM_LE (0xffffffffLLU);
1559 		/* Del character */
1560 		byteset[1] |= GUINT64_FROM_LE (0x8000000000000000LLU);
1561 		/* 8 bit characters */
1562 		byteset[2] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1563 		byteset[3] |= GUINT64_FROM_LE (0xffffffffffffffffLLU);
1564 
1565 		p = t->start;
1566 		end = t->start + t->len;
1567 		d = dest;
1568 
1569 		while (p < end) {
1570 			if (!BITOP (byteset, *(guchar *)p, &)) {
1571 				*d++ = *p;
1572 			}
1573 			else {
1574 				if ((*(guchar *)p) & 0x80) {
1575 					seen_8bit = TRUE;
1576 					*d++ = *p;
1577 				}
1578 				else {
1579 					if (*p == ' ') {
1580 						if (d != dest) {
1581 							*d++ = *p++;
1582 						}
1583 
1584 						while (p < end && g_ascii_isspace (*p)) {
1585 							p ++;
1586 						}
1587 
1588 						continue; /* To avoid p++ */
1589 					}
1590 					else if (*p == '\r' || *p == '\n') {
1591 						if (d != dest) {
1592 							*d++ = ' ';
1593 							p ++;
1594 						}
1595 
1596 						while (p < end && g_ascii_isspace (*p)) {
1597 							p ++;
1598 						}
1599 
1600 						continue; /* To avoid p++ */
1601 					}
1602 				}
1603 			}
1604 
1605 			p ++;
1606 		}
1607 
1608 		while (d > dest && g_ascii_isspace (*(d - 1))) {
1609 			d --;
1610 		}
1611 
1612 		if (seen_8bit) {
1613 			if (rspamd_fast_utf8_validate (dest, d - dest) != 0) {
1614 				/* Need to make it valid :( */
1615 				UChar32 uc;
1616 				goffset err_offset;
1617 				gsize remain = d - dest;
1618 				gchar *nd = dest;
1619 
1620 				while (remain > 0 && (err_offset = rspamd_fast_utf8_validate (nd, remain)) > 0) {
1621 					gint i = 0;
1622 
1623 					err_offset --; /* As it returns it 1 indexed */
1624 					nd += err_offset;
1625 					remain -= err_offset;
1626 
1627 					/* Each invalid character of input requires 3 bytes of output (+2 bytes) */
1628 					while (i < remain) {
1629 						gint old_pos = i;
1630 						U8_NEXT (nd, i, remain, uc);
1631 
1632 						if (uc < 0) {
1633 							nd[old_pos] = '?';
1634 						}
1635 						else {
1636 							break;
1637 						}
1638 					}
1639 
1640 					nd += i;
1641 					remain -= i;
1642 				}
1643 			}
1644 		}
1645 
1646 		*(plen) = d - dest;
1647 	}
1648 	else {
1649 		return luaL_error (L, "invalid arguments");
1650 	}
1651 
1652 	return 1;
1653 }
1654 
1655 static gint
lua_text_lower(lua_State * L)1656 lua_text_lower (lua_State *L)
1657 {
1658 	LUA_TRACE_POINT;
1659 	struct rspamd_lua_text *t = lua_check_text (L, 1), *nt;
1660 	gboolean is_utf8 = FALSE, is_inplace = FALSE;
1661 
1662 	if (t != NULL) {
1663 		if (lua_isboolean (L, 2)) {
1664 			is_utf8 = lua_toboolean (L, 2);
1665 		}
1666 		if (lua_isboolean (L, 3)) {
1667 			is_inplace = lua_toboolean (L, 3);
1668 		}
1669 
1670 		if (is_inplace) {
1671 			nt = t;
1672 			lua_pushvalue (L, 1);
1673 		}
1674 		else {
1675 			nt = lua_new_text (L, t->start, t->len, TRUE);
1676 		}
1677 
1678 		if (!is_utf8) {
1679 			rspamd_str_lc ((gchar *) nt->start, nt->len);
1680 		}
1681 		else {
1682 			rspamd_str_lc_utf8 ((gchar *) nt->start, nt->len);
1683 		}
1684 	}
1685 	else {
1686 		return luaL_error (L, "invalid arguments");
1687 	}
1688 
1689 	return 1;
1690 }
1691 
1692 static gint
lua_text_strtoul(lua_State * L)1693 lua_text_strtoul (lua_State *L)
1694 {
1695 	LUA_TRACE_POINT;
1696 	struct rspamd_lua_text *t = lua_check_text (L, 1);
1697 
1698 	if (t) {
1699 		unsigned long ll;
1700 
1701 		if (rspamd_strtoul (t->start, t->len, &ll)) {
1702 			lua_pushinteger (L, ll);
1703 		}
1704 		else {
1705 			lua_pushnil (L);
1706 		}
1707 	}
1708 	else {
1709 		return luaL_error (L, "invalid arguments");
1710 	}
1711 
1712 	return 1;
1713 }
1714 
/*
 * Used to distinguish lua text metatable: this value is stored under the
 * "cookie" key both in the rspamd{text} class table and in the table of the
 * `rspamd_text` module.
 */
static const guint rspamd_lua_text_cookie = 0x2b21ef6fU;
1717 
1718 static gint
lua_load_text(lua_State * L)1719 lua_load_text (lua_State * L)
1720 {
1721 	lua_newtable (L);
1722 	lua_pushstring (L, "cookie");
1723 	lua_pushnumber (L, rspamd_lua_text_cookie);
1724 	lua_settable (L, -3);
1725 	luaL_register (L, NULL, textlib_f);
1726 
1727 	return 1;
1728 }
1729 
1730 void
luaopen_text(lua_State * L)1731 luaopen_text (lua_State *L)
1732 {
1733 	rspamd_lua_new_class (L, "rspamd{text}", textlib_m);
1734 	lua_pushstring (L, "cookie");
1735 	lua_pushnumber (L, rspamd_lua_text_cookie);
1736 	lua_settable (L, -3);
1737 	lua_pop (L, 1);
1738 
1739 	rspamd_lua_add_preload (L, "rspamd_text", lua_load_text);
1740 }
1741