1 /*-
2  * Copyright 2016 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "lua_common.h"
18 #include "lua_url.h"
19 #include "libmime/message.h"
20 #include "libmime/lang_detection.h"
21 #include "libstat/stat_api.h"
22 #include "libcryptobox/cryptobox.h"
23 #include "libutil/shingles.h"
24 
25 #include "contrib/uthash/utlist.h"
26 
27 /* Textpart methods */
28 /***
29  * @module rspamd_textpart
30  * This module provides different methods to manipulate text parts data. Text parts
 * can be obtained from the `rspamd_task` by using the method `task:get_text_parts()`
32 @example
33 rspamd_config.R_EMPTY_IMAGE = function (task)
34 	parts = task:get_text_parts()
35 	if parts then
36 		for _,part in ipairs(parts) do
37 			if part:is_empty() then
38 				texts = task:get_texts()
39 				if texts then
40 					return true
41 				end
42 				return false
43 			end
44 		end
45 	end
46 	return false
47 end
48  */
49 
50 /***
51  * @method text_part:is_utf()
52  * Return TRUE if part is a valid utf text
53  * @return {boolean} true if part is valid `UTF8` part
54  */
55 LUA_FUNCTION_DEF (textpart, is_utf);
56 
57 /***
58  * @method text_part:has_8bit_raw()
59  * Return TRUE if a part has raw 8bit characters
60  * @return {boolean} true if a part has raw 8bit characters
61  */
62 LUA_FUNCTION_DEF (textpart, has_8bit_raw);
63 
64 /***
65  * @method text_part:has_8bit()
 * Return TRUE if a part has encoded 8bit characters
67  * @return {boolean} true if a part has encoded 8bit characters
68  */
69 LUA_FUNCTION_DEF (textpart, has_8bit);
70 
71 /***
72  * @method text_part:get_content([type])
73  * Get the text of the part (html tags stripped). Optional `type` defines type of content to get:
74  * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
75  * - `content_oneline`: utf8 content with HTML tags and newlines stripped
76  * - `raw`: raw content, not mime decoded nor utf8 converted
77  * - `raw_parsed`: raw content, mime decoded, not utf8 converted
78  * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
79  * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
80  */
81 LUA_FUNCTION_DEF (textpart, get_content);
82 /***
83  * @method text_part:get_raw_content()
84  * Get the original text of the part
85  * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
86  */
87 LUA_FUNCTION_DEF (textpart, get_raw_content);
88 /***
89  * @method text_part:get_content_oneline()
 * Get the text of the part (html tags and newlines stripped)
91  * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
92  */
93 LUA_FUNCTION_DEF (textpart, get_content_oneline);
94 /***
95  * @method text_part:get_length()
96  * Get length of the text of the part
97  * @return {integer} length of part in **bytes**
98  */
99 LUA_FUNCTION_DEF (textpart, get_length);
100 /***
 * @method text_part:get_raw_length()
102  * Get length of the **raw** content of the part (e.g. HTML with tags unstripped)
103  * @return {integer} length of part in **bytes**
104  */
105 LUA_FUNCTION_DEF (textpart, get_raw_length);
106 /***
 * @method text_part:get_urls_length()
108  * Get length of the urls within the part
109  * @return {integer} length of urls in **bytes**
110  */
111 LUA_FUNCTION_DEF (textpart, get_urls_length);
112 /***
 * @method text_part:get_lines_count()
114  * Get lines number in the part
115  * @return {integer} number of lines in the part
116  */
117 LUA_FUNCTION_DEF (textpart, get_lines_count);
118 /***
 * @method text_part:get_stats()
120  * Returns a table with the following data:
121  * - `lines`: number of lines
122  * - `spaces`: number of spaces
123  * - `double_spaces`: double spaces
124  * - `empty_lines`: number of empty lines
125  * - `non_ascii_characters`: number of non ascii characters
126  * - `ascii_characters`: number of ascii characters
127  * @return {table} table of stats
128  */
129 LUA_FUNCTION_DEF (textpart, get_stats);
130 /***
 * @method text_part:get_words_count()
132  * Get words number in the part
133  * @return {integer} number of words in the part
134  */
135 LUA_FUNCTION_DEF (textpart, get_words_count);
136 
137 /***
 * @method text_part:get_words([how])
139  * Get words in the part. Optional `how` argument defines type of words returned:
140  * - `stem`: stemmed words (default)
141  * - `norm`: normalised words (utf normalised + lowercased)
142  * - `raw`: raw words in utf (if possible)
143  * - `full`: list of tables, each table has the following fields:
144  *   - [1] - stemmed word
145  *   - [2] - normalised word
146  *   - [3] - raw word
147  *   - [4] - flags (table of strings)
148  * @return {table/strings} words in the part
149  */
150 LUA_FUNCTION_DEF (textpart, get_words);
151 
152 /***
 * @method text_part:filter_words(regexp[, how[, max]])
154  * Filter words using some regexp:
155  * - `stem`: stemmed words (default)
156  * - `norm`: normalised words (utf normalised + lowercased)
157  * - `raw`: raw words in utf (if possible)
158  * - `full`: list of tables, each table has the following fields:
159  *   - [1] - stemmed word
160  *   - [2] - normalised word
161  *   - [3] - raw word
162  *   - [4] - flags (table of strings)
163  * @param {rspamd_regexp} regexp regexp to match
164  * @param {string} how what words to extract
165  * @param {number} max maximum number of hits returned (all hits if <= 0 or nil)
166  * @return {table/strings} words matching regexp
167  */
168 LUA_FUNCTION_DEF (textpart, filter_words);
169 
170 /***
171  * @method text_part:is_empty()
172  * Returns `true` if the specified part is empty
173  * @return {bool} whether a part is empty
174  */
175 LUA_FUNCTION_DEF (textpart, is_empty);
176 /***
177  * @method text_part:is_html()
178  * Returns `true` if the specified part has HTML content
179  * @return {bool} whether a part is HTML part
180  */
181 LUA_FUNCTION_DEF (textpart, is_html);
182 /***
183  * @method text_part:get_html()
184  * Returns html content of the specified part
185  * @return {html} html content
186  */
187 LUA_FUNCTION_DEF (textpart, get_html);
188 /***
189  * @method text_part:get_language()
190  * Returns the code of the most used unicode script in the text part. Does not work with raw parts
191  * @return {string} short abbreviation (such as `ru`) for the script's language
192  */
193 LUA_FUNCTION_DEF (textpart, get_language);
194 
195 /***
196  * @method text_part:get_charset()
197  * Returns part real charset
198  * @return {string} charset of the part
199  */
200 LUA_FUNCTION_DEF (textpart, get_charset);
201 /***
202  * @method text_part:get_languages()
203  * Returns array of tables of all languages detected for a part:
204  * - 'code': language code (short string)
205  * - 'prob': logarithm of probability
206  * @return {array|tables} all languages detected for the part
207  */
208 LUA_FUNCTION_DEF (textpart, get_languages);
209 /***
210  * @method text_part:get_fuzzy_hashes(mempool)
211  * @param {rspamd_mempool} mempool - memory pool (usually task pool)
212  * Returns direct hash of textpart as a string and array [1..32] of shingles each represented as a following table:
213  * - [1] - 64 bit fuzzy hash represented as a string
214  * - [2..4] - strings used to generate this hash
215  * @return {string,array|tables} fuzzy hashes calculated
216  */
217 LUA_FUNCTION_DEF (textpart, get_fuzzy_hashes);
218 /***
219  * @method text_part:get_mimepart()
220  * Returns the mime part object corresponding to this text part
221  * @return {mimepart} mimepart object
222  */
223 LUA_FUNCTION_DEF (textpart, get_mimepart);
224 
225 static const struct luaL_reg textpartlib_m[] = {
226 	LUA_INTERFACE_DEF (textpart, is_utf),
227 	LUA_INTERFACE_DEF (textpart, has_8bit_raw),
228 	LUA_INTERFACE_DEF (textpart, has_8bit),
229 	LUA_INTERFACE_DEF (textpart, get_content),
230 	LUA_INTERFACE_DEF (textpart, get_raw_content),
231 	LUA_INTERFACE_DEF (textpart, get_content_oneline),
232 	LUA_INTERFACE_DEF (textpart, get_length),
233 	LUA_INTERFACE_DEF (textpart, get_raw_length),
234 	LUA_INTERFACE_DEF (textpart, get_urls_length),
235 	LUA_INTERFACE_DEF (textpart, get_lines_count),
236 	LUA_INTERFACE_DEF (textpart, get_words_count),
237 	LUA_INTERFACE_DEF (textpart, get_words),
238 	LUA_INTERFACE_DEF (textpart, filter_words),
239 	LUA_INTERFACE_DEF (textpart, is_empty),
240 	LUA_INTERFACE_DEF (textpart, is_html),
241 	LUA_INTERFACE_DEF (textpart, get_html),
242 	LUA_INTERFACE_DEF (textpart, get_language),
243 	LUA_INTERFACE_DEF (textpart, get_charset),
244 	LUA_INTERFACE_DEF (textpart, get_languages),
245 	LUA_INTERFACE_DEF (textpart, get_mimepart),
246 	LUA_INTERFACE_DEF (textpart, get_stats),
247 	LUA_INTERFACE_DEF (textpart, get_fuzzy_hashes),
248 	{"__tostring", rspamd_lua_class_tostring},
249 	{NULL, NULL}
250 };
251 
252 /* Mimepart methods */
253 
254 /***
255  * @module rspamd_mimepart
256  * This module provides access to mime parts found in a message
257 @example
258 rspamd_config.MISSING_CONTENT_TYPE = function(task)
259 	local parts = task:get_parts()
260 	if parts and #parts > 1 then
261 		-- We have more than one part
262 		for _,p in ipairs(parts) do
263 			local ct = p:get_header('Content-Type')
264 			-- And some parts have no Content-Type header
265 			if not ct then
266 				return true
267 			end
268 		end
269 	end
270 	return false
271 end
272  */
273 
274 /***
275  * @method mime_part:get_header(name[, case_sensitive])
276  * Get decoded value of a header specified with optional case_sensitive flag.
 * By default headers are searched in a case-insensitive manner.
278  * @param {string} name name of header to get
279  * @param {boolean} case_sensitive case sensitiveness flag to search for a header
280  * @return {string} decoded value of a header
281  */
282 LUA_FUNCTION_DEF (mimepart, get_header);
283 /***
284  * @method mime_part:get_header_raw(name[, case_sensitive])
285  * Get raw value of a header specified with optional case_sensitive flag.
 * By default headers are searched in a case-insensitive manner.
287  * @param {string} name name of header to get
288  * @param {boolean} case_sensitive case sensitiveness flag to search for a header
289  * @return {string} raw value of a header
290  */
291 LUA_FUNCTION_DEF (mimepart, get_header_raw);
292 /***
293  * @method mime_part:get_header_full(name[, case_sensitive])
294  * Get raw value of a header specified with optional case_sensitive flag.
 * By default headers are searched in a case-insensitive manner. This method returns more
296  * information about the header as a list of tables with the following structure:
297  *
298  * - `name` - name of a header
299  * - `value` - raw value of a header
300  * - `decoded` - decoded value of a header
301  * - `tab_separated` - `true` if a header and a value are separated by `tab` character
 * - `empty_separator` - `true` if there is no separator between a header and a value
303  * @param {string} name name of header to get
304  * @param {boolean} case_sensitive case sensitiveness flag to search for a header
305  * @return {list of tables} all values of a header as specified above
306 @example
307 function check_header_delimiter_tab(task, header_name)
308 	for _,rh in ipairs(task:get_header_full(header_name)) do
309 		if rh['tab_separated'] then return true end
310 	end
311 	return false
312 end
313  */
314 LUA_FUNCTION_DEF (mimepart, get_header_full);
315 /***
316  * @method mimepart:get_header_count(name[, case_sensitive])
 * Lightweight version if you need just a header's count.
 * By default headers are searched in a case-insensitive manner.
319  * @param {string} name name of header to get
320  * @param {boolean} case_sensitive case sensitiveness flag to search for a header
 * @return {number} number of header's occurrences or 0 if not found
322  */
323 LUA_FUNCTION_DEF (mimepart, get_header_count);
324 
325 /***
326  * @method mimepart:get_raw_headers()
327  * Get all undecoded headers of a mime part as a string
328  * @return {rspamd_text} all raw headers for a message as opaque text
329  */
330 LUA_FUNCTION_DEF (mimepart, get_raw_headers);
331 
332 /***
333  * @method mimepart:get_headers()
334  * Get all undecoded headers of a mime part as a string
335  * @return {rspamd_text} all raw headers for a message as opaque text
336  */
337 LUA_FUNCTION_DEF (mimepart, get_headers);
338 
339 /***
340  * @method mime_part:get_content()
341  * Get the parsed content of part
342  * @return {text} opaque text object (zero-copy if not casted to lua string)
343  */
344 LUA_FUNCTION_DEF (mimepart, get_content);
345 /***
346  * @method mime_part:get_raw_content()
347  * Get the raw content of part
348  * @return {text} opaque text object (zero-copy if not casted to lua string)
349  */
350 LUA_FUNCTION_DEF (mimepart, get_raw_content);
351 /***
352  * @method mime_part:get_length()
353  * Get length of the content of the part
354  * @return {integer} length of part in **bytes**
355  */
356 LUA_FUNCTION_DEF (mimepart, get_length);
357 /***
358  * @method mime_part:get_type()
359  * Extract content-type string of the mime part
360  * @return {string,string} content type in form 'type','subtype'
361  */
362 LUA_FUNCTION_DEF (mimepart, get_type);
363 
364 /***
365  * @method mime_part:get_type_full()
366  * Extract content-type string of the mime part with all attributes
367  * @return {string,string,table} content type in form 'type','subtype', {attrs}
368  */
369 LUA_FUNCTION_DEF (mimepart, get_type_full);
370 
371 /***
372  * @method mime_part:get_detected_type()
373  * Extract content-type string of the mime part. Use lua_magic detection
374  * @return {string,string} content type in form 'type','subtype'
375  */
376 LUA_FUNCTION_DEF (mimepart, get_detected_type);
377 
378 /***
379  * @method mime_part:get_detected_type_full()
380  * Extract content-type string of the mime part with all attributes. Use lua_magic detection
381  * @return {string,string,table} content type in form 'type','subtype', {attrs}
382  */
383 LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
384 
385 /***
386  * @method mime_part:get_detected_ext()
387  * Returns a msdos extension name according to lua_magic detection
388  * @return {string} detected extension (see lua_magic.types)
389  */
390 LUA_FUNCTION_DEF (mimepart, get_detected_ext);
391 
392 /***
393  * @method mime_part:get_cte()
394  * Extract content-transfer-encoding for a part
395  * @return {string} content transfer encoding (e.g. `base64` or `7bit`)
396  */
397 LUA_FUNCTION_DEF (mimepart, get_cte);
398 
399 /***
400  * @method mime_part:get_filename()
401  * Extract filename associated with mime part if it is an attachment
402  * @return {string} filename or `nil` if no file is associated with this part
403  */
404 LUA_FUNCTION_DEF (mimepart, get_filename);
405 /***
406  * @method mime_part:is_image()
407  * Returns true if mime part is an image
408  * @return {bool} true if a part is an image
409  */
410 LUA_FUNCTION_DEF (mimepart, is_image);
411 /***
412  * @method mime_part:get_image()
413  * Returns rspamd_image structure associated with this part. This structure has
414  * the following methods:
415  *
416  * * `get_width` - return width of an image in pixels
417  * * `get_height` - return height of an image in pixels
418  * * `get_type` - return string representation of image's type (e.g. 'jpeg')
419  * * `get_filename` - return string with image's file name
420  * * `get_size` - return size in bytes
421  * @return {rspamd_image} image structure or nil if a part is not an image
422  */
423 LUA_FUNCTION_DEF (mimepart, get_image);
424 /***
425  * @method mime_part:is_archive()
426  * Returns true if mime part is an archive
427  * @return {bool} true if a part is an archive
428  */
429 LUA_FUNCTION_DEF (mimepart, is_archive);
430 /***
431  * @method mime_part:is_attachment()
432  * Returns true if mime part looks like an attachment
433  * @return {bool} true if a part looks like an attachment
434  */
435 LUA_FUNCTION_DEF (mimepart, is_attachment);
436 
437 /***
438  * @method mime_part:get_archive()
439  * Returns rspamd_archive structure associated with this part. This structure has
440  * the following methods:
441  *
442  * * `get_files` - return list of strings with filenames inside archive
443  * * `get_files_full` - return list of tables with all information about files
444  * * `is_encrypted` - return true if an archive is encrypted
 * * `get_type` - return string representation of archive's type (e.g. 'zip')
446  * * `get_filename` - return string with archive's file name
447  * * `get_size` - return size in bytes
448  * @return {rspamd_archive} archive structure or nil if a part is not an archive
449  */
450 LUA_FUNCTION_DEF (mimepart, get_archive);
451 /***
452  * @method mime_part:is_multipart()
453  * Returns true if mime part is a multipart part
 * @return {bool} true if a part is a multipart part
455  */
456 LUA_FUNCTION_DEF (mimepart, is_multipart);
457 /***
458  * @method mime_part:is_message()
459  * Returns true if mime part is a message part (message/rfc822)
 * @return {bool} true if a part is a message part
461  */
462 LUA_FUNCTION_DEF (mimepart, is_message);
463 /***
464  * @method mime_part:get_boundary()
465  * Returns boundary for a part (extracted from parent multipart for normal parts and
466  * from the part itself for multipart)
467  * @return {string} boundary value or nil
468  */
469 LUA_FUNCTION_DEF (mimepart, get_boundary);
470 
471 /***
472  * @method mime_part:get_enclosing_boundary()
473  * Returns an enclosing boundary for a part even for multiparts. For normal parts
474  * this method is identical to `get_boundary`
475  * @return {string} boundary value or nil
476  */
477 LUA_FUNCTION_DEF (mimepart, get_enclosing_boundary);
478 
479 /***
480  * @method mime_part:get_children()
 * Returns rspamd_mimepart table of part's children. Returns nil if mime part is not multipart
482  * or a message part.
483  * @return {rspamd_mimepart} table of children
484  */
485 LUA_FUNCTION_DEF (mimepart, get_children);
486 /***
487  * @method mime_part:is_text()
488  * Returns true if mime part is a text part
489  * @return {bool} true if a part is a text part
490  */
491 LUA_FUNCTION_DEF (mimepart, is_text);
492 /***
493  * @method mime_part:get_text()
494  * Returns rspamd_textpart structure associated with this part.
 * @return {rspamd_textpart} textpart structure or nil if a part is not a text part
496  */
497 LUA_FUNCTION_DEF (mimepart, get_text);
498 
499 /***
500  * @method mime_part:get_digest()
501  * Returns the unique digest for this mime part
502  * @return {string} 128 characters hex string with digest of the part
503  */
504 LUA_FUNCTION_DEF (mimepart, get_digest);
505 
506 /***
507  * @method mime_part:get_id()
508  * Returns the order of the part in parts list
509  * @return {number} index of the part (starting from 1 as it is Lua API)
510  */
511 LUA_FUNCTION_DEF (mimepart, get_id);
512 /***
513  * @method mime_part:is_broken()
514  * Returns true if mime part has incorrectly specified content type
515  * @return {bool} true if a part has bad content type
516  */
517 LUA_FUNCTION_DEF (mimepart, is_broken);
518 /***
519  * @method mime_part:headers_foreach(callback, [params])
520  * This method calls `callback` for each header that satisfies some condition.
521  * By default, all headers are iterated unless `callback` returns `true`. Nil or
522  * false means continue of iterations.
523  * Params could be as following:
524  *
525  * - `full`: header value is full table of all attributes @see task:get_header_full for details
526  * - `regexp`: return headers that satisfies the specified regexp
527  * @param {function} callback function from header name and header value
528  * @param {table} params optional parameters
529  */
530 LUA_FUNCTION_DEF (mimepart, headers_foreach);
531 /***
532  * @method mime_part:get_parent()
533  * Returns parent part for this part
534  * @return {rspamd_mimepart} parent part or nil
535  */
536 LUA_FUNCTION_DEF (mimepart, get_parent);
537 
538 /***
539  * @method mime_part:get_specific()
540  * Returns specific lua content for this part
541  * @return {any} specific lua content
542  */
543 LUA_FUNCTION_DEF (mimepart, get_specific);
544 
545 /***
546  * @method mime_part:set_specific(<any>)
547  * Sets a specific content for this part
548  * @return {any} previous specific lua content (or nil)
549  */
550 LUA_FUNCTION_DEF (mimepart, set_specific);
551 
552 /***
553  * @method mime_part:is_specific(<any>)
554  * Returns true if part has specific lua content
555  * @return {boolean} flag
556  */
557 LUA_FUNCTION_DEF (mimepart, is_specific);
558 
559 /***
560  * @method mime_part:get_urls([need_emails|list_protos][, need_images])
561  * Get all URLs found in a mime part. Telephone urls and emails are not included unless explicitly asked in `list_protos`
 * @param {boolean} need_emails if `true` then return also email urls, this can be a comma separated string of protocols desired or a table (e.g. `mailto` or `telephone`)
563  * @param {boolean} need_images return urls from images (<img src=...>) as well
564  * @return {table rspamd_url} list of all urls found
565  */
566 LUA_FUNCTION_DEF (mimepart, get_urls);
567 
568 static const struct luaL_reg mimepartlib_m[] = {
569 	LUA_INTERFACE_DEF (mimepart, get_content),
570 	LUA_INTERFACE_DEF (mimepart, get_raw_content),
571 	LUA_INTERFACE_DEF (mimepart, get_length),
572 	LUA_INTERFACE_DEF (mimepart, get_type),
573 	LUA_INTERFACE_DEF (mimepart, get_type_full),
574 	LUA_INTERFACE_DEF (mimepart, get_detected_type),
575 	LUA_INTERFACE_DEF (mimepart, get_detected_ext),
576 	LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
577 	LUA_INTERFACE_DEF (mimepart, get_cte),
578 	LUA_INTERFACE_DEF (mimepart, get_filename),
579 	LUA_INTERFACE_DEF (mimepart, get_boundary),
580 	LUA_INTERFACE_DEF (mimepart, get_enclosing_boundary),
581 	LUA_INTERFACE_DEF (mimepart, get_header),
582 	LUA_INTERFACE_DEF (mimepart, get_header_raw),
583 	LUA_INTERFACE_DEF (mimepart, get_header_full),
584 	LUA_INTERFACE_DEF (mimepart, get_header_count),
585 	LUA_INTERFACE_DEF (mimepart, get_raw_headers),
586 	LUA_INTERFACE_DEF (mimepart, get_headers),
587 	LUA_INTERFACE_DEF (mimepart, is_image),
588 	LUA_INTERFACE_DEF (mimepart, get_image),
589 	LUA_INTERFACE_DEF (mimepart, is_archive),
590 	LUA_INTERFACE_DEF (mimepart, get_archive),
591 	LUA_INTERFACE_DEF (mimepart, is_multipart),
592 	LUA_INTERFACE_DEF (mimepart, is_message),
593 	LUA_INTERFACE_DEF (mimepart, get_children),
594 	LUA_INTERFACE_DEF (mimepart, get_parent),
595 	LUA_INTERFACE_DEF (mimepart, get_urls),
596 	LUA_INTERFACE_DEF (mimepart, is_text),
597 	LUA_INTERFACE_DEF (mimepart, is_broken),
598 	LUA_INTERFACE_DEF (mimepart, is_attachment),
599 	LUA_INTERFACE_DEF (mimepart, get_text),
600 	LUA_INTERFACE_DEF (mimepart, get_digest),
601 	LUA_INTERFACE_DEF (mimepart, get_id),
602 	LUA_INTERFACE_DEF (mimepart, headers_foreach),
603 	LUA_INTERFACE_DEF (mimepart, get_specific),
604 	LUA_INTERFACE_DEF (mimepart, set_specific),
605 	LUA_INTERFACE_DEF (mimepart, is_specific),
606 	{"__tostring", rspamd_lua_class_tostring},
607 	{NULL, NULL}
608 };
609 
610 
611 static struct rspamd_mime_text_part *
lua_check_textpart(lua_State * L)612 lua_check_textpart (lua_State * L)
613 {
614 	void *ud = rspamd_lua_check_udata (L, 1, "rspamd{textpart}");
615 	luaL_argcheck (L, ud != NULL, 1, "'textpart' expected");
616 	return ud ? *((struct rspamd_mime_text_part **)ud) : NULL;
617 }
618 
619 static struct rspamd_mime_part *
lua_check_mimepart(lua_State * L)620 lua_check_mimepart (lua_State * L)
621 {
622 	void *ud = rspamd_lua_check_udata (L, 1, "rspamd{mimepart}");
623 	luaL_argcheck (L, ud != NULL, 1, "'mimepart' expected");
624 	return ud ? *((struct rspamd_mime_part **)ud) : NULL;
625 }
626 
627 
628 static gint
lua_textpart_is_utf(lua_State * L)629 lua_textpart_is_utf (lua_State * L)
630 {
631 	LUA_TRACE_POINT;
632 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
633 
634 	if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
635 		lua_pushboolean (L, FALSE);
636 		return 1;
637 	}
638 
639 	lua_pushboolean (L, IS_TEXT_PART_UTF (part));
640 
641 	return 1;
642 }
643 
644 
645 static gint
lua_textpart_has_8bit_raw(lua_State * L)646 lua_textpart_has_8bit_raw (lua_State * L)
647 {
648 	LUA_TRACE_POINT;
649 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
650 
651 	if (part) {
652 		if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW) {
653 			lua_pushboolean (L, TRUE);
654 		}
655 		else {
656 			lua_pushboolean (L, FALSE);
657 		}
658 	}
659 	else {
660 		return luaL_error (L, "invalid arguments");
661 	}
662 
663 	return 1;
664 }
665 
666 static gint
lua_textpart_has_8bit(lua_State * L)667 lua_textpart_has_8bit (lua_State * L)
668 {
669 	LUA_TRACE_POINT;
670 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
671 
672 	if (part) {
673 		if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED) {
674 			lua_pushboolean (L, TRUE);
675 		}
676 		else {
677 			lua_pushboolean (L, FALSE);
678 		}
679 	}
680 	else {
681 		return luaL_error (L, "invalid arguments");
682 	}
683 
684 	return 1;
685 }
686 
687 
688 static gint
lua_textpart_get_content(lua_State * L)689 lua_textpart_get_content (lua_State * L)
690 {
691 	LUA_TRACE_POINT;
692 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
693 	struct rspamd_lua_text *t;
694 	gsize len;
695 	const gchar *start, *type = NULL;
696 
697 	if (part == NULL) {
698 		lua_pushnil (L);
699 		return 1;
700 	}
701 
702 	if (lua_type (L, 2) == LUA_TSTRING) {
703 		type = lua_tostring (L, 2);
704 	}
705 
706 	if (!type) {
707 		if (IS_TEXT_PART_EMPTY (part)) {
708 			lua_pushnil (L);
709 			return 1;
710 		}
711 		start = part->utf_content.begin;
712 		len = part->utf_content.len;
713 	}
714 	else if (strcmp (type, "content") == 0) {
715 		if (IS_TEXT_PART_EMPTY (part)) {
716 			lua_pushnil (L);
717 			return 1;
718 		}
719 
720 		start = part->utf_content.begin;
721 		len = part->utf_content.len;
722 	}
723 	else if (strcmp (type, "content_oneline") == 0) {
724 		if (IS_TEXT_PART_EMPTY (part)) {
725 			lua_pushnil (L);
726 			return 1;
727 		}
728 
729 		start = part->utf_stripped_content->data;
730 		len = part->utf_stripped_content->len;
731 	}
732 	else if (strcmp (type, "raw_parsed") == 0) {
733 		if (part->parsed.len == 0) {
734 			lua_pushnil (L);
735 			return 1;
736 		}
737 
738 		start = part->parsed.begin;
739 		len = part->parsed.len;
740 	}
741 	else if (strcmp (type, "raw_utf") == 0) {
742 		if (part->utf_raw_content == NULL || part->utf_raw_content->len == 0) {
743 			lua_pushnil (L);
744 			return 1;
745 		}
746 
747 		start = part->utf_raw_content->data;
748 		len = part->utf_raw_content->len;
749 	}
750 	else if (strcmp (type, "raw") == 0) {
751 		if (part->raw.len == 0) {
752 			lua_pushnil (L);
753 			return 1;
754 		}
755 
756 		start = part->raw.begin;
757 		len = part->raw.len;
758 	}
759 	else {
760 		return luaL_error (L, "invalid content type: %s", type);
761 	}
762 
763 	t = lua_newuserdata (L, sizeof (*t));
764 	rspamd_lua_setclass (L, "rspamd{text}", -1);
765 
766 	t->start = start;
767 	t->len = len;
768 	t->flags = 0;
769 
770 	return 1;
771 }
772 
773 static gint
lua_textpart_get_raw_content(lua_State * L)774 lua_textpart_get_raw_content (lua_State * L)
775 {
776 	LUA_TRACE_POINT;
777 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
778 	struct rspamd_lua_text *t;
779 
780 	if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
781 		lua_pushnil (L);
782 		return 1;
783 	}
784 
785 	t = lua_newuserdata (L, sizeof (*t));
786 	rspamd_lua_setclass (L, "rspamd{text}", -1);
787 	t->start = part->raw.begin;
788 	t->len = part->raw.len;
789 	t->flags = 0;
790 
791 	return 1;
792 }
793 
794 static gint
lua_textpart_get_content_oneline(lua_State * L)795 lua_textpart_get_content_oneline (lua_State * L)
796 {
797 	LUA_TRACE_POINT;
798 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
799 	struct rspamd_lua_text *t;
800 
801 	if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
802 		lua_pushnil (L);
803 		return 1;
804 	}
805 
806 	t = lua_newuserdata (L, sizeof (*t));
807 	rspamd_lua_setclass (L, "rspamd{text}", -1);
808 	t->start = part->utf_stripped_content->data;
809 	t->len = part->utf_stripped_content->len;
810 	t->flags = 0;
811 
812 	return 1;
813 }
814 
815 static gint
lua_textpart_get_length(lua_State * L)816 lua_textpart_get_length (lua_State * L)
817 {
818 	LUA_TRACE_POINT;
819 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
820 
821 	if (part == NULL) {
822 		lua_pushnil (L);
823 		return 1;
824 	}
825 
826 	if (IS_TEXT_PART_EMPTY (part) || part->utf_content.len == 0) {
827 		lua_pushinteger (L, 0);
828 	}
829 	else {
830 		lua_pushinteger (L, part->utf_content.len);
831 	}
832 
833 	return 1;
834 }
835 
836 static gint
lua_textpart_get_raw_length(lua_State * L)837 lua_textpart_get_raw_length (lua_State * L)
838 {
839 	LUA_TRACE_POINT;
840 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
841 
842 	if (part == NULL) {
843 		lua_pushnil (L);
844 		return 1;
845 	}
846 
847 	lua_pushinteger (L, part->raw.len);
848 
849 	return 1;
850 }
851 
852 static gint
lua_textpart_get_urls_length(lua_State * L)853 lua_textpart_get_urls_length (lua_State * L)
854 {
855 	LUA_TRACE_POINT;
856 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
857 	GList *cur;
858 	guint total = 0;
859 	struct rspamd_process_exception *ex;
860 
861 	if (part == NULL) {
862 		lua_pushnil (L);
863 		return 1;
864 	}
865 
866 	for (cur = part->exceptions; cur != NULL; cur = g_list_next (cur)) {
867 		ex = cur->data;
868 
869 		if (ex->type == RSPAMD_EXCEPTION_URL) {
870 			total += ex->len;
871 		}
872 	}
873 
874 	lua_pushinteger (L, total);
875 
876 	return 1;
877 }
878 
879 static gint
lua_textpart_get_lines_count(lua_State * L)880 lua_textpart_get_lines_count (lua_State * L)
881 {
882 	LUA_TRACE_POINT;
883 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
884 
885 	if (part == NULL) {
886 		lua_pushnil (L);
887 		return 1;
888 	}
889 
890 	if (IS_TEXT_PART_EMPTY (part)) {
891 		lua_pushinteger (L, 0);
892 	}
893 	else {
894 		lua_pushinteger (L, part->nlines);
895 	}
896 
897 	return 1;
898 }
899 
900 static gint
lua_textpart_get_words_count(lua_State * L)901 lua_textpart_get_words_count (lua_State *L)
902 {
903 	LUA_TRACE_POINT;
904 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
905 
906 	if (part == NULL) {
907 		lua_pushnil (L);
908 		return 1;
909 	}
910 
911 	if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
912 		lua_pushinteger (L, 0);
913 	}
914 	else {
915 		lua_pushinteger (L, part->nwords);
916 	}
917 
918 	return 1;
919 }
920 
921 static inline enum rspamd_lua_words_type
word_extract_type_from_string(const gchar * how_str)922 word_extract_type_from_string (const gchar *how_str)
923 {
924 	enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_MAX;
925 
926 	if (strcmp (how_str, "stem") == 0) {
927 		how = RSPAMD_LUA_WORDS_STEM;
928 	}
929 	else if (strcmp (how_str, "norm") == 0) {
930 		how = RSPAMD_LUA_WORDS_NORM;
931 	}
932 	else if (strcmp (how_str, "raw") == 0) {
933 		how = RSPAMD_LUA_WORDS_RAW;
934 	}
935 	else if (strcmp (how_str, "full") == 0) {
936 		how = RSPAMD_LUA_WORDS_FULL;
937 	}
938 
939 	return how;
940 }
941 
942 static gint
lua_textpart_get_words(lua_State * L)943 lua_textpart_get_words (lua_State *L)
944 {
945 	LUA_TRACE_POINT;
946 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
947 	enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
948 
949 	if (part == NULL) {
950 		return luaL_error (L, "invalid arguments");
951 	}
952 
953 	if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
954 		lua_createtable (L, 0, 0);
955 	}
956 	else {
957 		if (lua_type (L, 2) == LUA_TSTRING) {
958 			const gchar *how_str = lua_tostring (L, 2);
959 
960 			how = word_extract_type_from_string (how_str);
961 
962 			if (how == RSPAMD_LUA_WORDS_MAX) {
963 				return luaL_error (L, "invalid extraction type: %s", how_str);
964 			}
965 		}
966 
967 		return rspamd_lua_push_words (L, part->utf_words, how);
968 	}
969 
970 	return 1;
971 }
972 
973 static gint
lua_textpart_filter_words(lua_State * L)974 lua_textpart_filter_words (lua_State *L)
975 {
976 	LUA_TRACE_POINT;
977 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
978 	struct rspamd_lua_regexp *re = lua_check_regexp (L, 2);
979 	gint lim = -1;
980 	enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
981 
982 	if (part == NULL || re == NULL) {
983 		return luaL_error (L, "invalid arguments");
984 	}
985 
986 	if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
987 		lua_createtable (L, 0, 0);
988 	}
989 	else {
990 		if (lua_type (L, 3) == LUA_TSTRING) {
991 			const gchar *how_str = lua_tostring (L, 3);
992 
993 			how = word_extract_type_from_string (how_str);
994 
995 			if (how == RSPAMD_LUA_WORDS_MAX) {
996 				return luaL_error (L, "invalid extraction type: %s", how_str);
997 			}
998 		}
999 
1000 		if (lua_type (L, 4) == LUA_TNUMBER) {
1001 			lim = lua_tointeger (L, 4);
1002 		}
1003 
1004 		guint cnt, i;
1005 
1006 		lua_createtable (L, 8, 0);
1007 
1008 		for (i = 0, cnt = 1; i < part->utf_words->len; i ++) {
1009 			rspamd_stat_token_t *w = &g_array_index (part->utf_words,
1010 					rspamd_stat_token_t, i);
1011 
1012 			switch (how) {
1013 			case RSPAMD_LUA_WORDS_STEM:
1014 				if (w->stemmed.len > 0) {
1015 					if (rspamd_regexp_match (re->re, w->stemmed.begin,
1016 							w->stemmed.len, FALSE)) {
1017 						lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
1018 						lua_rawseti (L, -2, cnt++);
1019 					}
1020 				}
1021 				break;
1022 			case RSPAMD_LUA_WORDS_NORM:
1023 				if (w->normalized.len > 0) {
1024 					if (rspamd_regexp_match (re->re, w->normalized.begin,
1025 							w->normalized.len, FALSE)) {
1026 						lua_pushlstring (L, w->normalized.begin, w->normalized.len);
1027 						lua_rawseti (L, -2, cnt++);
1028 					}
1029 				}
1030 				break;
1031 			case RSPAMD_LUA_WORDS_RAW:
1032 				if (w->original.len > 0) {
1033 					if (rspamd_regexp_match (re->re, w->original.begin,
1034 							w->original.len, TRUE)) {
1035 						lua_pushlstring (L, w->original.begin, w->original.len);
1036 						lua_rawseti (L, -2, cnt++);
1037 					}
1038 				}
1039 				break;
1040 			case RSPAMD_LUA_WORDS_FULL:
1041 				if (rspamd_regexp_match (re->re, w->normalized.begin,
1042 						w->normalized.len, FALSE)) {
1043 					rspamd_lua_push_full_word (L, w);
1044 					/* Push to the resulting vector */
1045 					lua_rawseti (L, -2, cnt++);
1046 				}
1047 				break;
1048 			default:
1049 				break;
1050 			}
1051 
1052 			if (lim > 0 && cnt >= lim) {
1053 				break;
1054 			}
1055 		}
1056 	}
1057 
1058 	return 1;
1059 }
1060 
1061 static gint
lua_textpart_is_empty(lua_State * L)1062 lua_textpart_is_empty (lua_State * L)
1063 {
1064 	LUA_TRACE_POINT;
1065 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1066 
1067 	if (part == NULL) {
1068 		lua_pushnil (L);
1069 		return 1;
1070 	}
1071 
1072 	lua_pushboolean (L, IS_TEXT_PART_EMPTY (part));
1073 
1074 	return 1;
1075 }
1076 
1077 static gint
lua_textpart_is_html(lua_State * L)1078 lua_textpart_is_html (lua_State * L)
1079 {
1080 	LUA_TRACE_POINT;
1081 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1082 
1083 	if (part == NULL) {
1084 		lua_pushnil (L);
1085 		return 1;
1086 	}
1087 
1088 	lua_pushboolean (L, IS_TEXT_PART_HTML (part));
1089 
1090 	return 1;
1091 }
1092 
1093 static gint
lua_textpart_get_html(lua_State * L)1094 lua_textpart_get_html (lua_State * L)
1095 {
1096 	LUA_TRACE_POINT;
1097 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1098 	struct html_content **phc;
1099 
1100 	if (part == NULL || part->html == NULL) {
1101 		lua_pushnil (L);
1102 	}
1103 	else {
1104 		phc = lua_newuserdata (L, sizeof (*phc));
1105 		rspamd_lua_setclass (L, "rspamd{html}", -1);
1106 		*phc = part->html;
1107 	}
1108 
1109 	return 1;
1110 }
1111 
1112 static gint
lua_textpart_get_language(lua_State * L)1113 lua_textpart_get_language (lua_State * L)
1114 {
1115 	LUA_TRACE_POINT;
1116 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1117 
1118 	if (part != NULL) {
1119 		if (part->language != NULL && part->language[0] != '\0') {
1120 			lua_pushstring (L, part->language);
1121 			return 1;
1122 		}
1123 		else {
1124 			lua_pushnil (L);
1125 		}
1126 	}
1127 	else {
1128 		return luaL_error (L, "invalid arguments");
1129 	}
1130 
1131 	return 1;
1132 }
1133 
1134 static gint
lua_textpart_get_charset(lua_State * L)1135 lua_textpart_get_charset (lua_State * L)
1136 {
1137 	LUA_TRACE_POINT;
1138 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1139 
1140 	if (part != NULL) {
1141 		if (part->real_charset != NULL) {
1142 			lua_pushstring (L, part->real_charset);
1143 			return 1;
1144 		}
1145 		else {
1146 			lua_pushnil (L);
1147 		}
1148 	}
1149 	else {
1150 		return luaL_error (L, "invalid arguments");
1151 	}
1152 
1153 	return 1;
1154 }
1155 
1156 static gint
lua_textpart_get_languages(lua_State * L)1157 lua_textpart_get_languages (lua_State * L)
1158 {
1159 	LUA_TRACE_POINT;
1160 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1161 	guint i;
1162 	struct rspamd_lang_detector_res *cur;
1163 
1164 	if (part != NULL) {
1165 		if (part->languages != NULL) {
1166 			lua_createtable (L, part->languages->len, 0);
1167 
1168 			PTR_ARRAY_FOREACH (part->languages, i, cur) {
1169 				lua_createtable (L, 0, 2);
1170 				lua_pushstring (L, "code");
1171 				lua_pushstring (L, cur->lang);
1172 				lua_settable (L, -3);
1173 				lua_pushstring (L, "prob");
1174 				lua_pushnumber (L, cur->prob);
1175 				lua_settable (L, -3);
1176 
1177 				lua_rawseti (L, -2, i + 1);
1178 			}
1179 		}
1180 		else {
1181 			lua_newtable (L);
1182 		}
1183 	}
1184 	else {
1185 		luaL_error (L, "invalid arguments");
1186 	}
1187 
1188 	return 1;
1189 }
1190 
1191 struct lua_shingle_data {
1192 	guint64 hash;
1193 	rspamd_ftok_t t1;
1194 	rspamd_ftok_t t2;
1195 	rspamd_ftok_t t3;
1196 };
1197 
1198 struct lua_shingle_filter_cbdata {
1199 	struct rspamd_mime_text_part *part;
1200 	rspamd_mempool_t *pool;
1201 };
1202 
1203 #define STORE_TOKEN(i, t) do { \
1204     if ((i) < part->utf_words->len) { \
1205         word = &g_array_index (part->utf_words, rspamd_stat_token_t, (i)); \
1206         sd->t.begin = word->stemmed.begin; \
1207         sd->t.len = word->stemmed.len; \
1208     } \
1209     }while (0)
1210 
1211 static guint64
lua_shingles_filter(guint64 * input,gsize count,gint shno,const guchar * key,gpointer ud)1212 lua_shingles_filter (guint64 *input, gsize count,
1213 					 gint shno, const guchar *key, gpointer ud)
1214 {
1215 	guint64 minimal = G_MAXUINT64;
1216 	gsize i, min_idx = 0;
1217 	struct lua_shingle_data *sd;
1218 	rspamd_stat_token_t *word;
1219 	struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *)ud;
1220 	struct rspamd_mime_text_part *part;
1221 
1222 	part = cbd->part;
1223 
1224 	for (i = 0; i < count; i ++) {
1225 		if (minimal > input[i]) {
1226 			minimal = input[i];
1227 			min_idx = i;
1228 		}
1229 	}
1230 
1231 	sd = rspamd_mempool_alloc0 (cbd->pool, sizeof (*sd));
1232 	sd->hash = minimal;
1233 
1234 
1235 	STORE_TOKEN (min_idx, t1);
1236 	STORE_TOKEN (min_idx + 1, t2);
1237 	STORE_TOKEN (min_idx + 2, t3);
1238 
1239 	return GPOINTER_TO_SIZE (sd);
1240 }
1241 
1242 #undef STORE_TOKEN
1243 
1244 static gint
lua_textpart_get_fuzzy_hashes(lua_State * L)1245 lua_textpart_get_fuzzy_hashes (lua_State * L)
1246 {
1247 	LUA_TRACE_POINT;
1248 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1249 	rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 2);
1250 	guchar key[rspamd_cryptobox_HASHBYTES], digest[rspamd_cryptobox_HASHBYTES],
1251 			hexdigest[rspamd_cryptobox_HASHBYTES * 2 + 1], numbuf[64];
1252 	struct rspamd_shingle *sgl;
1253 	guint i;
1254 	struct lua_shingle_data *sd;
1255 	rspamd_cryptobox_hash_state_t st;
1256 	rspamd_stat_token_t *word;
1257 	struct lua_shingle_filter_cbdata cbd;
1258 
1259 
1260 	if (part == NULL || pool == NULL) {
1261 		return luaL_error (L, "invalid arguments");
1262 	}
1263 
1264 	if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
1265 		lua_pushnil (L);
1266 		lua_pushnil (L);
1267 	}
1268 	else {
1269 		/* TODO: add keys and algorithms support */
1270 		rspamd_cryptobox_hash (key, "rspamd", strlen ("rspamd"), NULL, 0);
1271 
1272 		/* TODO: add short text support */
1273 
1274 		/* Calculate direct hash */
1275 		rspamd_cryptobox_hash_init (&st, key, rspamd_cryptobox_HASHKEYBYTES);
1276 
1277 		for (i = 0; i < part->utf_words->len; i ++) {
1278 			word = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
1279 			rspamd_cryptobox_hash_update (&st,
1280 					word->stemmed.begin, word->stemmed.len);
1281 		}
1282 
1283 		rspamd_cryptobox_hash_final (&st, digest);
1284 
1285 		rspamd_encode_hex_buf (digest, sizeof (digest), hexdigest,
1286 				sizeof (hexdigest));
1287 		lua_pushlstring (L, hexdigest, sizeof (hexdigest) - 1);
1288 
1289 		cbd.pool = pool;
1290 		cbd.part = part;
1291 		sgl = rspamd_shingles_from_text (part->utf_words, key,
1292 				pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH);
1293 
1294 		if (sgl == NULL) {
1295 			lua_pushnil (L);
1296 		}
1297 		else {
1298 			lua_createtable (L, G_N_ELEMENTS (sgl->hashes), 0);
1299 
1300 			for (i = 0; i < G_N_ELEMENTS (sgl->hashes); i ++) {
1301 				sd = GSIZE_TO_POINTER (sgl->hashes[i]);
1302 
1303 				lua_createtable (L, 4, 0);
1304 				rspamd_snprintf (numbuf, sizeof (numbuf), "%uL", sd->hash);
1305 				lua_pushstring (L, numbuf);
1306 				lua_rawseti (L, -2, 1);
1307 
1308 				/* Tokens */
1309 				lua_pushlstring (L, sd->t1.begin, sd->t1.len);
1310 				lua_rawseti (L, -2, 2);
1311 
1312 				lua_pushlstring (L, sd->t2.begin, sd->t2.len);
1313 				lua_rawseti (L, -2, 3);
1314 
1315 				lua_pushlstring (L, sd->t3.begin, sd->t3.len);
1316 				lua_rawseti (L, -2, 4);
1317 
1318 				lua_rawseti (L, -2, i + 1); /* Store table */
1319 			}
1320 		}
1321 	}
1322 
1323 	return 2;
1324 }
1325 
1326 static gint
lua_textpart_get_mimepart(lua_State * L)1327 lua_textpart_get_mimepart (lua_State * L)
1328 {
1329 	LUA_TRACE_POINT;
1330 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1331 	struct rspamd_mime_part **pmime;
1332 
1333 	if (part != NULL) {
1334 		if (part->mime_part != NULL) {
1335 			pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
1336 			rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
1337 			*pmime = part->mime_part;
1338 
1339 			return 1;
1340 		}
1341 	}
1342 
1343 	lua_pushnil (L);
1344 	return 1;
1345 }
1346 
/***
 * @method text_part:get_stats()
 * Returns a table with the following data:
 *
 * - `lines`: number of lines
 * - `empty_lines`: number of empty lines
 * - `spaces`: number of spaces
 * - `non_spaces`: number of non-space characters
 * - `double_spaces`: number of double spaces
 * - `ascii_characters`: number of ascii characters
 * - `non_ascii_characters`: number of non ascii characters
 * - `capital_letters`: number of capital letters
 * - `numeric_characters`: number of numeric characters
 * @return {table} table of stats
 */
1359 static gint
lua_textpart_get_stats(lua_State * L)1360 lua_textpart_get_stats (lua_State * L)
1361 {
1362 	LUA_TRACE_POINT;
1363 	struct rspamd_mime_text_part *part = lua_check_textpart (L);
1364 
1365 	if (part != NULL) {
1366 		lua_createtable (L, 0, 9);
1367 
1368 		lua_pushstring (L, "lines");
1369 		lua_pushinteger (L, part->nlines);
1370 		lua_settable (L, -3);
1371 		lua_pushstring (L, "empty_lines");
1372 		lua_pushinteger (L, part->empty_lines);
1373 		lua_settable (L, -3);
1374 		lua_pushstring (L, "spaces");
1375 		lua_pushinteger (L, part->spaces);
1376 		lua_settable (L, -3);
1377 		lua_pushstring (L, "non_spaces");
1378 		lua_pushinteger (L, part->non_spaces);
1379 		lua_settable (L, -3);
1380 		lua_pushstring (L, "double_spaces");
1381 		lua_pushinteger (L, part->double_spaces);
1382 		lua_settable (L, -3);
1383 		lua_pushstring (L, "ascii_characters");
1384 		lua_pushinteger (L, part->ascii_chars);
1385 		lua_settable (L, -3);
1386 		lua_pushstring (L, "non_ascii_characters");
1387 		lua_pushinteger (L, part->non_ascii_chars);
1388 		lua_settable (L, -3);
1389 		lua_pushstring (L, "capital_letters");
1390 		lua_pushinteger (L, part->capital_letters);
1391 		lua_settable (L, -3);
1392 		lua_pushstring (L, "numeric_characters");
1393 		lua_pushinteger (L, part->numeric_characters);
1394 		lua_settable (L, -3);
1395 	}
1396 	else {
1397 		return luaL_error (L, "invalid arguments");
1398 	}
1399 
1400 	return 1;
1401 }
1402 
1403 /* Mimepart implementation */
1404 
1405 static gint
lua_mimepart_get_content(lua_State * L)1406 lua_mimepart_get_content (lua_State * L)
1407 {
1408 	LUA_TRACE_POINT;
1409 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1410 	struct rspamd_lua_text *t;
1411 
1412 	if (part == NULL) {
1413 		lua_pushnil (L);
1414 		return 1;
1415 	}
1416 
1417 	t = lua_newuserdata (L, sizeof (*t));
1418 	rspamd_lua_setclass (L, "rspamd{text}", -1);
1419 	t->start = part->parsed_data.begin;
1420 	t->len = part->parsed_data.len;
1421 	t->flags = 0;
1422 
1423 	return 1;
1424 }
1425 
1426 static gint
lua_mimepart_get_raw_content(lua_State * L)1427 lua_mimepart_get_raw_content (lua_State * L)
1428 {
1429 	LUA_TRACE_POINT;
1430 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1431 	struct rspamd_lua_text *t;
1432 
1433 	if (part == NULL) {
1434 		lua_pushnil (L);
1435 		return 1;
1436 	}
1437 
1438 	t = lua_newuserdata (L, sizeof (*t));
1439 	rspamd_lua_setclass (L, "rspamd{text}", -1);
1440 	t->start = part->raw_data.begin;
1441 	t->len = part->raw_data.len;
1442 	t->flags = 0;
1443 
1444 	return 1;
1445 }
1446 
1447 static gint
lua_mimepart_get_length(lua_State * L)1448 lua_mimepart_get_length (lua_State * L)
1449 {
1450 	LUA_TRACE_POINT;
1451 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1452 
1453 	if (part == NULL) {
1454 		lua_pushnil (L);
1455 		return 1;
1456 	}
1457 
1458 	lua_pushinteger (L, part->parsed_data.len);
1459 
1460 	return 1;
1461 }
1462 
1463 static gint
lua_mimepart_get_type_common(lua_State * L,struct rspamd_content_type * ct,gboolean full)1464 lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
1465 		gboolean full)
1466 {
1467 
1468 	GHashTableIter it;
1469 	gpointer k, v;
1470 	struct rspamd_content_type_param *param;
1471 
1472 	if (ct == NULL) {
1473 		lua_pushnil (L);
1474 		lua_pushnil (L);
1475 		return 2;
1476 	}
1477 
1478 	lua_pushlstring (L, ct->type.begin, ct->type.len);
1479 	lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
1480 
1481 	if (!full) {
1482 		return 2;
1483 	}
1484 
1485 	lua_createtable (L, 0, 2 + (ct->attrs ?
1486 			g_hash_table_size (ct->attrs) : 0));
1487 
1488 	if (ct->charset.len > 0) {
1489 		lua_pushstring (L, "charset");
1490 		lua_pushlstring (L, ct->charset.begin, ct->charset.len);
1491 		lua_settable (L, -3);
1492 	}
1493 
1494 	if (ct->boundary.len > 0) {
1495 		lua_pushstring (L, "boundary");
1496 		lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
1497 		lua_settable (L, -3);
1498 	}
1499 
1500 	if (ct->attrs) {
1501 		g_hash_table_iter_init (&it, ct->attrs);
1502 
1503 		while (g_hash_table_iter_next (&it, &k, &v)) {
1504 			param = v;
1505 
1506 			if (param->name.len > 0 && param->value.len > 0) {
1507 				/* TODO: think about multiple values here */
1508 				lua_pushlstring (L, param->name.begin, param->name.len);
1509 				lua_pushlstring (L, param->value.begin, param->value.len);
1510 				lua_settable (L, -3);
1511 			}
1512 		}
1513 	}
1514 
1515 	return 3;
1516 }
1517 
1518 static gint
lua_mimepart_get_type(lua_State * L)1519 lua_mimepart_get_type (lua_State * L)
1520 {
1521 	LUA_TRACE_POINT;
1522 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1523 
1524 	if (part == NULL) {
1525 		return luaL_error (L, "invalid arguments");
1526 	}
1527 
1528 	return lua_mimepart_get_type_common (L, part->ct, FALSE);
1529 }
1530 
1531 static gint
lua_mimepart_get_type_full(lua_State * L)1532 lua_mimepart_get_type_full (lua_State * L)
1533 {
1534 	LUA_TRACE_POINT;
1535 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1536 
1537 	if (part == NULL) {
1538 		return luaL_error (L, "invalid arguments");
1539 	}
1540 
1541 	return lua_mimepart_get_type_common (L, part->ct, TRUE);
1542 }
1543 
1544 static gint
lua_mimepart_get_detected_type(lua_State * L)1545 lua_mimepart_get_detected_type (lua_State * L)
1546 {
1547 	LUA_TRACE_POINT;
1548 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1549 
1550 	if (part == NULL) {
1551 		return luaL_error (L, "invalid arguments");
1552 	}
1553 
1554 	return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
1555 }
1556 
1557 static gint
lua_mimepart_get_detected_type_full(lua_State * L)1558 lua_mimepart_get_detected_type_full (lua_State * L)
1559 {
1560 	LUA_TRACE_POINT;
1561 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1562 
1563 	if (part == NULL) {
1564 		return luaL_error (L, "invalid arguments");
1565 	}
1566 
1567 	return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
1568 }
1569 
1570 static gint
lua_mimepart_get_detected_ext(lua_State * L)1571 lua_mimepart_get_detected_ext (lua_State * L)
1572 {
1573 	LUA_TRACE_POINT;
1574 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1575 
1576 	if (part == NULL) {
1577 		return luaL_error (L, "invalid arguments");
1578 	}
1579 
1580 	if (part->detected_ext) {
1581 		lua_pushstring (L, part->detected_ext);
1582 	}
1583 	else {
1584 		lua_pushnil (L);
1585 	}
1586 
1587 	return 1;
1588 }
1589 
1590 static gint
lua_mimepart_get_cte(lua_State * L)1591 lua_mimepart_get_cte (lua_State * L)
1592 {
1593 	LUA_TRACE_POINT;
1594 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1595 
1596 	if (part == NULL) {
1597 		lua_pushnil (L);
1598 		return 1;
1599 	}
1600 
1601 	lua_pushstring (L, rspamd_cte_to_string (part->cte));
1602 
1603 	return 1;
1604 }
1605 
1606 static gint
lua_mimepart_get_filename(lua_State * L)1607 lua_mimepart_get_filename (lua_State * L)
1608 {
1609 	LUA_TRACE_POINT;
1610 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1611 
1612 	if (part == NULL || part->cd == NULL || part->cd->filename.len == 0) {
1613 		lua_pushnil (L);
1614 		return 1;
1615 	}
1616 
1617 	lua_pushlstring (L, part->cd->filename.begin, part->cd->filename.len);
1618 
1619 	return 1;
1620 }
1621 
1622 static gint
lua_mimepart_get_boundary(lua_State * L)1623 lua_mimepart_get_boundary (lua_State * L)
1624 {
1625 	LUA_TRACE_POINT;
1626 	struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
1627 
1628 	if (part == NULL) {
1629 		return luaL_error (L, "invalid arguments");
1630 	}
1631 
1632 	if (IS_PART_MULTIPART (part)) {
1633 		lua_pushlstring (L, part->specific.mp->boundary.begin,
1634 				part->specific.mp->boundary.len);
1635 	}
1636 	else {
1637 		parent = part->parent_part;
1638 
1639 		if (!parent || !IS_PART_MULTIPART (parent)) {
1640 			lua_pushnil (L);
1641 		}
1642 		else {
1643 			lua_pushlstring (L, parent->specific.mp->boundary.begin,
1644 					parent->specific.mp->boundary.len);
1645 		}
1646 	}
1647 
1648 	return 1;
1649 }
1650 
1651 static gint
lua_mimepart_get_enclosing_boundary(lua_State * L)1652 lua_mimepart_get_enclosing_boundary (lua_State * L)
1653 {
1654 	LUA_TRACE_POINT;
1655 	struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
1656 
1657 	if (part == NULL) {
1658 		return luaL_error (L, "invalid arguments");
1659 	}
1660 
1661 	parent = part->parent_part;
1662 
1663 	if (!parent || !IS_PART_MULTIPART (parent)) {
1664 		lua_pushnil (L);
1665 	}
1666 	else {
1667 		lua_pushlstring (L, parent->specific.mp->boundary.begin,
1668 				parent->specific.mp->boundary.len);
1669 	}
1670 
1671 	return 1;
1672 }
1673 
1674 static gint
lua_mimepart_get_header_common(lua_State * L,enum rspamd_lua_task_header_type how)1675 lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how)
1676 {
1677 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1678 	const gchar *name;
1679 	gboolean strong = FALSE;
1680 
1681 	name = luaL_checkstring (L, 2);
1682 
1683 	if (name && part) {
1684 
1685 		if (lua_isboolean (L, 3)) {
1686 			strong = lua_toboolean (L, 3);
1687 		}
1688 
1689 		return rspamd_lua_push_header_array (L,
1690 				name,
1691 				rspamd_message_get_header_from_hash(part->raw_headers, name, FALSE),
1692 				how,
1693 				strong);
1694 	}
1695 
1696 	lua_pushnil (L);
1697 
1698 	return 1;
1699 }
1700 
1701 static gint
lua_mimepart_get_header_full(lua_State * L)1702 lua_mimepart_get_header_full (lua_State * L)
1703 {
1704 	LUA_TRACE_POINT;
1705 	return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_FULL);
1706 }
1707 
1708 static gint
lua_mimepart_get_header(lua_State * L)1709 lua_mimepart_get_header (lua_State * L)
1710 {
1711 	LUA_TRACE_POINT;
1712 	return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_SIMPLE);
1713 }
1714 
1715 static gint
lua_mimepart_get_header_raw(lua_State * L)1716 lua_mimepart_get_header_raw (lua_State * L)
1717 {
1718 	LUA_TRACE_POINT;
1719 	return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_RAW);
1720 }
1721 
1722 static gint
lua_mimepart_get_header_count(lua_State * L)1723 lua_mimepart_get_header_count (lua_State * L)
1724 {
1725 	LUA_TRACE_POINT;
1726 	return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_COUNT);
1727 }
1728 
1729 static gint
lua_mimepart_get_raw_headers(lua_State * L)1730 lua_mimepart_get_raw_headers (lua_State *L)
1731 {
1732 	LUA_TRACE_POINT;
1733 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1734 	struct rspamd_lua_text *t;
1735 
1736 	if (part) {
1737 		t = lua_newuserdata (L, sizeof (*t));
1738 		rspamd_lua_setclass (L, "rspamd{text}", -1);
1739 		t->start = part->raw_headers_str;
1740 		t->len = part->raw_headers_len;
1741 		t->flags = 0;
1742 	}
1743 	else {
1744 		return luaL_error (L, "invalid arguments");
1745 	}
1746 
1747 
1748 	return 1;
1749 }
1750 
1751 static gint
lua_mimepart_get_headers(lua_State * L)1752 lua_mimepart_get_headers (lua_State *L)
1753 {
1754 	LUA_TRACE_POINT;
1755 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1756 	bool need_modified = lua_isnoneornil(L, 2) ? false : lua_toboolean(L, 2);
1757 
1758 	if (part) {
1759 		struct rspamd_mime_header *cur;
1760 		int i = 1;
1761 
1762 		lua_createtable (L, rspamd_mime_headers_count(part->raw_headers), 0);
1763 		LL_FOREACH2(part->headers_order, cur, ord_next) {
1764 			if (need_modified && cur->modified_chain) {
1765 				struct rspamd_mime_header *cur_modified;
1766 
1767 				LL_FOREACH(cur->modified_chain, cur_modified) {
1768 					rspamd_lua_push_header(L, cur_modified, RSPAMD_TASK_HEADER_PUSH_FULL);
1769 					lua_rawseti(L, -2, i++);
1770 				}
1771 			}
1772 			else {
1773 				rspamd_lua_push_header(L, cur, RSPAMD_TASK_HEADER_PUSH_FULL);
1774 				lua_rawseti(L, -2, i++);
1775 			}
1776 
1777 		}
1778 	}
1779 	else {
1780 		return luaL_error (L, "invalid arguments");
1781 	}
1782 
1783 
1784 	return 1;
1785 }
1786 
1787 
1788 static gint
lua_mimepart_is_image(lua_State * L)1789 lua_mimepart_is_image (lua_State * L)
1790 {
1791 	LUA_TRACE_POINT;
1792 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1793 
1794 	if (part == NULL) {
1795 		return luaL_error (L, "invalid arguments");
1796 	}
1797 
1798 	lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_IMAGE);
1799 
1800 	return 1;
1801 }
1802 
1803 static gint
lua_mimepart_is_archive(lua_State * L)1804 lua_mimepart_is_archive (lua_State * L)
1805 {
1806 	LUA_TRACE_POINT;
1807 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1808 
1809 	if (part == NULL) {
1810 		return luaL_error (L, "invalid arguments");
1811 	}
1812 
1813 	lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_ARCHIVE);
1814 
1815 	return 1;
1816 }
1817 
1818 static gint
lua_mimepart_is_multipart(lua_State * L)1819 lua_mimepart_is_multipart (lua_State * L)
1820 {
1821 	LUA_TRACE_POINT;
1822 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1823 
1824 	if (part == NULL) {
1825 		return luaL_error (L, "invalid arguments");
1826 	}
1827 
1828 	lua_pushboolean (L, IS_PART_MULTIPART (part) ? true : false);
1829 
1830 	return 1;
1831 }
1832 
1833 static gint
lua_mimepart_is_message(lua_State * L)1834 lua_mimepart_is_message (lua_State * L)
1835 {
1836 	LUA_TRACE_POINT;
1837 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1838 
1839 	if (part == NULL) {
1840 		return luaL_error (L, "invalid arguments");
1841 	}
1842 
1843 	lua_pushboolean (L, IS_PART_MESSAGE (part) ? true : false);
1844 
1845 	return 1;
1846 }
1847 
1848 static gint
lua_mimepart_is_attachment(lua_State * L)1849 lua_mimepart_is_attachment (lua_State * L)
1850 {
1851 	LUA_TRACE_POINT;
1852 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1853 
1854 	if (part == NULL) {
1855 		return luaL_error (L, "invalid arguments");
1856 	}
1857 
1858 	if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
1859 		lua_pushboolean (L, true);
1860 	}
1861 	else {
1862 		/* if has_name and not (image and Content-ID_header_present) */
1863 		if (part->cd && part->cd->filename.len > 0) {
1864 			if (part->part_type != RSPAMD_MIME_PART_IMAGE &&
1865 					rspamd_message_get_header_from_hash(part->raw_headers,
1866 							"Content-Id", FALSE) == NULL) {
1867 				/* Filename is presented but no content id and not image */
1868 				lua_pushboolean (L, true);
1869 			}
1870 			else {
1871 				/* Image or an embeded object */
1872 				lua_pushboolean (L, false);
1873 			}
1874 		}
1875 		else {
1876 			/* No filename */
1877 			lua_pushboolean (L, false);
1878 		}
1879 	}
1880 
1881 	return 1;
1882 }
1883 
1884 static gint
lua_mimepart_is_text(lua_State * L)1885 lua_mimepart_is_text (lua_State * L)
1886 {
1887 	LUA_TRACE_POINT;
1888 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1889 
1890 	if (part == NULL) {
1891 		return luaL_error (L, "invalid arguments");
1892 	}
1893 
1894 	lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_TEXT);
1895 
1896 	return 1;
1897 }
1898 
1899 static gint
lua_mimepart_is_broken(lua_State * L)1900 lua_mimepart_is_broken (lua_State * L)
1901 {
1902 	LUA_TRACE_POINT;
1903 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1904 
1905 	if (part == NULL) {
1906 		return luaL_error (L, "invalid arguments");
1907 	}
1908 
1909 	if (part->ct) {
1910 		lua_pushboolean (L, (part->ct->flags & RSPAMD_CONTENT_TYPE_BROKEN) ?
1911 				true : false);
1912 	}
1913 	else {
1914 		lua_pushboolean (L, false);
1915 	}
1916 
1917 	return 1;
1918 }
1919 
1920 static gint
lua_mimepart_get_image(lua_State * L)1921 lua_mimepart_get_image (lua_State * L)
1922 {
1923 	LUA_TRACE_POINT;
1924 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1925 	struct rspamd_image **pimg;
1926 
1927 	if (part == NULL) {
1928 		return luaL_error (L, "invalid arguments");
1929 	}
1930 
1931 	if (part->part_type != RSPAMD_MIME_PART_IMAGE || part->specific.img == NULL) {
1932 		lua_pushnil (L);
1933 	}
1934 	else {
1935 		pimg = lua_newuserdata (L, sizeof (*pimg));
1936 		*pimg = part->specific.img;
1937 		rspamd_lua_setclass (L, "rspamd{image}", -1);
1938 	}
1939 
1940 	return 1;
1941 }
1942 
1943 static gint
lua_mimepart_get_archive(lua_State * L)1944 lua_mimepart_get_archive (lua_State * L)
1945 {
1946 	LUA_TRACE_POINT;
1947 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1948 	struct rspamd_archive **parch;
1949 
1950 	if (part == NULL) {
1951 		return luaL_error (L, "invalid arguments");
1952 	}
1953 
1954 	if (part->part_type != RSPAMD_MIME_PART_ARCHIVE || part->specific.arch == NULL) {
1955 		lua_pushnil (L);
1956 	}
1957 	else {
1958 		parch = lua_newuserdata (L, sizeof (*parch));
1959 		*parch = part->specific.arch;
1960 		rspamd_lua_setclass (L, "rspamd{archive}", -1);
1961 	}
1962 
1963 	return 1;
1964 }
1965 
1966 static gint
lua_mimepart_get_children(lua_State * L)1967 lua_mimepart_get_children (lua_State * L)
1968 {
1969 	LUA_TRACE_POINT;
1970 	struct rspamd_mime_part *part = lua_check_mimepart (L);
1971 	struct rspamd_mime_part **pcur, *cur;
1972 	guint i;
1973 
1974 	if (part == NULL) {
1975 		return luaL_error (L, "invalid arguments");
1976 	}
1977 
1978 	if (!IS_PART_MULTIPART (part) || part->specific.mp->children == NULL) {
1979 		lua_pushnil (L);
1980 	}
1981 	else {
1982 		lua_createtable (L, part->specific.mp->children->len, 0);
1983 
1984 		PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) {
1985 			pcur = lua_newuserdata (L, sizeof (*pcur));
1986 			*pcur = cur;
1987 			rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
1988 			lua_rawseti (L, -2, i + 1);
1989 		}
1990 	}
1991 
1992 	return 1;
1993 }
1994 
1995 static gint
lua_mimepart_get_parent(lua_State * L)1996 lua_mimepart_get_parent (lua_State * L)
1997 {
1998 	LUA_TRACE_POINT;
1999 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2000 	struct rspamd_mime_part **pparent;
2001 
2002 	if (part == NULL) {
2003 		return luaL_error (L, "invalid arguments");
2004 	}
2005 
2006 	if (part->parent_part) {
2007 		pparent = lua_newuserdata (L, sizeof (*pparent));
2008 		*pparent = part->parent_part;
2009 		rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
2010 	}
2011 	else {
2012 		lua_pushnil (L);
2013 	}
2014 
2015 	return 1;
2016 }
2017 
2018 
2019 static gint
lua_mimepart_get_text(lua_State * L)2020 lua_mimepart_get_text (lua_State * L)
2021 {
2022 	LUA_TRACE_POINT;
2023 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2024 	struct rspamd_mime_text_part **ppart;
2025 
2026 	if (part == NULL) {
2027 		return luaL_error (L, "invalid arguments");
2028 	}
2029 
2030 	if (part->part_type != RSPAMD_MIME_PART_TEXT || part->specific.txt == NULL) {
2031 		lua_pushnil (L);
2032 	}
2033 	else {
2034 		ppart = lua_newuserdata (L, sizeof (*ppart));
2035 		*ppart = part->specific.txt;
2036 		rspamd_lua_setclass (L, "rspamd{textpart}", -1);
2037 	}
2038 
2039 	return 1;
2040 }
2041 
2042 static gint
lua_mimepart_get_digest(lua_State * L)2043 lua_mimepart_get_digest (lua_State * L)
2044 {
2045 	LUA_TRACE_POINT;
2046 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2047 	gchar digestbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
2048 
2049 	if (part == NULL) {
2050 		return luaL_error (L, "invalid arguments");
2051 	}
2052 
2053 	memset (digestbuf, 0, sizeof (digestbuf));
2054 	rspamd_encode_hex_buf (part->digest, sizeof (part->digest),
2055 			digestbuf, sizeof (digestbuf));
2056 	lua_pushstring (L, digestbuf);
2057 
2058 	return 1;
2059 }
2060 
2061 static gint
lua_mimepart_get_id(lua_State * L)2062 lua_mimepart_get_id (lua_State * L)
2063 {
2064 	LUA_TRACE_POINT;
2065 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2066 
2067 	if (part == NULL) {
2068 		return luaL_error (L, "invalid arguments");
2069 	}
2070 
2071 	lua_pushinteger (L, part->part_number);
2072 
2073 	return 1;
2074 }
2075 
2076 static gint
lua_mimepart_headers_foreach(lua_State * L)2077 lua_mimepart_headers_foreach (lua_State *L)
2078 {
2079 	LUA_TRACE_POINT;
2080 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2081 	enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE;
2082 	struct rspamd_lua_regexp *re = NULL;
2083 	struct rspamd_mime_header *hdr, *cur;
2084 	gint old_top;
2085 
2086 	if (part && lua_isfunction (L, 2)) {
2087 		if (lua_istable (L, 3)) {
2088 			lua_pushstring (L, "full");
2089 			lua_gettable (L, 3);
2090 
2091 			if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
2092 				how = RSPAMD_TASK_HEADER_PUSH_FULL;
2093 			}
2094 
2095 			lua_pop (L, 1);
2096 
2097 			lua_pushstring (L, "raw");
2098 			lua_gettable (L, 3);
2099 
2100 			if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
2101 				how = RSPAMD_TASK_HEADER_PUSH_RAW;
2102 			}
2103 
2104 			lua_pop (L, 1);
2105 
2106 			lua_pushstring (L, "regexp");
2107 			lua_gettable (L, 3);
2108 
2109 			if (lua_isuserdata (L, -1)) {
2110 				RSPAMD_LUA_CHECK_UDATA_PTR_OR_RETURN(L, -1, "rspamd{regexp}",
2111 						struct rspamd_lua_regexp, re);
2112 			}
2113 
2114 			lua_pop (L, 1);
2115 		}
2116 
2117 		if (part->headers_order) {
2118 			hdr = part->headers_order;
2119 
2120 			LL_FOREACH2 (hdr, cur, ord_next) {
2121 				if (re && re->re) {
2122 					if (!rspamd_regexp_match (re->re, cur->name,
2123 							strlen (cur->name),FALSE)) {
2124 						continue;
2125 					}
2126 				}
2127 
2128 				old_top = lua_gettop (L);
2129 				lua_pushvalue (L, 2);
2130 				lua_pushstring (L, cur->name);
2131 				rspamd_lua_push_header (L, cur, how);
2132 
2133 				if (lua_pcall (L, 2, LUA_MULTRET, 0) != 0) {
2134 					msg_err ("call to header_foreach failed: %s",
2135 							lua_tostring (L, -1));
2136 					lua_settop (L, old_top);
2137 					break;
2138 				}
2139 				else {
2140 					if (lua_gettop (L) > old_top) {
2141 						if (lua_isboolean (L, old_top + 1)) {
2142 							if (lua_toboolean (L, old_top + 1)) {
2143 								lua_settop (L, old_top);
2144 								break;
2145 							}
2146 						}
2147 					}
2148 				}
2149 
2150 				lua_settop (L, old_top);
2151 			}
2152 		}
2153 	}
2154 
2155 	return 0;
2156 }
2157 
2158 static gint
lua_mimepart_get_specific(lua_State * L)2159 lua_mimepart_get_specific (lua_State * L)
2160 {
2161 	LUA_TRACE_POINT;
2162 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2163 
2164 	if (part == NULL) {
2165 		return luaL_error (L, "invalid arguments");
2166 	}
2167 
2168 	if (part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
2169 		lua_pushnil (L);
2170 	}
2171 	else {
2172 		lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2173 	}
2174 
2175 	return 1;
2176 }
2177 
2178 static gint
lua_mimepart_get_urls(lua_State * L)2179 lua_mimepart_get_urls (lua_State * L)
2180 {
2181 	LUA_TRACE_POINT;
2182 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2183 
2184 	if (part == NULL) {
2185 		return luaL_error (L, "invalid arguments");
2186 	}
2187 
2188 	struct lua_tree_cb_data cb;
2189 	struct rspamd_url *u;
2190 	static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
2191 											   PROTOCOL_FILE|PROTOCOL_FTP;
2192 	gsize sz, max_urls = 0, i;
2193 
2194 	if (part->urls == NULL) {
2195 		lua_newtable (L);
2196 
2197 		return 1;
2198 	}
2199 
2200 	if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask,
2201 			~(0), max_urls)) {
2202 		return luaL_error (L, "invalid arguments");
2203 	}
2204 
2205 	sz = part->urls->len;
2206 
2207 	lua_createtable (L, sz, 0);
2208 
2209 	PTR_ARRAY_FOREACH (part->urls, i, u) {
2210 		lua_tree_url_callback (u, u, &cb);
2211 	}
2212 
2213 	lua_url_cbdata_dtor (&cb);
2214 
2215 	return 1;
2216 }
2217 
2218 static gint
lua_mimepart_is_specific(lua_State * L)2219 lua_mimepart_is_specific (lua_State * L)
2220 {
2221 	LUA_TRACE_POINT;
2222 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2223 
2224 	if (part == NULL) {
2225 		return luaL_error (L, "invalid arguments");
2226 	}
2227 
2228 	lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA);
2229 
2230 	return 1;
2231 }
2232 
2233 static gint
lua_mimepart_set_specific(lua_State * L)2234 lua_mimepart_set_specific (lua_State * L)
2235 {
2236 	LUA_TRACE_POINT;
2237 	struct rspamd_mime_part *part = lua_check_mimepart (L);
2238 
2239 	if (part == NULL || lua_isnil (L, 2)) {
2240 		return luaL_error (L, "invalid arguments");
2241 	}
2242 
2243 	if (part->part_type != RSPAMD_MIME_PART_UNDEFINED &&
2244 			part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
2245 		return luaL_error (L,
2246 				"internal error: trying to set specific lua content on part of type %d",
2247 				part->part_type);
2248 	}
2249 
2250 	if (part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
2251 		/* Push old specific data */
2252 		lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2253 		luaL_unref (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2254 	}
2255 	else {
2256 		part->part_type = RSPAMD_MIME_PART_CUSTOM_LUA;
2257 		lua_pushnil (L);
2258 	}
2259 
2260 	/* Now, we push argument on the position 2 and save its reference */
2261 	lua_pushvalue (L, 2);
2262 	part->specific.lua_specific.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
2263 	/* Now stack has just a return value as luaL_ref removes value from stack */
2264 
2265 	gint ltype = lua_type (L, 2);
2266 
2267 	switch (ltype) {
2268 	case LUA_TTABLE:
2269 		part->specific.lua_specific.type = RSPAMD_LUA_PART_TABLE;
2270 		break;
2271 	case LUA_TSTRING:
2272 		part->specific.lua_specific.type = RSPAMD_LUA_PART_STRING;
2273 		break;
2274 	case LUA_TUSERDATA:
2275 		if (rspamd_lua_check_udata_maybe (L, 2, "rspamd{text}")) {
2276 			part->specific.lua_specific.type = RSPAMD_LUA_PART_TEXT;
2277 		}
2278 		else {
2279 			part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
2280 		}
2281 		break;
2282 	case LUA_TFUNCTION:
2283 		part->specific.lua_specific.type = RSPAMD_LUA_PART_FUNCTION;
2284 		break;
2285 	default:
2286 		part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
2287 		break;
2288 	}
2289 
2290 	return 1;
2291 }
2292 
2293 void
luaopen_textpart(lua_State * L)2294 luaopen_textpart (lua_State * L)
2295 {
2296 	rspamd_lua_new_class (L, "rspamd{textpart}", textpartlib_m);
2297 	lua_pop (L, 1);
2298 }
2299 
2300 void
luaopen_mimepart(lua_State * L)2301 luaopen_mimepart (lua_State * L)
2302 {
2303 	rspamd_lua_new_class (L, "rspamd{mimepart}", mimepartlib_m);
2304 	lua_pop (L, 1);
2305 }
2306 
2307