1 /*-
2 * Copyright 2016 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lua_common.h"
18 #include "lua_url.h"
19 #include "libmime/message.h"
20 #include "libmime/lang_detection.h"
21 #include "libstat/stat_api.h"
22 #include "libcryptobox/cryptobox.h"
23 #include "libutil/shingles.h"
24
25 #include "contrib/uthash/utlist.h"
26
27 /* Textpart methods */
28 /***
29 * @module rspamd_textpart
30 * This module provides different methods to manipulate text parts data. Text parts
31 * could be obtained from the `rspamd_task` by using of method `task:get_text_parts()`
32 @example
33 rspamd_config.R_EMPTY_IMAGE = function (task)
34 parts = task:get_text_parts()
35 if parts then
36 for _,part in ipairs(parts) do
37 if part:is_empty() then
38 texts = task:get_texts()
39 if texts then
40 return true
41 end
42 return false
43 end
44 end
45 end
46 return false
47 end
48 */
49
50 /***
51 * @method text_part:is_utf()
52 * Return TRUE if part is a valid utf text
53 * @return {boolean} true if part is valid `UTF8` part
54 */
55 LUA_FUNCTION_DEF (textpart, is_utf);
56
57 /***
58 * @method text_part:has_8bit_raw()
59 * Return TRUE if a part has raw 8bit characters
60 * @return {boolean} true if a part has raw 8bit characters
61 */
62 LUA_FUNCTION_DEF (textpart, has_8bit_raw);
63
64 /***
65 * @method text_part:has_8bit()
 * Return TRUE if a part has encoded 8bit characters
67 * @return {boolean} true if a part has encoded 8bit characters
68 */
69 LUA_FUNCTION_DEF (textpart, has_8bit);
70
71 /***
72 * @method text_part:get_content([type])
73 * Get the text of the part (html tags stripped). Optional `type` defines type of content to get:
74 * - `content` (default): utf8 content with HTML tags stripped and newlines preserved
75 * - `content_oneline`: utf8 content with HTML tags and newlines stripped
76 * - `raw`: raw content, not mime decoded nor utf8 converted
77 * - `raw_parsed`: raw content, mime decoded, not utf8 converted
78 * - `raw_utf`: raw content, mime decoded, utf8 converted (but with HTML tags and newlines)
79 * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
80 */
81 LUA_FUNCTION_DEF (textpart, get_content);
82 /***
83 * @method text_part:get_raw_content()
84 * Get the original text of the part
85 * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
86 */
87 LUA_FUNCTION_DEF (textpart, get_raw_content);
88 /***
89 * @method text_part:get_content_oneline()
 * Get the text of the part (html tags and newlines stripped)
91 * @return {text} `UTF8` encoded content of the part (zero-copy if not converted to a lua string)
92 */
93 LUA_FUNCTION_DEF (textpart, get_content_oneline);
94 /***
95 * @method text_part:get_length()
96 * Get length of the text of the part
97 * @return {integer} length of part in **bytes**
98 */
99 LUA_FUNCTION_DEF (textpart, get_length);
100 /***
 * @method text_part:get_raw_length()
102 * Get length of the **raw** content of the part (e.g. HTML with tags unstripped)
103 * @return {integer} length of part in **bytes**
104 */
105 LUA_FUNCTION_DEF (textpart, get_raw_length);
106 /***
 * @method text_part:get_urls_length()
108 * Get length of the urls within the part
109 * @return {integer} length of urls in **bytes**
110 */
111 LUA_FUNCTION_DEF (textpart, get_urls_length);
112 /***
 * @method text_part:get_lines_count()
114 * Get lines number in the part
115 * @return {integer} number of lines in the part
116 */
117 LUA_FUNCTION_DEF (textpart, get_lines_count);
118 /***
 * @method text_part:get_stats()
120 * Returns a table with the following data:
121 * - `lines`: number of lines
122 * - `spaces`: number of spaces
123 * - `double_spaces`: double spaces
124 * - `empty_lines`: number of empty lines
125 * - `non_ascii_characters`: number of non ascii characters
126 * - `ascii_characters`: number of ascii characters
127 * @return {table} table of stats
128 */
129 LUA_FUNCTION_DEF (textpart, get_stats);
130 /***
 * @method text_part:get_words_count()
132 * Get words number in the part
133 * @return {integer} number of words in the part
134 */
135 LUA_FUNCTION_DEF (textpart, get_words_count);
136
137 /***
 * @method text_part:get_words([how])
139 * Get words in the part. Optional `how` argument defines type of words returned:
140 * - `stem`: stemmed words (default)
141 * - `norm`: normalised words (utf normalised + lowercased)
142 * - `raw`: raw words in utf (if possible)
143 * - `full`: list of tables, each table has the following fields:
144 * - [1] - stemmed word
145 * - [2] - normalised word
146 * - [3] - raw word
147 * - [4] - flags (table of strings)
148 * @return {table/strings} words in the part
149 */
150 LUA_FUNCTION_DEF (textpart, get_words);
151
152 /***
 * @method text_part:filter_words(regexp[, how[, max]])
154 * Filter words using some regexp:
155 * - `stem`: stemmed words (default)
156 * - `norm`: normalised words (utf normalised + lowercased)
157 * - `raw`: raw words in utf (if possible)
158 * - `full`: list of tables, each table has the following fields:
159 * - [1] - stemmed word
160 * - [2] - normalised word
161 * - [3] - raw word
162 * - [4] - flags (table of strings)
163 * @param {rspamd_regexp} regexp regexp to match
164 * @param {string} how what words to extract
165 * @param {number} max maximum number of hits returned (all hits if <= 0 or nil)
166 * @return {table/strings} words matching regexp
167 */
168 LUA_FUNCTION_DEF (textpart, filter_words);
169
170 /***
171 * @method text_part:is_empty()
172 * Returns `true` if the specified part is empty
173 * @return {bool} whether a part is empty
174 */
175 LUA_FUNCTION_DEF (textpart, is_empty);
176 /***
177 * @method text_part:is_html()
178 * Returns `true` if the specified part has HTML content
179 * @return {bool} whether a part is HTML part
180 */
181 LUA_FUNCTION_DEF (textpart, is_html);
182 /***
183 * @method text_part:get_html()
184 * Returns html content of the specified part
185 * @return {html} html content
186 */
187 LUA_FUNCTION_DEF (textpart, get_html);
188 /***
189 * @method text_part:get_language()
190 * Returns the code of the most used unicode script in the text part. Does not work with raw parts
191 * @return {string} short abbreviation (such as `ru`) for the script's language
192 */
193 LUA_FUNCTION_DEF (textpart, get_language);
194
195 /***
196 * @method text_part:get_charset()
197 * Returns part real charset
198 * @return {string} charset of the part
199 */
200 LUA_FUNCTION_DEF (textpart, get_charset);
201 /***
202 * @method text_part:get_languages()
203 * Returns array of tables of all languages detected for a part:
204 * - 'code': language code (short string)
205 * - 'prob': logarithm of probability
206 * @return {array|tables} all languages detected for the part
207 */
208 LUA_FUNCTION_DEF (textpart, get_languages);
209 /***
210 * @method text_part:get_fuzzy_hashes(mempool)
211 * @param {rspamd_mempool} mempool - memory pool (usually task pool)
212 * Returns direct hash of textpart as a string and array [1..32] of shingles each represented as a following table:
213 * - [1] - 64 bit fuzzy hash represented as a string
214 * - [2..4] - strings used to generate this hash
215 * @return {string,array|tables} fuzzy hashes calculated
216 */
217 LUA_FUNCTION_DEF (textpart, get_fuzzy_hashes);
218 /***
219 * @method text_part:get_mimepart()
220 * Returns the mime part object corresponding to this text part
221 * @return {mimepart} mimepart object
222 */
223 LUA_FUNCTION_DEF (textpart, get_mimepart);
224
/*
 * Lua method table for text parts: one entry per `textpart` method declared
 * above, plus a `__tostring` metamethod handled by rspamd_lua_class_tostring.
 * The list is terminated by the {NULL, NULL} sentinel.
 * NOTE(review): LUA_INTERFACE_DEF presumably expands to a {name, C function}
 * pair - confirm against lua_common.h.
 */
static const struct luaL_reg textpartlib_m[] = {
	LUA_INTERFACE_DEF (textpart, is_utf),
	LUA_INTERFACE_DEF (textpart, has_8bit_raw),
	LUA_INTERFACE_DEF (textpart, has_8bit),
	LUA_INTERFACE_DEF (textpart, get_content),
	LUA_INTERFACE_DEF (textpart, get_raw_content),
	LUA_INTERFACE_DEF (textpart, get_content_oneline),
	LUA_INTERFACE_DEF (textpart, get_length),
	LUA_INTERFACE_DEF (textpart, get_raw_length),
	LUA_INTERFACE_DEF (textpart, get_urls_length),
	LUA_INTERFACE_DEF (textpart, get_lines_count),
	LUA_INTERFACE_DEF (textpart, get_words_count),
	LUA_INTERFACE_DEF (textpart, get_words),
	LUA_INTERFACE_DEF (textpart, filter_words),
	LUA_INTERFACE_DEF (textpart, is_empty),
	LUA_INTERFACE_DEF (textpart, is_html),
	LUA_INTERFACE_DEF (textpart, get_html),
	LUA_INTERFACE_DEF (textpart, get_language),
	LUA_INTERFACE_DEF (textpart, get_charset),
	LUA_INTERFACE_DEF (textpart, get_languages),
	LUA_INTERFACE_DEF (textpart, get_mimepart),
	LUA_INTERFACE_DEF (textpart, get_stats),
	LUA_INTERFACE_DEF (textpart, get_fuzzy_hashes),
	{"__tostring", rspamd_lua_class_tostring},
	{NULL, NULL}
};
251
252 /* Mimepart methods */
253
254 /***
255 * @module rspamd_mimepart
256 * This module provides access to mime parts found in a message
257 @example
258 rspamd_config.MISSING_CONTENT_TYPE = function(task)
259 local parts = task:get_parts()
260 if parts and #parts > 1 then
261 -- We have more than one part
262 for _,p in ipairs(parts) do
263 local ct = p:get_header('Content-Type')
264 -- And some parts have no Content-Type header
265 if not ct then
266 return true
267 end
268 end
269 end
270 return false
271 end
272 */
273
274 /***
275 * @method mime_part:get_header(name[, case_sensitive])
276 * Get decoded value of a header specified with optional case_sensitive flag.
277 * By default headers are searched in caseless matter.
278 * @param {string} name name of header to get
279 * @param {boolean} case_sensitive case sensitiveness flag to search for a header
280 * @return {string} decoded value of a header
281 */
282 LUA_FUNCTION_DEF (mimepart, get_header);
283 /***
284 * @method mime_part:get_header_raw(name[, case_sensitive])
285 * Get raw value of a header specified with optional case_sensitive flag.
286 * By default headers are searched in caseless matter.
287 * @param {string} name name of header to get
288 * @param {boolean} case_sensitive case sensitiveness flag to search for a header
289 * @return {string} raw value of a header
290 */
291 LUA_FUNCTION_DEF (mimepart, get_header_raw);
292 /***
293 * @method mime_part:get_header_full(name[, case_sensitive])
294 * Get raw value of a header specified with optional case_sensitive flag.
295 * By default headers are searched in caseless matter. This method returns more
296 * information about the header as a list of tables with the following structure:
297 *
298 * - `name` - name of a header
299 * - `value` - raw value of a header
300 * - `decoded` - decoded value of a header
301 * - `tab_separated` - `true` if a header and a value are separated by `tab` character
302 * - `empty_separator` - `true` if there are no separator between a header and a value
303 * @param {string} name name of header to get
304 * @param {boolean} case_sensitive case sensitiveness flag to search for a header
305 * @return {list of tables} all values of a header as specified above
306 @example
307 function check_header_delimiter_tab(task, header_name)
308 for _,rh in ipairs(task:get_header_full(header_name)) do
309 if rh['tab_separated'] then return true end
310 end
311 return false
312 end
313 */
314 LUA_FUNCTION_DEF (mimepart, get_header_full);
315 /***
316 * @method mimepart:get_header_count(name[, case_sensitive])
317 * Lightweight version if you need just a header's count
 * By default headers are searched in a caseless manner.
319 * @param {string} name name of header to get
320 * @param {boolean} case_sensitive case sensitiveness flag to search for a header
 * @return {number} number of header's occurrences or 0 if not found
322 */
323 LUA_FUNCTION_DEF (mimepart, get_header_count);
324
325 /***
326 * @method mimepart:get_raw_headers()
327 * Get all undecoded headers of a mime part as a string
328 * @return {rspamd_text} all raw headers for a message as opaque text
329 */
330 LUA_FUNCTION_DEF (mimepart, get_raw_headers);
331
332 /***
333 * @method mimepart:get_headers()
334 * Get all undecoded headers of a mime part as a string
335 * @return {rspamd_text} all raw headers for a message as opaque text
336 */
337 LUA_FUNCTION_DEF (mimepart, get_headers);
338
339 /***
340 * @method mime_part:get_content()
341 * Get the parsed content of part
342 * @return {text} opaque text object (zero-copy if not casted to lua string)
343 */
344 LUA_FUNCTION_DEF (mimepart, get_content);
345 /***
346 * @method mime_part:get_raw_content()
347 * Get the raw content of part
348 * @return {text} opaque text object (zero-copy if not casted to lua string)
349 */
350 LUA_FUNCTION_DEF (mimepart, get_raw_content);
351 /***
352 * @method mime_part:get_length()
353 * Get length of the content of the part
354 * @return {integer} length of part in **bytes**
355 */
356 LUA_FUNCTION_DEF (mimepart, get_length);
357 /***
358 * @method mime_part:get_type()
359 * Extract content-type string of the mime part
360 * @return {string,string} content type in form 'type','subtype'
361 */
362 LUA_FUNCTION_DEF (mimepart, get_type);
363
364 /***
365 * @method mime_part:get_type_full()
366 * Extract content-type string of the mime part with all attributes
367 * @return {string,string,table} content type in form 'type','subtype', {attrs}
368 */
369 LUA_FUNCTION_DEF (mimepart, get_type_full);
370
371 /***
372 * @method mime_part:get_detected_type()
373 * Extract content-type string of the mime part. Use lua_magic detection
374 * @return {string,string} content type in form 'type','subtype'
375 */
376 LUA_FUNCTION_DEF (mimepart, get_detected_type);
377
378 /***
379 * @method mime_part:get_detected_type_full()
380 * Extract content-type string of the mime part with all attributes. Use lua_magic detection
381 * @return {string,string,table} content type in form 'type','subtype', {attrs}
382 */
383 LUA_FUNCTION_DEF (mimepart, get_detected_type_full);
384
385 /***
386 * @method mime_part:get_detected_ext()
387 * Returns a msdos extension name according to lua_magic detection
388 * @return {string} detected extension (see lua_magic.types)
389 */
390 LUA_FUNCTION_DEF (mimepart, get_detected_ext);
391
392 /***
393 * @method mime_part:get_cte()
394 * Extract content-transfer-encoding for a part
395 * @return {string} content transfer encoding (e.g. `base64` or `7bit`)
396 */
397 LUA_FUNCTION_DEF (mimepart, get_cte);
398
399 /***
400 * @method mime_part:get_filename()
401 * Extract filename associated with mime part if it is an attachment
402 * @return {string} filename or `nil` if no file is associated with this part
403 */
404 LUA_FUNCTION_DEF (mimepart, get_filename);
405 /***
406 * @method mime_part:is_image()
407 * Returns true if mime part is an image
408 * @return {bool} true if a part is an image
409 */
410 LUA_FUNCTION_DEF (mimepart, is_image);
411 /***
412 * @method mime_part:get_image()
413 * Returns rspamd_image structure associated with this part. This structure has
414 * the following methods:
415 *
416 * * `get_width` - return width of an image in pixels
417 * * `get_height` - return height of an image in pixels
418 * * `get_type` - return string representation of image's type (e.g. 'jpeg')
419 * * `get_filename` - return string with image's file name
420 * * `get_size` - return size in bytes
421 * @return {rspamd_image} image structure or nil if a part is not an image
422 */
423 LUA_FUNCTION_DEF (mimepart, get_image);
424 /***
425 * @method mime_part:is_archive()
426 * Returns true if mime part is an archive
427 * @return {bool} true if a part is an archive
428 */
429 LUA_FUNCTION_DEF (mimepart, is_archive);
430 /***
431 * @method mime_part:is_attachment()
432 * Returns true if mime part looks like an attachment
433 * @return {bool} true if a part looks like an attachment
434 */
435 LUA_FUNCTION_DEF (mimepart, is_attachment);
436
437 /***
438 * @method mime_part:get_archive()
439 * Returns rspamd_archive structure associated with this part. This structure has
440 * the following methods:
441 *
442 * * `get_files` - return list of strings with filenames inside archive
443 * * `get_files_full` - return list of tables with all information about files
444 * * `is_encrypted` - return true if an archive is encrypted
 * * `get_type` - return string representation of archive's type (e.g. 'zip')
446 * * `get_filename` - return string with archive's file name
447 * * `get_size` - return size in bytes
448 * @return {rspamd_archive} archive structure or nil if a part is not an archive
449 */
450 LUA_FUNCTION_DEF (mimepart, get_archive);
451 /***
452 * @method mime_part:is_multipart()
453 * Returns true if mime part is a multipart part
 * @return {bool} true if a part is a multipart part
455 */
456 LUA_FUNCTION_DEF (mimepart, is_multipart);
457 /***
458 * @method mime_part:is_message()
459 * Returns true if mime part is a message part (message/rfc822)
 * @return {bool} true if a part is a message part
461 */
462 LUA_FUNCTION_DEF (mimepart, is_message);
463 /***
464 * @method mime_part:get_boundary()
465 * Returns boundary for a part (extracted from parent multipart for normal parts and
466 * from the part itself for multipart)
467 * @return {string} boundary value or nil
468 */
469 LUA_FUNCTION_DEF (mimepart, get_boundary);
470
471 /***
472 * @method mime_part:get_enclosing_boundary()
473 * Returns an enclosing boundary for a part even for multiparts. For normal parts
474 * this method is identical to `get_boundary`
475 * @return {string} boundary value or nil
476 */
477 LUA_FUNCTION_DEF (mimepart, get_enclosing_boundary);
478
479 /***
480 * @method mime_part:get_children()
 * Returns rspamd_mimepart table of part's children. Returns nil if mime part is not multipart
482 * or a message part.
483 * @return {rspamd_mimepart} table of children
484 */
485 LUA_FUNCTION_DEF (mimepart, get_children);
486 /***
487 * @method mime_part:is_text()
488 * Returns true if mime part is a text part
489 * @return {bool} true if a part is a text part
490 */
491 LUA_FUNCTION_DEF (mimepart, is_text);
492 /***
493 * @method mime_part:get_text()
494 * Returns rspamd_textpart structure associated with this part.
 * @return {rspamd_textpart} textpart structure or nil if a part is not a text part
496 */
497 LUA_FUNCTION_DEF (mimepart, get_text);
498
499 /***
500 * @method mime_part:get_digest()
501 * Returns the unique digest for this mime part
502 * @return {string} 128 characters hex string with digest of the part
503 */
504 LUA_FUNCTION_DEF (mimepart, get_digest);
505
506 /***
507 * @method mime_part:get_id()
508 * Returns the order of the part in parts list
509 * @return {number} index of the part (starting from 1 as it is Lua API)
510 */
511 LUA_FUNCTION_DEF (mimepart, get_id);
512 /***
513 * @method mime_part:is_broken()
514 * Returns true if mime part has incorrectly specified content type
515 * @return {bool} true if a part has bad content type
516 */
517 LUA_FUNCTION_DEF (mimepart, is_broken);
518 /***
519 * @method mime_part:headers_foreach(callback, [params])
520 * This method calls `callback` for each header that satisfies some condition.
521 * By default, all headers are iterated unless `callback` returns `true`. Nil or
522 * false means continue of iterations.
523 * Params could be as following:
524 *
525 * - `full`: header value is full table of all attributes @see task:get_header_full for details
526 * - `regexp`: return headers that satisfies the specified regexp
527 * @param {function} callback function from header name and header value
528 * @param {table} params optional parameters
529 */
530 LUA_FUNCTION_DEF (mimepart, headers_foreach);
531 /***
532 * @method mime_part:get_parent()
533 * Returns parent part for this part
534 * @return {rspamd_mimepart} parent part or nil
535 */
536 LUA_FUNCTION_DEF (mimepart, get_parent);
537
538 /***
539 * @method mime_part:get_specific()
540 * Returns specific lua content for this part
541 * @return {any} specific lua content
542 */
543 LUA_FUNCTION_DEF (mimepart, get_specific);
544
545 /***
546 * @method mime_part:set_specific(<any>)
547 * Sets a specific content for this part
548 * @return {any} previous specific lua content (or nil)
549 */
550 LUA_FUNCTION_DEF (mimepart, set_specific);
551
552 /***
553 * @method mime_part:is_specific(<any>)
554 * Returns true if part has specific lua content
555 * @return {boolean} flag
556 */
557 LUA_FUNCTION_DEF (mimepart, is_specific);
558
559 /***
560 * @method mime_part:get_urls([need_emails|list_protos][, need_images])
561 * Get all URLs found in a mime part. Telephone urls and emails are not included unless explicitly asked in `list_protos`
 * @param {boolean} need_emails if `true` then return also email urls, this can be a comma separated string of protocols desired or a table (e.g. `mailto` or `telephone`)
563 * @param {boolean} need_images return urls from images (<img src=...>) as well
564 * @return {table rspamd_url} list of all urls found
565 */
566 LUA_FUNCTION_DEF (mimepart, get_urls);
567
/*
 * Lua method table for mime parts: one entry per `mimepart` method declared
 * above, plus a `__tostring` metamethod handled by rspamd_lua_class_tostring.
 * The list is terminated by the {NULL, NULL} sentinel.
 * NOTE(review): LUA_INTERFACE_DEF presumably expands to a {name, C function}
 * pair - confirm against lua_common.h.
 */
static const struct luaL_reg mimepartlib_m[] = {
	LUA_INTERFACE_DEF (mimepart, get_content),
	LUA_INTERFACE_DEF (mimepart, get_raw_content),
	LUA_INTERFACE_DEF (mimepart, get_length),
	LUA_INTERFACE_DEF (mimepart, get_type),
	LUA_INTERFACE_DEF (mimepart, get_type_full),
	LUA_INTERFACE_DEF (mimepart, get_detected_type),
	LUA_INTERFACE_DEF (mimepart, get_detected_ext),
	LUA_INTERFACE_DEF (mimepart, get_detected_type_full),
	LUA_INTERFACE_DEF (mimepart, get_cte),
	LUA_INTERFACE_DEF (mimepart, get_filename),
	LUA_INTERFACE_DEF (mimepart, get_boundary),
	LUA_INTERFACE_DEF (mimepart, get_enclosing_boundary),
	LUA_INTERFACE_DEF (mimepart, get_header),
	LUA_INTERFACE_DEF (mimepart, get_header_raw),
	LUA_INTERFACE_DEF (mimepart, get_header_full),
	LUA_INTERFACE_DEF (mimepart, get_header_count),
	LUA_INTERFACE_DEF (mimepart, get_raw_headers),
	LUA_INTERFACE_DEF (mimepart, get_headers),
	LUA_INTERFACE_DEF (mimepart, is_image),
	LUA_INTERFACE_DEF (mimepart, get_image),
	LUA_INTERFACE_DEF (mimepart, is_archive),
	LUA_INTERFACE_DEF (mimepart, get_archive),
	LUA_INTERFACE_DEF (mimepart, is_multipart),
	LUA_INTERFACE_DEF (mimepart, is_message),
	LUA_INTERFACE_DEF (mimepart, get_children),
	LUA_INTERFACE_DEF (mimepart, get_parent),
	LUA_INTERFACE_DEF (mimepart, get_urls),
	LUA_INTERFACE_DEF (mimepart, is_text),
	LUA_INTERFACE_DEF (mimepart, is_broken),
	LUA_INTERFACE_DEF (mimepart, is_attachment),
	LUA_INTERFACE_DEF (mimepart, get_text),
	LUA_INTERFACE_DEF (mimepart, get_digest),
	LUA_INTERFACE_DEF (mimepart, get_id),
	LUA_INTERFACE_DEF (mimepart, headers_foreach),
	LUA_INTERFACE_DEF (mimepart, get_specific),
	LUA_INTERFACE_DEF (mimepart, set_specific),
	LUA_INTERFACE_DEF (mimepart, is_specific),
	{"__tostring", rspamd_lua_class_tostring},
	{NULL, NULL}
};
609
610
611 static struct rspamd_mime_text_part *
lua_check_textpart(lua_State * L)612 lua_check_textpart (lua_State * L)
613 {
614 void *ud = rspamd_lua_check_udata (L, 1, "rspamd{textpart}");
615 luaL_argcheck (L, ud != NULL, 1, "'textpart' expected");
616 return ud ? *((struct rspamd_mime_text_part **)ud) : NULL;
617 }
618
619 static struct rspamd_mime_part *
lua_check_mimepart(lua_State * L)620 lua_check_mimepart (lua_State * L)
621 {
622 void *ud = rspamd_lua_check_udata (L, 1, "rspamd{mimepart}");
623 luaL_argcheck (L, ud != NULL, 1, "'mimepart' expected");
624 return ud ? *((struct rspamd_mime_part **)ud) : NULL;
625 }
626
627
628 static gint
lua_textpart_is_utf(lua_State * L)629 lua_textpart_is_utf (lua_State * L)
630 {
631 LUA_TRACE_POINT;
632 struct rspamd_mime_text_part *part = lua_check_textpart (L);
633
634 if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
635 lua_pushboolean (L, FALSE);
636 return 1;
637 }
638
639 lua_pushboolean (L, IS_TEXT_PART_UTF (part));
640
641 return 1;
642 }
643
644
645 static gint
lua_textpart_has_8bit_raw(lua_State * L)646 lua_textpart_has_8bit_raw (lua_State * L)
647 {
648 LUA_TRACE_POINT;
649 struct rspamd_mime_text_part *part = lua_check_textpart (L);
650
651 if (part) {
652 if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_RAW) {
653 lua_pushboolean (L, TRUE);
654 }
655 else {
656 lua_pushboolean (L, FALSE);
657 }
658 }
659 else {
660 return luaL_error (L, "invalid arguments");
661 }
662
663 return 1;
664 }
665
666 static gint
lua_textpart_has_8bit(lua_State * L)667 lua_textpart_has_8bit (lua_State * L)
668 {
669 LUA_TRACE_POINT;
670 struct rspamd_mime_text_part *part = lua_check_textpart (L);
671
672 if (part) {
673 if (part->flags & RSPAMD_MIME_TEXT_PART_FLAG_8BIT_ENCODED) {
674 lua_pushboolean (L, TRUE);
675 }
676 else {
677 lua_pushboolean (L, FALSE);
678 }
679 }
680 else {
681 return luaL_error (L, "invalid arguments");
682 }
683
684 return 1;
685 }
686
687
688 static gint
lua_textpart_get_content(lua_State * L)689 lua_textpart_get_content (lua_State * L)
690 {
691 LUA_TRACE_POINT;
692 struct rspamd_mime_text_part *part = lua_check_textpart (L);
693 struct rspamd_lua_text *t;
694 gsize len;
695 const gchar *start, *type = NULL;
696
697 if (part == NULL) {
698 lua_pushnil (L);
699 return 1;
700 }
701
702 if (lua_type (L, 2) == LUA_TSTRING) {
703 type = lua_tostring (L, 2);
704 }
705
706 if (!type) {
707 if (IS_TEXT_PART_EMPTY (part)) {
708 lua_pushnil (L);
709 return 1;
710 }
711 start = part->utf_content.begin;
712 len = part->utf_content.len;
713 }
714 else if (strcmp (type, "content") == 0) {
715 if (IS_TEXT_PART_EMPTY (part)) {
716 lua_pushnil (L);
717 return 1;
718 }
719
720 start = part->utf_content.begin;
721 len = part->utf_content.len;
722 }
723 else if (strcmp (type, "content_oneline") == 0) {
724 if (IS_TEXT_PART_EMPTY (part)) {
725 lua_pushnil (L);
726 return 1;
727 }
728
729 start = part->utf_stripped_content->data;
730 len = part->utf_stripped_content->len;
731 }
732 else if (strcmp (type, "raw_parsed") == 0) {
733 if (part->parsed.len == 0) {
734 lua_pushnil (L);
735 return 1;
736 }
737
738 start = part->parsed.begin;
739 len = part->parsed.len;
740 }
741 else if (strcmp (type, "raw_utf") == 0) {
742 if (part->utf_raw_content == NULL || part->utf_raw_content->len == 0) {
743 lua_pushnil (L);
744 return 1;
745 }
746
747 start = part->utf_raw_content->data;
748 len = part->utf_raw_content->len;
749 }
750 else if (strcmp (type, "raw") == 0) {
751 if (part->raw.len == 0) {
752 lua_pushnil (L);
753 return 1;
754 }
755
756 start = part->raw.begin;
757 len = part->raw.len;
758 }
759 else {
760 return luaL_error (L, "invalid content type: %s", type);
761 }
762
763 t = lua_newuserdata (L, sizeof (*t));
764 rspamd_lua_setclass (L, "rspamd{text}", -1);
765
766 t->start = start;
767 t->len = len;
768 t->flags = 0;
769
770 return 1;
771 }
772
773 static gint
lua_textpart_get_raw_content(lua_State * L)774 lua_textpart_get_raw_content (lua_State * L)
775 {
776 LUA_TRACE_POINT;
777 struct rspamd_mime_text_part *part = lua_check_textpart (L);
778 struct rspamd_lua_text *t;
779
780 if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
781 lua_pushnil (L);
782 return 1;
783 }
784
785 t = lua_newuserdata (L, sizeof (*t));
786 rspamd_lua_setclass (L, "rspamd{text}", -1);
787 t->start = part->raw.begin;
788 t->len = part->raw.len;
789 t->flags = 0;
790
791 return 1;
792 }
793
794 static gint
lua_textpart_get_content_oneline(lua_State * L)795 lua_textpart_get_content_oneline (lua_State * L)
796 {
797 LUA_TRACE_POINT;
798 struct rspamd_mime_text_part *part = lua_check_textpart (L);
799 struct rspamd_lua_text *t;
800
801 if (part == NULL || IS_TEXT_PART_EMPTY (part)) {
802 lua_pushnil (L);
803 return 1;
804 }
805
806 t = lua_newuserdata (L, sizeof (*t));
807 rspamd_lua_setclass (L, "rspamd{text}", -1);
808 t->start = part->utf_stripped_content->data;
809 t->len = part->utf_stripped_content->len;
810 t->flags = 0;
811
812 return 1;
813 }
814
815 static gint
lua_textpart_get_length(lua_State * L)816 lua_textpart_get_length (lua_State * L)
817 {
818 LUA_TRACE_POINT;
819 struct rspamd_mime_text_part *part = lua_check_textpart (L);
820
821 if (part == NULL) {
822 lua_pushnil (L);
823 return 1;
824 }
825
826 if (IS_TEXT_PART_EMPTY (part) || part->utf_content.len == 0) {
827 lua_pushinteger (L, 0);
828 }
829 else {
830 lua_pushinteger (L, part->utf_content.len);
831 }
832
833 return 1;
834 }
835
836 static gint
lua_textpart_get_raw_length(lua_State * L)837 lua_textpart_get_raw_length (lua_State * L)
838 {
839 LUA_TRACE_POINT;
840 struct rspamd_mime_text_part *part = lua_check_textpart (L);
841
842 if (part == NULL) {
843 lua_pushnil (L);
844 return 1;
845 }
846
847 lua_pushinteger (L, part->raw.len);
848
849 return 1;
850 }
851
852 static gint
lua_textpart_get_urls_length(lua_State * L)853 lua_textpart_get_urls_length (lua_State * L)
854 {
855 LUA_TRACE_POINT;
856 struct rspamd_mime_text_part *part = lua_check_textpart (L);
857 GList *cur;
858 guint total = 0;
859 struct rspamd_process_exception *ex;
860
861 if (part == NULL) {
862 lua_pushnil (L);
863 return 1;
864 }
865
866 for (cur = part->exceptions; cur != NULL; cur = g_list_next (cur)) {
867 ex = cur->data;
868
869 if (ex->type == RSPAMD_EXCEPTION_URL) {
870 total += ex->len;
871 }
872 }
873
874 lua_pushinteger (L, total);
875
876 return 1;
877 }
878
879 static gint
lua_textpart_get_lines_count(lua_State * L)880 lua_textpart_get_lines_count (lua_State * L)
881 {
882 LUA_TRACE_POINT;
883 struct rspamd_mime_text_part *part = lua_check_textpart (L);
884
885 if (part == NULL) {
886 lua_pushnil (L);
887 return 1;
888 }
889
890 if (IS_TEXT_PART_EMPTY (part)) {
891 lua_pushinteger (L, 0);
892 }
893 else {
894 lua_pushinteger (L, part->nlines);
895 }
896
897 return 1;
898 }
899
900 static gint
lua_textpart_get_words_count(lua_State * L)901 lua_textpart_get_words_count (lua_State *L)
902 {
903 LUA_TRACE_POINT;
904 struct rspamd_mime_text_part *part = lua_check_textpart (L);
905
906 if (part == NULL) {
907 lua_pushnil (L);
908 return 1;
909 }
910
911 if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
912 lua_pushinteger (L, 0);
913 }
914 else {
915 lua_pushinteger (L, part->nwords);
916 }
917
918 return 1;
919 }
920
921 static inline enum rspamd_lua_words_type
word_extract_type_from_string(const gchar * how_str)922 word_extract_type_from_string (const gchar *how_str)
923 {
924 enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_MAX;
925
926 if (strcmp (how_str, "stem") == 0) {
927 how = RSPAMD_LUA_WORDS_STEM;
928 }
929 else if (strcmp (how_str, "norm") == 0) {
930 how = RSPAMD_LUA_WORDS_NORM;
931 }
932 else if (strcmp (how_str, "raw") == 0) {
933 how = RSPAMD_LUA_WORDS_RAW;
934 }
935 else if (strcmp (how_str, "full") == 0) {
936 how = RSPAMD_LUA_WORDS_FULL;
937 }
938
939 return how;
940 }
941
942 static gint
lua_textpart_get_words(lua_State * L)943 lua_textpart_get_words (lua_State *L)
944 {
945 LUA_TRACE_POINT;
946 struct rspamd_mime_text_part *part = lua_check_textpart (L);
947 enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
948
949 if (part == NULL) {
950 return luaL_error (L, "invalid arguments");
951 }
952
953 if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
954 lua_createtable (L, 0, 0);
955 }
956 else {
957 if (lua_type (L, 2) == LUA_TSTRING) {
958 const gchar *how_str = lua_tostring (L, 2);
959
960 how = word_extract_type_from_string (how_str);
961
962 if (how == RSPAMD_LUA_WORDS_MAX) {
963 return luaL_error (L, "invalid extraction type: %s", how_str);
964 }
965 }
966
967 return rspamd_lua_push_words (L, part->utf_words, how);
968 }
969
970 return 1;
971 }
972
973 static gint
lua_textpart_filter_words(lua_State * L)974 lua_textpart_filter_words (lua_State *L)
975 {
976 LUA_TRACE_POINT;
977 struct rspamd_mime_text_part *part = lua_check_textpart (L);
978 struct rspamd_lua_regexp *re = lua_check_regexp (L, 2);
979 gint lim = -1;
980 enum rspamd_lua_words_type how = RSPAMD_LUA_WORDS_STEM;
981
982 if (part == NULL || re == NULL) {
983 return luaL_error (L, "invalid arguments");
984 }
985
986 if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
987 lua_createtable (L, 0, 0);
988 }
989 else {
990 if (lua_type (L, 3) == LUA_TSTRING) {
991 const gchar *how_str = lua_tostring (L, 3);
992
993 how = word_extract_type_from_string (how_str);
994
995 if (how == RSPAMD_LUA_WORDS_MAX) {
996 return luaL_error (L, "invalid extraction type: %s", how_str);
997 }
998 }
999
1000 if (lua_type (L, 4) == LUA_TNUMBER) {
1001 lim = lua_tointeger (L, 4);
1002 }
1003
1004 guint cnt, i;
1005
1006 lua_createtable (L, 8, 0);
1007
1008 for (i = 0, cnt = 1; i < part->utf_words->len; i ++) {
1009 rspamd_stat_token_t *w = &g_array_index (part->utf_words,
1010 rspamd_stat_token_t, i);
1011
1012 switch (how) {
1013 case RSPAMD_LUA_WORDS_STEM:
1014 if (w->stemmed.len > 0) {
1015 if (rspamd_regexp_match (re->re, w->stemmed.begin,
1016 w->stemmed.len, FALSE)) {
1017 lua_pushlstring (L, w->stemmed.begin, w->stemmed.len);
1018 lua_rawseti (L, -2, cnt++);
1019 }
1020 }
1021 break;
1022 case RSPAMD_LUA_WORDS_NORM:
1023 if (w->normalized.len > 0) {
1024 if (rspamd_regexp_match (re->re, w->normalized.begin,
1025 w->normalized.len, FALSE)) {
1026 lua_pushlstring (L, w->normalized.begin, w->normalized.len);
1027 lua_rawseti (L, -2, cnt++);
1028 }
1029 }
1030 break;
1031 case RSPAMD_LUA_WORDS_RAW:
1032 if (w->original.len > 0) {
1033 if (rspamd_regexp_match (re->re, w->original.begin,
1034 w->original.len, TRUE)) {
1035 lua_pushlstring (L, w->original.begin, w->original.len);
1036 lua_rawseti (L, -2, cnt++);
1037 }
1038 }
1039 break;
1040 case RSPAMD_LUA_WORDS_FULL:
1041 if (rspamd_regexp_match (re->re, w->normalized.begin,
1042 w->normalized.len, FALSE)) {
1043 rspamd_lua_push_full_word (L, w);
1044 /* Push to the resulting vector */
1045 lua_rawseti (L, -2, cnt++);
1046 }
1047 break;
1048 default:
1049 break;
1050 }
1051
1052 if (lim > 0 && cnt >= lim) {
1053 break;
1054 }
1055 }
1056 }
1057
1058 return 1;
1059 }
1060
1061 static gint
lua_textpart_is_empty(lua_State * L)1062 lua_textpart_is_empty (lua_State * L)
1063 {
1064 LUA_TRACE_POINT;
1065 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1066
1067 if (part == NULL) {
1068 lua_pushnil (L);
1069 return 1;
1070 }
1071
1072 lua_pushboolean (L, IS_TEXT_PART_EMPTY (part));
1073
1074 return 1;
1075 }
1076
1077 static gint
lua_textpart_is_html(lua_State * L)1078 lua_textpart_is_html (lua_State * L)
1079 {
1080 LUA_TRACE_POINT;
1081 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1082
1083 if (part == NULL) {
1084 lua_pushnil (L);
1085 return 1;
1086 }
1087
1088 lua_pushboolean (L, IS_TEXT_PART_HTML (part));
1089
1090 return 1;
1091 }
1092
1093 static gint
lua_textpart_get_html(lua_State * L)1094 lua_textpart_get_html (lua_State * L)
1095 {
1096 LUA_TRACE_POINT;
1097 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1098 struct html_content **phc;
1099
1100 if (part == NULL || part->html == NULL) {
1101 lua_pushnil (L);
1102 }
1103 else {
1104 phc = lua_newuserdata (L, sizeof (*phc));
1105 rspamd_lua_setclass (L, "rspamd{html}", -1);
1106 *phc = part->html;
1107 }
1108
1109 return 1;
1110 }
1111
1112 static gint
lua_textpart_get_language(lua_State * L)1113 lua_textpart_get_language (lua_State * L)
1114 {
1115 LUA_TRACE_POINT;
1116 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1117
1118 if (part != NULL) {
1119 if (part->language != NULL && part->language[0] != '\0') {
1120 lua_pushstring (L, part->language);
1121 return 1;
1122 }
1123 else {
1124 lua_pushnil (L);
1125 }
1126 }
1127 else {
1128 return luaL_error (L, "invalid arguments");
1129 }
1130
1131 return 1;
1132 }
1133
1134 static gint
lua_textpart_get_charset(lua_State * L)1135 lua_textpart_get_charset (lua_State * L)
1136 {
1137 LUA_TRACE_POINT;
1138 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1139
1140 if (part != NULL) {
1141 if (part->real_charset != NULL) {
1142 lua_pushstring (L, part->real_charset);
1143 return 1;
1144 }
1145 else {
1146 lua_pushnil (L);
1147 }
1148 }
1149 else {
1150 return luaL_error (L, "invalid arguments");
1151 }
1152
1153 return 1;
1154 }
1155
1156 static gint
lua_textpart_get_languages(lua_State * L)1157 lua_textpart_get_languages (lua_State * L)
1158 {
1159 LUA_TRACE_POINT;
1160 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1161 guint i;
1162 struct rspamd_lang_detector_res *cur;
1163
1164 if (part != NULL) {
1165 if (part->languages != NULL) {
1166 lua_createtable (L, part->languages->len, 0);
1167
1168 PTR_ARRAY_FOREACH (part->languages, i, cur) {
1169 lua_createtable (L, 0, 2);
1170 lua_pushstring (L, "code");
1171 lua_pushstring (L, cur->lang);
1172 lua_settable (L, -3);
1173 lua_pushstring (L, "prob");
1174 lua_pushnumber (L, cur->prob);
1175 lua_settable (L, -3);
1176
1177 lua_rawseti (L, -2, i + 1);
1178 }
1179 }
1180 else {
1181 lua_newtable (L);
1182 }
1183 }
1184 else {
1185 luaL_error (L, "invalid arguments");
1186 }
1187
1188 return 1;
1189 }
1190
/*
 * Record produced by lua_shingles_filter for a single shingle: the selected
 * hash value plus up to three stemmed tokens referenced by begin/len.
 */
struct lua_shingle_data {
	guint64 hash;      /* smallest hash value found in the filter input */
	rspamd_ftok_t t1;  /* stemmed token at the index of the smallest hash */
	rspamd_ftok_t t2;  /* stemmed token at that index + 1, if in range */
	rspamd_ftok_t t3;  /* stemmed token at that index + 2, if in range */
};
1197
/*
 * User data handed to lua_shingles_filter through its opaque `ud` argument.
 */
struct lua_shingle_filter_cbdata {
	struct rspamd_mime_text_part *part;  /* part whose utf_words are read */
	rspamd_mempool_t *pool;              /* pool used to allocate shingle records */
};
1202
1203 #define STORE_TOKEN(i, t) do { \
1204 if ((i) < part->utf_words->len) { \
1205 word = &g_array_index (part->utf_words, rspamd_stat_token_t, (i)); \
1206 sd->t.begin = word->stemmed.begin; \
1207 sd->t.len = word->stemmed.len; \
1208 } \
1209 }while (0)
1210
1211 static guint64
lua_shingles_filter(guint64 * input,gsize count,gint shno,const guchar * key,gpointer ud)1212 lua_shingles_filter (guint64 *input, gsize count,
1213 gint shno, const guchar *key, gpointer ud)
1214 {
1215 guint64 minimal = G_MAXUINT64;
1216 gsize i, min_idx = 0;
1217 struct lua_shingle_data *sd;
1218 rspamd_stat_token_t *word;
1219 struct lua_shingle_filter_cbdata *cbd = (struct lua_shingle_filter_cbdata *)ud;
1220 struct rspamd_mime_text_part *part;
1221
1222 part = cbd->part;
1223
1224 for (i = 0; i < count; i ++) {
1225 if (minimal > input[i]) {
1226 minimal = input[i];
1227 min_idx = i;
1228 }
1229 }
1230
1231 sd = rspamd_mempool_alloc0 (cbd->pool, sizeof (*sd));
1232 sd->hash = minimal;
1233
1234
1235 STORE_TOKEN (min_idx, t1);
1236 STORE_TOKEN (min_idx + 1, t2);
1237 STORE_TOKEN (min_idx + 2, t3);
1238
1239 return GPOINTER_TO_SIZE (sd);
1240 }
1241
1242 #undef STORE_TOKEN
1243
1244 static gint
lua_textpart_get_fuzzy_hashes(lua_State * L)1245 lua_textpart_get_fuzzy_hashes (lua_State * L)
1246 {
1247 LUA_TRACE_POINT;
1248 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1249 rspamd_mempool_t *pool = rspamd_lua_check_mempool (L, 2);
1250 guchar key[rspamd_cryptobox_HASHBYTES], digest[rspamd_cryptobox_HASHBYTES],
1251 hexdigest[rspamd_cryptobox_HASHBYTES * 2 + 1], numbuf[64];
1252 struct rspamd_shingle *sgl;
1253 guint i;
1254 struct lua_shingle_data *sd;
1255 rspamd_cryptobox_hash_state_t st;
1256 rspamd_stat_token_t *word;
1257 struct lua_shingle_filter_cbdata cbd;
1258
1259
1260 if (part == NULL || pool == NULL) {
1261 return luaL_error (L, "invalid arguments");
1262 }
1263
1264 if (IS_TEXT_PART_EMPTY (part) || part->utf_words == NULL) {
1265 lua_pushnil (L);
1266 lua_pushnil (L);
1267 }
1268 else {
1269 /* TODO: add keys and algorithms support */
1270 rspamd_cryptobox_hash (key, "rspamd", strlen ("rspamd"), NULL, 0);
1271
1272 /* TODO: add short text support */
1273
1274 /* Calculate direct hash */
1275 rspamd_cryptobox_hash_init (&st, key, rspamd_cryptobox_HASHKEYBYTES);
1276
1277 for (i = 0; i < part->utf_words->len; i ++) {
1278 word = &g_array_index (part->utf_words, rspamd_stat_token_t, i);
1279 rspamd_cryptobox_hash_update (&st,
1280 word->stemmed.begin, word->stemmed.len);
1281 }
1282
1283 rspamd_cryptobox_hash_final (&st, digest);
1284
1285 rspamd_encode_hex_buf (digest, sizeof (digest), hexdigest,
1286 sizeof (hexdigest));
1287 lua_pushlstring (L, hexdigest, sizeof (hexdigest) - 1);
1288
1289 cbd.pool = pool;
1290 cbd.part = part;
1291 sgl = rspamd_shingles_from_text (part->utf_words, key,
1292 pool, lua_shingles_filter, &cbd, RSPAMD_SHINGLES_MUMHASH);
1293
1294 if (sgl == NULL) {
1295 lua_pushnil (L);
1296 }
1297 else {
1298 lua_createtable (L, G_N_ELEMENTS (sgl->hashes), 0);
1299
1300 for (i = 0; i < G_N_ELEMENTS (sgl->hashes); i ++) {
1301 sd = GSIZE_TO_POINTER (sgl->hashes[i]);
1302
1303 lua_createtable (L, 4, 0);
1304 rspamd_snprintf (numbuf, sizeof (numbuf), "%uL", sd->hash);
1305 lua_pushstring (L, numbuf);
1306 lua_rawseti (L, -2, 1);
1307
1308 /* Tokens */
1309 lua_pushlstring (L, sd->t1.begin, sd->t1.len);
1310 lua_rawseti (L, -2, 2);
1311
1312 lua_pushlstring (L, sd->t2.begin, sd->t2.len);
1313 lua_rawseti (L, -2, 3);
1314
1315 lua_pushlstring (L, sd->t3.begin, sd->t3.len);
1316 lua_rawseti (L, -2, 4);
1317
1318 lua_rawseti (L, -2, i + 1); /* Store table */
1319 }
1320 }
1321 }
1322
1323 return 2;
1324 }
1325
1326 static gint
lua_textpart_get_mimepart(lua_State * L)1327 lua_textpart_get_mimepart (lua_State * L)
1328 {
1329 LUA_TRACE_POINT;
1330 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1331 struct rspamd_mime_part **pmime;
1332
1333 if (part != NULL) {
1334 if (part->mime_part != NULL) {
1335 pmime = lua_newuserdata (L, sizeof (struct rspamd_mime_part *));
1336 rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
1337 *pmime = part->mime_part;
1338
1339 return 1;
1340 }
1341 }
1342
1343 lua_pushnil (L);
1344 return 1;
1345 }
1346
1347 /***
1348 * @method mime_part:get_stats()
1349 * Returns a table with the following data:
1350 * -
1351 * - `lines`: number of lines
1352 * - `spaces`: number of spaces
1353 * - `double_spaces`: double spaces
1354 * - `empty_lines`: number of empty lines
1355 * - `non_ascii_characters`: number of non ascii characters
1356 * - `ascii_characters`: number of ascii characters
1357 * @return {table} table of stats
1358 */
1359 static gint
lua_textpart_get_stats(lua_State * L)1360 lua_textpart_get_stats (lua_State * L)
1361 {
1362 LUA_TRACE_POINT;
1363 struct rspamd_mime_text_part *part = lua_check_textpart (L);
1364
1365 if (part != NULL) {
1366 lua_createtable (L, 0, 9);
1367
1368 lua_pushstring (L, "lines");
1369 lua_pushinteger (L, part->nlines);
1370 lua_settable (L, -3);
1371 lua_pushstring (L, "empty_lines");
1372 lua_pushinteger (L, part->empty_lines);
1373 lua_settable (L, -3);
1374 lua_pushstring (L, "spaces");
1375 lua_pushinteger (L, part->spaces);
1376 lua_settable (L, -3);
1377 lua_pushstring (L, "non_spaces");
1378 lua_pushinteger (L, part->non_spaces);
1379 lua_settable (L, -3);
1380 lua_pushstring (L, "double_spaces");
1381 lua_pushinteger (L, part->double_spaces);
1382 lua_settable (L, -3);
1383 lua_pushstring (L, "ascii_characters");
1384 lua_pushinteger (L, part->ascii_chars);
1385 lua_settable (L, -3);
1386 lua_pushstring (L, "non_ascii_characters");
1387 lua_pushinteger (L, part->non_ascii_chars);
1388 lua_settable (L, -3);
1389 lua_pushstring (L, "capital_letters");
1390 lua_pushinteger (L, part->capital_letters);
1391 lua_settable (L, -3);
1392 lua_pushstring (L, "numeric_characters");
1393 lua_pushinteger (L, part->numeric_characters);
1394 lua_settable (L, -3);
1395 }
1396 else {
1397 return luaL_error (L, "invalid arguments");
1398 }
1399
1400 return 1;
1401 }
1402
1403 /* Mimepart implementation */
1404
1405 static gint
lua_mimepart_get_content(lua_State * L)1406 lua_mimepart_get_content (lua_State * L)
1407 {
1408 LUA_TRACE_POINT;
1409 struct rspamd_mime_part *part = lua_check_mimepart (L);
1410 struct rspamd_lua_text *t;
1411
1412 if (part == NULL) {
1413 lua_pushnil (L);
1414 return 1;
1415 }
1416
1417 t = lua_newuserdata (L, sizeof (*t));
1418 rspamd_lua_setclass (L, "rspamd{text}", -1);
1419 t->start = part->parsed_data.begin;
1420 t->len = part->parsed_data.len;
1421 t->flags = 0;
1422
1423 return 1;
1424 }
1425
1426 static gint
lua_mimepart_get_raw_content(lua_State * L)1427 lua_mimepart_get_raw_content (lua_State * L)
1428 {
1429 LUA_TRACE_POINT;
1430 struct rspamd_mime_part *part = lua_check_mimepart (L);
1431 struct rspamd_lua_text *t;
1432
1433 if (part == NULL) {
1434 lua_pushnil (L);
1435 return 1;
1436 }
1437
1438 t = lua_newuserdata (L, sizeof (*t));
1439 rspamd_lua_setclass (L, "rspamd{text}", -1);
1440 t->start = part->raw_data.begin;
1441 t->len = part->raw_data.len;
1442 t->flags = 0;
1443
1444 return 1;
1445 }
1446
1447 static gint
lua_mimepart_get_length(lua_State * L)1448 lua_mimepart_get_length (lua_State * L)
1449 {
1450 LUA_TRACE_POINT;
1451 struct rspamd_mime_part *part = lua_check_mimepart (L);
1452
1453 if (part == NULL) {
1454 lua_pushnil (L);
1455 return 1;
1456 }
1457
1458 lua_pushinteger (L, part->parsed_data.len);
1459
1460 return 1;
1461 }
1462
1463 static gint
lua_mimepart_get_type_common(lua_State * L,struct rspamd_content_type * ct,gboolean full)1464 lua_mimepart_get_type_common (lua_State * L, struct rspamd_content_type *ct,
1465 gboolean full)
1466 {
1467
1468 GHashTableIter it;
1469 gpointer k, v;
1470 struct rspamd_content_type_param *param;
1471
1472 if (ct == NULL) {
1473 lua_pushnil (L);
1474 lua_pushnil (L);
1475 return 2;
1476 }
1477
1478 lua_pushlstring (L, ct->type.begin, ct->type.len);
1479 lua_pushlstring (L, ct->subtype.begin, ct->subtype.len);
1480
1481 if (!full) {
1482 return 2;
1483 }
1484
1485 lua_createtable (L, 0, 2 + (ct->attrs ?
1486 g_hash_table_size (ct->attrs) : 0));
1487
1488 if (ct->charset.len > 0) {
1489 lua_pushstring (L, "charset");
1490 lua_pushlstring (L, ct->charset.begin, ct->charset.len);
1491 lua_settable (L, -3);
1492 }
1493
1494 if (ct->boundary.len > 0) {
1495 lua_pushstring (L, "boundary");
1496 lua_pushlstring (L, ct->boundary.begin, ct->boundary.len);
1497 lua_settable (L, -3);
1498 }
1499
1500 if (ct->attrs) {
1501 g_hash_table_iter_init (&it, ct->attrs);
1502
1503 while (g_hash_table_iter_next (&it, &k, &v)) {
1504 param = v;
1505
1506 if (param->name.len > 0 && param->value.len > 0) {
1507 /* TODO: think about multiple values here */
1508 lua_pushlstring (L, param->name.begin, param->name.len);
1509 lua_pushlstring (L, param->value.begin, param->value.len);
1510 lua_settable (L, -3);
1511 }
1512 }
1513 }
1514
1515 return 3;
1516 }
1517
1518 static gint
lua_mimepart_get_type(lua_State * L)1519 lua_mimepart_get_type (lua_State * L)
1520 {
1521 LUA_TRACE_POINT;
1522 struct rspamd_mime_part *part = lua_check_mimepart (L);
1523
1524 if (part == NULL) {
1525 return luaL_error (L, "invalid arguments");
1526 }
1527
1528 return lua_mimepart_get_type_common (L, part->ct, FALSE);
1529 }
1530
1531 static gint
lua_mimepart_get_type_full(lua_State * L)1532 lua_mimepart_get_type_full (lua_State * L)
1533 {
1534 LUA_TRACE_POINT;
1535 struct rspamd_mime_part *part = lua_check_mimepart (L);
1536
1537 if (part == NULL) {
1538 return luaL_error (L, "invalid arguments");
1539 }
1540
1541 return lua_mimepart_get_type_common (L, part->ct, TRUE);
1542 }
1543
1544 static gint
lua_mimepart_get_detected_type(lua_State * L)1545 lua_mimepart_get_detected_type (lua_State * L)
1546 {
1547 LUA_TRACE_POINT;
1548 struct rspamd_mime_part *part = lua_check_mimepart (L);
1549
1550 if (part == NULL) {
1551 return luaL_error (L, "invalid arguments");
1552 }
1553
1554 return lua_mimepart_get_type_common (L, part->detected_ct, FALSE);
1555 }
1556
1557 static gint
lua_mimepart_get_detected_type_full(lua_State * L)1558 lua_mimepart_get_detected_type_full (lua_State * L)
1559 {
1560 LUA_TRACE_POINT;
1561 struct rspamd_mime_part *part = lua_check_mimepart (L);
1562
1563 if (part == NULL) {
1564 return luaL_error (L, "invalid arguments");
1565 }
1566
1567 return lua_mimepart_get_type_common (L, part->detected_ct, TRUE);
1568 }
1569
1570 static gint
lua_mimepart_get_detected_ext(lua_State * L)1571 lua_mimepart_get_detected_ext (lua_State * L)
1572 {
1573 LUA_TRACE_POINT;
1574 struct rspamd_mime_part *part = lua_check_mimepart (L);
1575
1576 if (part == NULL) {
1577 return luaL_error (L, "invalid arguments");
1578 }
1579
1580 if (part->detected_ext) {
1581 lua_pushstring (L, part->detected_ext);
1582 }
1583 else {
1584 lua_pushnil (L);
1585 }
1586
1587 return 1;
1588 }
1589
1590 static gint
lua_mimepart_get_cte(lua_State * L)1591 lua_mimepart_get_cte (lua_State * L)
1592 {
1593 LUA_TRACE_POINT;
1594 struct rspamd_mime_part *part = lua_check_mimepart (L);
1595
1596 if (part == NULL) {
1597 lua_pushnil (L);
1598 return 1;
1599 }
1600
1601 lua_pushstring (L, rspamd_cte_to_string (part->cte));
1602
1603 return 1;
1604 }
1605
1606 static gint
lua_mimepart_get_filename(lua_State * L)1607 lua_mimepart_get_filename (lua_State * L)
1608 {
1609 LUA_TRACE_POINT;
1610 struct rspamd_mime_part *part = lua_check_mimepart (L);
1611
1612 if (part == NULL || part->cd == NULL || part->cd->filename.len == 0) {
1613 lua_pushnil (L);
1614 return 1;
1615 }
1616
1617 lua_pushlstring (L, part->cd->filename.begin, part->cd->filename.len);
1618
1619 return 1;
1620 }
1621
1622 static gint
lua_mimepart_get_boundary(lua_State * L)1623 lua_mimepart_get_boundary (lua_State * L)
1624 {
1625 LUA_TRACE_POINT;
1626 struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
1627
1628 if (part == NULL) {
1629 return luaL_error (L, "invalid arguments");
1630 }
1631
1632 if (IS_PART_MULTIPART (part)) {
1633 lua_pushlstring (L, part->specific.mp->boundary.begin,
1634 part->specific.mp->boundary.len);
1635 }
1636 else {
1637 parent = part->parent_part;
1638
1639 if (!parent || !IS_PART_MULTIPART (parent)) {
1640 lua_pushnil (L);
1641 }
1642 else {
1643 lua_pushlstring (L, parent->specific.mp->boundary.begin,
1644 parent->specific.mp->boundary.len);
1645 }
1646 }
1647
1648 return 1;
1649 }
1650
1651 static gint
lua_mimepart_get_enclosing_boundary(lua_State * L)1652 lua_mimepart_get_enclosing_boundary (lua_State * L)
1653 {
1654 LUA_TRACE_POINT;
1655 struct rspamd_mime_part *part = lua_check_mimepart (L), *parent;
1656
1657 if (part == NULL) {
1658 return luaL_error (L, "invalid arguments");
1659 }
1660
1661 parent = part->parent_part;
1662
1663 if (!parent || !IS_PART_MULTIPART (parent)) {
1664 lua_pushnil (L);
1665 }
1666 else {
1667 lua_pushlstring (L, parent->specific.mp->boundary.begin,
1668 parent->specific.mp->boundary.len);
1669 }
1670
1671 return 1;
1672 }
1673
1674 static gint
lua_mimepart_get_header_common(lua_State * L,enum rspamd_lua_task_header_type how)1675 lua_mimepart_get_header_common (lua_State *L, enum rspamd_lua_task_header_type how)
1676 {
1677 struct rspamd_mime_part *part = lua_check_mimepart (L);
1678 const gchar *name;
1679 gboolean strong = FALSE;
1680
1681 name = luaL_checkstring (L, 2);
1682
1683 if (name && part) {
1684
1685 if (lua_isboolean (L, 3)) {
1686 strong = lua_toboolean (L, 3);
1687 }
1688
1689 return rspamd_lua_push_header_array (L,
1690 name,
1691 rspamd_message_get_header_from_hash(part->raw_headers, name, FALSE),
1692 how,
1693 strong);
1694 }
1695
1696 lua_pushnil (L);
1697
1698 return 1;
1699 }
1700
1701 static gint
lua_mimepart_get_header_full(lua_State * L)1702 lua_mimepart_get_header_full (lua_State * L)
1703 {
1704 LUA_TRACE_POINT;
1705 return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_FULL);
1706 }
1707
1708 static gint
lua_mimepart_get_header(lua_State * L)1709 lua_mimepart_get_header (lua_State * L)
1710 {
1711 LUA_TRACE_POINT;
1712 return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_SIMPLE);
1713 }
1714
1715 static gint
lua_mimepart_get_header_raw(lua_State * L)1716 lua_mimepart_get_header_raw (lua_State * L)
1717 {
1718 LUA_TRACE_POINT;
1719 return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_RAW);
1720 }
1721
1722 static gint
lua_mimepart_get_header_count(lua_State * L)1723 lua_mimepart_get_header_count (lua_State * L)
1724 {
1725 LUA_TRACE_POINT;
1726 return lua_mimepart_get_header_common (L, RSPAMD_TASK_HEADER_PUSH_COUNT);
1727 }
1728
1729 static gint
lua_mimepart_get_raw_headers(lua_State * L)1730 lua_mimepart_get_raw_headers (lua_State *L)
1731 {
1732 LUA_TRACE_POINT;
1733 struct rspamd_mime_part *part = lua_check_mimepart (L);
1734 struct rspamd_lua_text *t;
1735
1736 if (part) {
1737 t = lua_newuserdata (L, sizeof (*t));
1738 rspamd_lua_setclass (L, "rspamd{text}", -1);
1739 t->start = part->raw_headers_str;
1740 t->len = part->raw_headers_len;
1741 t->flags = 0;
1742 }
1743 else {
1744 return luaL_error (L, "invalid arguments");
1745 }
1746
1747
1748 return 1;
1749 }
1750
1751 static gint
lua_mimepart_get_headers(lua_State * L)1752 lua_mimepart_get_headers (lua_State *L)
1753 {
1754 LUA_TRACE_POINT;
1755 struct rspamd_mime_part *part = lua_check_mimepart (L);
1756 bool need_modified = lua_isnoneornil(L, 2) ? false : lua_toboolean(L, 2);
1757
1758 if (part) {
1759 struct rspamd_mime_header *cur;
1760 int i = 1;
1761
1762 lua_createtable (L, rspamd_mime_headers_count(part->raw_headers), 0);
1763 LL_FOREACH2(part->headers_order, cur, ord_next) {
1764 if (need_modified && cur->modified_chain) {
1765 struct rspamd_mime_header *cur_modified;
1766
1767 LL_FOREACH(cur->modified_chain, cur_modified) {
1768 rspamd_lua_push_header(L, cur_modified, RSPAMD_TASK_HEADER_PUSH_FULL);
1769 lua_rawseti(L, -2, i++);
1770 }
1771 }
1772 else {
1773 rspamd_lua_push_header(L, cur, RSPAMD_TASK_HEADER_PUSH_FULL);
1774 lua_rawseti(L, -2, i++);
1775 }
1776
1777 }
1778 }
1779 else {
1780 return luaL_error (L, "invalid arguments");
1781 }
1782
1783
1784 return 1;
1785 }
1786
1787
1788 static gint
lua_mimepart_is_image(lua_State * L)1789 lua_mimepart_is_image (lua_State * L)
1790 {
1791 LUA_TRACE_POINT;
1792 struct rspamd_mime_part *part = lua_check_mimepart (L);
1793
1794 if (part == NULL) {
1795 return luaL_error (L, "invalid arguments");
1796 }
1797
1798 lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_IMAGE);
1799
1800 return 1;
1801 }
1802
1803 static gint
lua_mimepart_is_archive(lua_State * L)1804 lua_mimepart_is_archive (lua_State * L)
1805 {
1806 LUA_TRACE_POINT;
1807 struct rspamd_mime_part *part = lua_check_mimepart (L);
1808
1809 if (part == NULL) {
1810 return luaL_error (L, "invalid arguments");
1811 }
1812
1813 lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_ARCHIVE);
1814
1815 return 1;
1816 }
1817
1818 static gint
lua_mimepart_is_multipart(lua_State * L)1819 lua_mimepart_is_multipart (lua_State * L)
1820 {
1821 LUA_TRACE_POINT;
1822 struct rspamd_mime_part *part = lua_check_mimepart (L);
1823
1824 if (part == NULL) {
1825 return luaL_error (L, "invalid arguments");
1826 }
1827
1828 lua_pushboolean (L, IS_PART_MULTIPART (part) ? true : false);
1829
1830 return 1;
1831 }
1832
1833 static gint
lua_mimepart_is_message(lua_State * L)1834 lua_mimepart_is_message (lua_State * L)
1835 {
1836 LUA_TRACE_POINT;
1837 struct rspamd_mime_part *part = lua_check_mimepart (L);
1838
1839 if (part == NULL) {
1840 return luaL_error (L, "invalid arguments");
1841 }
1842
1843 lua_pushboolean (L, IS_PART_MESSAGE (part) ? true : false);
1844
1845 return 1;
1846 }
1847
1848 static gint
lua_mimepart_is_attachment(lua_State * L)1849 lua_mimepart_is_attachment (lua_State * L)
1850 {
1851 LUA_TRACE_POINT;
1852 struct rspamd_mime_part *part = lua_check_mimepart (L);
1853
1854 if (part == NULL) {
1855 return luaL_error (L, "invalid arguments");
1856 }
1857
1858 if (part->cd && part->cd->type == RSPAMD_CT_ATTACHMENT) {
1859 lua_pushboolean (L, true);
1860 }
1861 else {
1862 /* if has_name and not (image and Content-ID_header_present) */
1863 if (part->cd && part->cd->filename.len > 0) {
1864 if (part->part_type != RSPAMD_MIME_PART_IMAGE &&
1865 rspamd_message_get_header_from_hash(part->raw_headers,
1866 "Content-Id", FALSE) == NULL) {
1867 /* Filename is presented but no content id and not image */
1868 lua_pushboolean (L, true);
1869 }
1870 else {
1871 /* Image or an embeded object */
1872 lua_pushboolean (L, false);
1873 }
1874 }
1875 else {
1876 /* No filename */
1877 lua_pushboolean (L, false);
1878 }
1879 }
1880
1881 return 1;
1882 }
1883
1884 static gint
lua_mimepart_is_text(lua_State * L)1885 lua_mimepart_is_text (lua_State * L)
1886 {
1887 LUA_TRACE_POINT;
1888 struct rspamd_mime_part *part = lua_check_mimepart (L);
1889
1890 if (part == NULL) {
1891 return luaL_error (L, "invalid arguments");
1892 }
1893
1894 lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_TEXT);
1895
1896 return 1;
1897 }
1898
1899 static gint
lua_mimepart_is_broken(lua_State * L)1900 lua_mimepart_is_broken (lua_State * L)
1901 {
1902 LUA_TRACE_POINT;
1903 struct rspamd_mime_part *part = lua_check_mimepart (L);
1904
1905 if (part == NULL) {
1906 return luaL_error (L, "invalid arguments");
1907 }
1908
1909 if (part->ct) {
1910 lua_pushboolean (L, (part->ct->flags & RSPAMD_CONTENT_TYPE_BROKEN) ?
1911 true : false);
1912 }
1913 else {
1914 lua_pushboolean (L, false);
1915 }
1916
1917 return 1;
1918 }
1919
1920 static gint
lua_mimepart_get_image(lua_State * L)1921 lua_mimepart_get_image (lua_State * L)
1922 {
1923 LUA_TRACE_POINT;
1924 struct rspamd_mime_part *part = lua_check_mimepart (L);
1925 struct rspamd_image **pimg;
1926
1927 if (part == NULL) {
1928 return luaL_error (L, "invalid arguments");
1929 }
1930
1931 if (part->part_type != RSPAMD_MIME_PART_IMAGE || part->specific.img == NULL) {
1932 lua_pushnil (L);
1933 }
1934 else {
1935 pimg = lua_newuserdata (L, sizeof (*pimg));
1936 *pimg = part->specific.img;
1937 rspamd_lua_setclass (L, "rspamd{image}", -1);
1938 }
1939
1940 return 1;
1941 }
1942
1943 static gint
lua_mimepart_get_archive(lua_State * L)1944 lua_mimepart_get_archive (lua_State * L)
1945 {
1946 LUA_TRACE_POINT;
1947 struct rspamd_mime_part *part = lua_check_mimepart (L);
1948 struct rspamd_archive **parch;
1949
1950 if (part == NULL) {
1951 return luaL_error (L, "invalid arguments");
1952 }
1953
1954 if (part->part_type != RSPAMD_MIME_PART_ARCHIVE || part->specific.arch == NULL) {
1955 lua_pushnil (L);
1956 }
1957 else {
1958 parch = lua_newuserdata (L, sizeof (*parch));
1959 *parch = part->specific.arch;
1960 rspamd_lua_setclass (L, "rspamd{archive}", -1);
1961 }
1962
1963 return 1;
1964 }
1965
1966 static gint
lua_mimepart_get_children(lua_State * L)1967 lua_mimepart_get_children (lua_State * L)
1968 {
1969 LUA_TRACE_POINT;
1970 struct rspamd_mime_part *part = lua_check_mimepart (L);
1971 struct rspamd_mime_part **pcur, *cur;
1972 guint i;
1973
1974 if (part == NULL) {
1975 return luaL_error (L, "invalid arguments");
1976 }
1977
1978 if (!IS_PART_MULTIPART (part) || part->specific.mp->children == NULL) {
1979 lua_pushnil (L);
1980 }
1981 else {
1982 lua_createtable (L, part->specific.mp->children->len, 0);
1983
1984 PTR_ARRAY_FOREACH (part->specific.mp->children, i, cur) {
1985 pcur = lua_newuserdata (L, sizeof (*pcur));
1986 *pcur = cur;
1987 rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
1988 lua_rawseti (L, -2, i + 1);
1989 }
1990 }
1991
1992 return 1;
1993 }
1994
1995 static gint
lua_mimepart_get_parent(lua_State * L)1996 lua_mimepart_get_parent (lua_State * L)
1997 {
1998 LUA_TRACE_POINT;
1999 struct rspamd_mime_part *part = lua_check_mimepart (L);
2000 struct rspamd_mime_part **pparent;
2001
2002 if (part == NULL) {
2003 return luaL_error (L, "invalid arguments");
2004 }
2005
2006 if (part->parent_part) {
2007 pparent = lua_newuserdata (L, sizeof (*pparent));
2008 *pparent = part->parent_part;
2009 rspamd_lua_setclass (L, "rspamd{mimepart}", -1);
2010 }
2011 else {
2012 lua_pushnil (L);
2013 }
2014
2015 return 1;
2016 }
2017
2018
2019 static gint
lua_mimepart_get_text(lua_State * L)2020 lua_mimepart_get_text (lua_State * L)
2021 {
2022 LUA_TRACE_POINT;
2023 struct rspamd_mime_part *part = lua_check_mimepart (L);
2024 struct rspamd_mime_text_part **ppart;
2025
2026 if (part == NULL) {
2027 return luaL_error (L, "invalid arguments");
2028 }
2029
2030 if (part->part_type != RSPAMD_MIME_PART_TEXT || part->specific.txt == NULL) {
2031 lua_pushnil (L);
2032 }
2033 else {
2034 ppart = lua_newuserdata (L, sizeof (*ppart));
2035 *ppart = part->specific.txt;
2036 rspamd_lua_setclass (L, "rspamd{textpart}", -1);
2037 }
2038
2039 return 1;
2040 }
2041
2042 static gint
lua_mimepart_get_digest(lua_State * L)2043 lua_mimepart_get_digest (lua_State * L)
2044 {
2045 LUA_TRACE_POINT;
2046 struct rspamd_mime_part *part = lua_check_mimepart (L);
2047 gchar digestbuf[rspamd_cryptobox_HASHBYTES * 2 + 1];
2048
2049 if (part == NULL) {
2050 return luaL_error (L, "invalid arguments");
2051 }
2052
2053 memset (digestbuf, 0, sizeof (digestbuf));
2054 rspamd_encode_hex_buf (part->digest, sizeof (part->digest),
2055 digestbuf, sizeof (digestbuf));
2056 lua_pushstring (L, digestbuf);
2057
2058 return 1;
2059 }
2060
2061 static gint
lua_mimepart_get_id(lua_State * L)2062 lua_mimepart_get_id (lua_State * L)
2063 {
2064 LUA_TRACE_POINT;
2065 struct rspamd_mime_part *part = lua_check_mimepart (L);
2066
2067 if (part == NULL) {
2068 return luaL_error (L, "invalid arguments");
2069 }
2070
2071 lua_pushinteger (L, part->part_number);
2072
2073 return 1;
2074 }
2075
2076 static gint
lua_mimepart_headers_foreach(lua_State * L)2077 lua_mimepart_headers_foreach (lua_State *L)
2078 {
2079 LUA_TRACE_POINT;
2080 struct rspamd_mime_part *part = lua_check_mimepart (L);
2081 enum rspamd_lua_task_header_type how = RSPAMD_TASK_HEADER_PUSH_SIMPLE;
2082 struct rspamd_lua_regexp *re = NULL;
2083 struct rspamd_mime_header *hdr, *cur;
2084 gint old_top;
2085
2086 if (part && lua_isfunction (L, 2)) {
2087 if (lua_istable (L, 3)) {
2088 lua_pushstring (L, "full");
2089 lua_gettable (L, 3);
2090
2091 if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
2092 how = RSPAMD_TASK_HEADER_PUSH_FULL;
2093 }
2094
2095 lua_pop (L, 1);
2096
2097 lua_pushstring (L, "raw");
2098 lua_gettable (L, 3);
2099
2100 if (lua_isboolean (L, -1) && lua_toboolean (L, -1)) {
2101 how = RSPAMD_TASK_HEADER_PUSH_RAW;
2102 }
2103
2104 lua_pop (L, 1);
2105
2106 lua_pushstring (L, "regexp");
2107 lua_gettable (L, 3);
2108
2109 if (lua_isuserdata (L, -1)) {
2110 RSPAMD_LUA_CHECK_UDATA_PTR_OR_RETURN(L, -1, "rspamd{regexp}",
2111 struct rspamd_lua_regexp, re);
2112 }
2113
2114 lua_pop (L, 1);
2115 }
2116
2117 if (part->headers_order) {
2118 hdr = part->headers_order;
2119
2120 LL_FOREACH2 (hdr, cur, ord_next) {
2121 if (re && re->re) {
2122 if (!rspamd_regexp_match (re->re, cur->name,
2123 strlen (cur->name),FALSE)) {
2124 continue;
2125 }
2126 }
2127
2128 old_top = lua_gettop (L);
2129 lua_pushvalue (L, 2);
2130 lua_pushstring (L, cur->name);
2131 rspamd_lua_push_header (L, cur, how);
2132
2133 if (lua_pcall (L, 2, LUA_MULTRET, 0) != 0) {
2134 msg_err ("call to header_foreach failed: %s",
2135 lua_tostring (L, -1));
2136 lua_settop (L, old_top);
2137 break;
2138 }
2139 else {
2140 if (lua_gettop (L) > old_top) {
2141 if (lua_isboolean (L, old_top + 1)) {
2142 if (lua_toboolean (L, old_top + 1)) {
2143 lua_settop (L, old_top);
2144 break;
2145 }
2146 }
2147 }
2148 }
2149
2150 lua_settop (L, old_top);
2151 }
2152 }
2153 }
2154
2155 return 0;
2156 }
2157
2158 static gint
lua_mimepart_get_specific(lua_State * L)2159 lua_mimepart_get_specific (lua_State * L)
2160 {
2161 LUA_TRACE_POINT;
2162 struct rspamd_mime_part *part = lua_check_mimepart (L);
2163
2164 if (part == NULL) {
2165 return luaL_error (L, "invalid arguments");
2166 }
2167
2168 if (part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
2169 lua_pushnil (L);
2170 }
2171 else {
2172 lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2173 }
2174
2175 return 1;
2176 }
2177
2178 static gint
lua_mimepart_get_urls(lua_State * L)2179 lua_mimepart_get_urls (lua_State * L)
2180 {
2181 LUA_TRACE_POINT;
2182 struct rspamd_mime_part *part = lua_check_mimepart (L);
2183
2184 if (part == NULL) {
2185 return luaL_error (L, "invalid arguments");
2186 }
2187
2188 struct lua_tree_cb_data cb;
2189 struct rspamd_url *u;
2190 static const gint default_protocols_mask = PROTOCOL_HTTP|PROTOCOL_HTTPS|
2191 PROTOCOL_FILE|PROTOCOL_FTP;
2192 gsize sz, max_urls = 0, i;
2193
2194 if (part->urls == NULL) {
2195 lua_newtable (L);
2196
2197 return 1;
2198 }
2199
2200 if (!lua_url_cbdata_fill (L, 2, &cb, default_protocols_mask,
2201 ~(0), max_urls)) {
2202 return luaL_error (L, "invalid arguments");
2203 }
2204
2205 sz = part->urls->len;
2206
2207 lua_createtable (L, sz, 0);
2208
2209 PTR_ARRAY_FOREACH (part->urls, i, u) {
2210 lua_tree_url_callback (u, u, &cb);
2211 }
2212
2213 lua_url_cbdata_dtor (&cb);
2214
2215 return 1;
2216 }
2217
2218 static gint
lua_mimepart_is_specific(lua_State * L)2219 lua_mimepart_is_specific (lua_State * L)
2220 {
2221 LUA_TRACE_POINT;
2222 struct rspamd_mime_part *part = lua_check_mimepart (L);
2223
2224 if (part == NULL) {
2225 return luaL_error (L, "invalid arguments");
2226 }
2227
2228 lua_pushboolean (L, part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA);
2229
2230 return 1;
2231 }
2232
2233 static gint
lua_mimepart_set_specific(lua_State * L)2234 lua_mimepart_set_specific (lua_State * L)
2235 {
2236 LUA_TRACE_POINT;
2237 struct rspamd_mime_part *part = lua_check_mimepart (L);
2238
2239 if (part == NULL || lua_isnil (L, 2)) {
2240 return luaL_error (L, "invalid arguments");
2241 }
2242
2243 if (part->part_type != RSPAMD_MIME_PART_UNDEFINED &&
2244 part->part_type != RSPAMD_MIME_PART_CUSTOM_LUA) {
2245 return luaL_error (L,
2246 "internal error: trying to set specific lua content on part of type %d",
2247 part->part_type);
2248 }
2249
2250 if (part->part_type == RSPAMD_MIME_PART_CUSTOM_LUA) {
2251 /* Push old specific data */
2252 lua_rawgeti (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2253 luaL_unref (L, LUA_REGISTRYINDEX, part->specific.lua_specific.cbref);
2254 }
2255 else {
2256 part->part_type = RSPAMD_MIME_PART_CUSTOM_LUA;
2257 lua_pushnil (L);
2258 }
2259
2260 /* Now, we push argument on the position 2 and save its reference */
2261 lua_pushvalue (L, 2);
2262 part->specific.lua_specific.cbref = luaL_ref (L, LUA_REGISTRYINDEX);
2263 /* Now stack has just a return value as luaL_ref removes value from stack */
2264
2265 gint ltype = lua_type (L, 2);
2266
2267 switch (ltype) {
2268 case LUA_TTABLE:
2269 part->specific.lua_specific.type = RSPAMD_LUA_PART_TABLE;
2270 break;
2271 case LUA_TSTRING:
2272 part->specific.lua_specific.type = RSPAMD_LUA_PART_STRING;
2273 break;
2274 case LUA_TUSERDATA:
2275 if (rspamd_lua_check_udata_maybe (L, 2, "rspamd{text}")) {
2276 part->specific.lua_specific.type = RSPAMD_LUA_PART_TEXT;
2277 }
2278 else {
2279 part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
2280 }
2281 break;
2282 case LUA_TFUNCTION:
2283 part->specific.lua_specific.type = RSPAMD_LUA_PART_FUNCTION;
2284 break;
2285 default:
2286 part->specific.lua_specific.type = RSPAMD_LUA_PART_UNKNOWN;
2287 break;
2288 }
2289
2290 return 1;
2291 }
2292
2293 void
luaopen_textpart(lua_State * L)2294 luaopen_textpart (lua_State * L)
2295 {
2296 rspamd_lua_new_class (L, "rspamd{textpart}", textpartlib_m);
2297 lua_pop (L, 1);
2298 }
2299
2300 void
luaopen_mimepart(lua_State * L)2301 luaopen_mimepart (lua_State * L)
2302 {
2303 rspamd_lua_new_class (L, "rspamd{mimepart}", mimepartlib_m);
2304 lua_pop (L, 1);
2305 }
2306
2307