1 /*-
2  * Copyright 2020 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef RSPAMD_LUA_PARSERS_H
18 #define RSPAMD_LUA_PARSERS_H
19 
20 #include "lua_common.h"
21 
22 /***
23  * @function parsers.tokenize_text(input[, exceptions])
24  * Create tokens from a text using an optional exceptions list
25  * @param {text/string} input input data
26  * @param {table} exceptions optional table of pairs containing <start_pos,length> of exceptions in the input
27  * @return {table/strings} list of strings representing words in the text
28  */
29 LUA_PUBLIC_FUNCTION_DEF (parsers, tokenize_text);
30 
31 /***
32  * @function parsers.parse_html(input)
33  * Parses HTML and returns the corresponding text
34  * @param {string|text} input input HTML
35  * @return {rspamd_text} processed text with no HTML tags
36  */
37 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_html);
38 
39 /***
40  * @function parsers.parse_mail_address(str[, pool])
41  * Parses email address and returns a table of tables in the following format:
42  *
43  * - `raw` - the original value without any processing
44  * - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov`
45  * - `addr` - address part of the address
46  * - `user` - user part (if present) of the address, e.g. `blah`
47  * - `domain` - domain part (if present), e.g. `foo.com`
48  * - `flags` - table with the following keys set to true if the given condition is fulfilled:
49  *   - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1.
50  *   - [ip] - domain is IPv4/IPv6 address
51  *   - [braced] - angled `<blah@foo.com>` address
52  *   - [quoted] - quoted user part
53  *   - [empty] - empty address
54  *   - [backslash] - user part contains backslash
55  *   - [8bit] - contains 8bit characters
56  *
57  * @param {string} str input string
58  * @param {rspamd_mempool} pool optional memory pool to use
59  * @return {table/tables} parsed list of mail addresses
60  */
61 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_mail_address);
62 
63 /***
64  * @function parsers.parse_content_type(ct_string, mempool)
65  * Parses a content-type string to a table with the following keys:
66  * - `type`
67  * - `subtype`
68  * - `charset`
69  * - `boundary`
70  * - other attributes
71  *
72  * @param {string} ct_string content type as string
73  * @param {rspamd_mempool} mempool needed to store temporary data (e.g. task pool)
74  * @return {table} parsed content type, or nil if the content type cannot be parsed
75  */
76 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_content_type);
77 
78 /***
79  * @function parsers.parse_smtp_date(str[, local_tz])
80  * Converts an SMTP date string to a unix timestamp
81  * @param {string} str input string
82  * @param {boolean} local_tz optional flag: convert to local tz if `true`
83  * @return {number} time as unix timestamp (converted to float)
84  */
85 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_smtp_date);
86 
87 
88 #endif //RSPAMD_LUA_PARSERS_H
89