1 /*- 2 * Copyright 2020 Vsevolod Stakhov 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef RSPAMD_LUA_PARSERS_H 18 #define RSPAMD_LUA_PARSERS_H 19 20 #include "lua_common.h" 21 22 /*** 23 * @function parsers.tokenize_text(input[, exceptions]) 24 * Create tokens from a text using optional exceptions list 25 * @param {text/string} input input data 26 * @param {table} exceptions, a table of pairs containing <start_pos,length> of exceptions in the input 27 * @return {table/strings} list of strings representing words in the text 28 */ 29 LUA_PUBLIC_FUNCTION_DEF (parsers, tokenize_text); 30 31 /*** 32 * @function parsers.parse_html(input) 33 * Parses HTML and returns the according text 34 * @param {string|text} in input HTML 35 * @return {rspamd_text} processed text with no HTML tags 36 */ 37 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_html); 38 39 /*** 40 * @function parsers.parse_mail_address(str, [pool]) 41 * Parses email address and returns a table of tables in the following format: 42 * 43 * - `raw` - the original value without any processing 44 * - `name` - name of internet address in UTF8, e.g. for `Vsevolod Stakhov <blah@foo.com>` it returns `Vsevolod Stakhov` 45 * - `addr` - address part of the address 46 * - `user` - user part (if present) of the address, e.g. `blah` 47 * - `domain` - domain part (if present), e.g. `foo.com` 48 * - `flags` - table with following keys set to true if given condition fulfilled: 49 * - [valid] - valid SMTP address in conformity with https://tools.ietf.org/html/rfc5321#section-4.1. 50 * - [ip] - domain is IPv4/IPv6 address 51 * - [braced] - angled `<blah@foo.com>` address 52 * - [quoted] - quoted user part 53 * - [empty] - empty address 54 * - [backslash] - user part contains backslash 55 * - [8bit] - contains 8bit characters 56 * 57 * @param {string} str input string 58 * @param {rspamd_mempool} pool memory pool to use 59 * @return {table/tables} parsed list of mail addresses 60 */ 61 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_mail_address); 62 63 /*** 64 * @function parsers.parse_content_type(ct_string, mempool) 65 * Parses content-type string to a table: 66 * - `type` 67 * - `subtype` 68 * - `charset` 69 * - `boundary` 70 * - other attributes 71 * 72 * @param {string} ct_string content type as string 73 * @param {rspamd_mempool} mempool needed to store temporary data (e.g. task pool) 74 * @return table or nil if cannot parse content type 75 */ 76 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_content_type); 77 78 /*** 79 * @function parsers.parse_smtp_date(str[, local_tz]) 80 * Converts an SMTP date string to unix timestamp 81 * @param {string} str input string 82 * @param {boolean} local_tz convert to local tz if `true` 83 * @return {number} time as unix timestamp (converted to float) 84 */ 85 LUA_PUBLIC_FUNCTION_DEF (parsers, parse_smtp_date); 86 87 88 #endif //RSPAMD_LUA_PARSERS_H 89