1 /*- 2 * Copyright 2016 Vsevolod Stakhov 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef STAT_API_H_ 17 #define STAT_API_H_ 18 19 #include "config.h" 20 #include "task.h" 21 #include "lua/lua_common.h" 22 #include "contrib/libev/ev.h" 23 24 #ifdef __cplusplus 25 extern "C" { 26 #endif 27 28 /** 29 * @file stat_api.h 30 * High level statistics API 31 */ 32 33 #define RSPAMD_STAT_TOKEN_FLAG_TEXT (1u << 0) 34 #define RSPAMD_STAT_TOKEN_FLAG_META (1u << 1) 35 #define RSPAMD_STAT_TOKEN_FLAG_LUA_META (1u << 2) 36 #define RSPAMD_STAT_TOKEN_FLAG_EXCEPTION (1u << 3) 37 #define RSPAMD_STAT_TOKEN_FLAG_HEADER (1u << 4) 38 #define RSPAMD_STAT_TOKEN_FLAG_UNIGRAM (1u << 5) 39 #define RSPAMD_STAT_TOKEN_FLAG_UTF (1u << 6) 40 #define RSPAMD_STAT_TOKEN_FLAG_NORMALISED (1u << 7) 41 #define RSPAMD_STAT_TOKEN_FLAG_STEMMED (1u << 8) 42 #define RSPAMD_STAT_TOKEN_FLAG_BROKEN_UNICODE (1u << 9) 43 #define RSPAMD_STAT_TOKEN_FLAG_STOP_WORD (1u << 10) 44 #define RSPAMD_STAT_TOKEN_FLAG_SKIPPED (1u << 11) 45 #define RSPAMD_STAT_TOKEN_FLAG_INVISIBLE_SPACES (1u << 12) 46 #define RSPAMD_STAT_TOKEN_FLAG_EMOJI (1u << 13) 47 48 typedef struct rspamd_stat_token_s { 49 rspamd_ftok_t original; /* utf8 raw */ 50 rspamd_ftok_unicode_t unicode; /* array of unicode characters, normalized, lowercased */ 51 rspamd_ftok_t normalized; /* normalized and lowercased utf8 */ 52 rspamd_ftok_t stemmed; /* stemmed utf8 */ 53 guint flags; 54 } rspamd_stat_token_t; 55 56 typedef struct token_node_s { 57 guint64 data; 58 guint window_idx; 59 guint flags; 60 rspamd_stat_token_t *t1; 61 rspamd_stat_token_t *t2; 62 float values[]; 63 } rspamd_token_t; 64 65 struct rspamd_stat_ctx; 66 67 /** 68 * The results of statistics processing: 69 * - error 70 * - need to do additional job for processing 71 * - all processed 72 */ 73 typedef enum rspamd_stat_result_e { 74 RSPAMD_STAT_PROCESS_ERROR = 0, 75 RSPAMD_STAT_PROCESS_DELAYED = 1, 76 RSPAMD_STAT_PROCESS_OK 77 } rspamd_stat_result_t; 78 79 /** 80 * Initialise statistics modules 81 * @param cfg 82 */ 83 void rspamd_stat_init (struct rspamd_config *cfg, struct ev_loop *ev_base); 84 85 /** 86 * Finalize statistics 87 */ 88 void rspamd_stat_close (void); 89 90 /** 91 * Tokenize task 92 * @param st_ctx 93 * @param task 94 */ 95 void rspamd_stat_process_tokenize (struct rspamd_stat_ctx *st_ctx, 96 struct rspamd_task *task); 97 98 /** 99 * Classify the task specified and insert symbols if needed 100 * @param task 101 * @param L lua state 102 * @param err error returned 103 * @return TRUE if task has been classified 104 */ 105 rspamd_stat_result_t rspamd_stat_classify (struct rspamd_task *task, 106 lua_State *L, guint stage, GError **err); 107 108 109 /** 110 * Check if a task should be learned and set the appropriate flags for it 111 * @param task 112 * @return 113 */ 114 gboolean rspamd_stat_check_autolearn (struct rspamd_task *task); 115 116 /** 117 * Learn task as spam or ham, task must be processed prior to this call 118 * @param task task to learn 119 * @param spam if TRUE learn spam, otherwise learn ham 120 * @param L lua state 121 * @param classifier NULL to learn all classifiers, name to learn a specific one 122 * @param err error returned 123 * @return TRUE if task has been learned 124 */ 125 rspamd_stat_result_t rspamd_stat_learn (struct rspamd_task *task, 126 gboolean spam, lua_State *L, const gchar *classifier, 127 guint stage, 128 GError **err); 129 130 /** 131 * Get the overall statistics for all statfile backends 132 * @param cfg configuration 133 * @param total_learns the total number of learns is stored here 134 * @return array of statistical information 135 */ 136 rspamd_stat_result_t rspamd_stat_statistics (struct rspamd_task *task, 137 struct rspamd_config *cfg, 138 guint64 *total_learns, 139 ucl_object_t **res); 140 141 void rspamd_stat_unload (void); 142 143 #ifdef __cplusplus 144 } 145 #endif 146 147 #endif /* STAT_API_H_ */ 148