1/* This file is part of Mailfromd. -*- c -*- 2 Copyright (C) 2006-2021 Sergey Poznyakoff 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 16 17MF_BUILTIN_MODULE 18MF_COND(WITH_DSPAM) 19 20#include "srvcfg.h" 21#undef HAVE_CONFIG_H 22#define CONFIG_DEFAULT "/dev/null" 23#define LOGDIR "/dev/null" 24#include <libdspam.h> 25#include "mflib/dspam.h" 26#include "msg.h" 27 28/* User parameters */ 29MF_VAR(dspam_user, STRING, SYM_PRECIOUS); 30MF_VAR(dspam_group, STRING, SYM_PRECIOUS); 31MF_VAR(dspam_config, STRING, SYM_PRECIOUS); 32MF_VAR(dspam_profile, STRING, SYM_PRECIOUS); 33/* Output variables */ 34MF_VAR(dspam_signature, STRING, SYM_PRECIOUS); 35MF_VAR(dspam_probability, NUMBER); 36MF_VAR(dspam_confidence, NUMBER); 37MF_VAR(dspam_prec, NUMBER); 38#define DEFAULT_DSPAM_PREC 3 39 40static int _dspam_initialized; 41 42static void 43_dspam_shutdown() 44{ 45 dspam_shutdown_driver(NULL); 46} 47 48struct transtab 49{ 50 int trans_from; 51 int trans_to; 52}; 53 54static struct builtin_const_trans mode_trans[] = { 55 MF_TRANS(DSM_PROCESS), 56 MF_TRANS(DSM_CLASSIFY) 57}; 58 59static struct builtin_const_trans flag_trans[] = { 60 MF_TRANS(DSF_SIGNATURE), 61 MF_TRANS(DSF_NOISE), 62 MF_TRANS(DSF_WHITELIST) 63}; 64 65static struct builtin_const_trans tokenizer_trans[] = { 66 MF_TRANS(DSZ_WORD), 67 MF_TRANS(DSZ_CHAIN), 68 MF_TRANS(DSZ_SBPH), 69 MF_TRANS(DSZ_OSB), 70}; 71 72static struct builtin_const_trans tmod_trans[] = { 73 MF_TRANS(DST_TEFT), 74 MF_TRANS(DST_TOE), 75 MF_TRANS(DST_TUM) 76}; 77 78static struct builtin_const_trans class_trans[] = { 79 MF_TRANS(DSR_ISSPAM), 80 MF_TRANS(DSR_ISINNOCENT), 81 MF_TRANS(DSR_NONE) 82}; 83 84static struct builtin_const_trans source_trans[] = { 85 MF_TRANS(DSS_ERROR), 86 MF_TRANS(DSS_CORPUS), 87 MF_TRANS(DSS_INOCULATION), 88 MF_TRANS(DSS_NONE) 89}; 90 91static void 92ctx_cleanup(void *ptr) 93{ 94 DSPAM_CTX *ctx = ptr; 95 dspam_destroy(ctx); 96} 97 98 99struct config_entry { 100 int argc; 101 char **argv; 102# define config_keyword argv[0] 103# define config_value argv[1] 104}; 105 106static void 107free_config_entry(void *data) 108{ 109 struct config_entry *entry = data; 110 mu_argcv_free(entry->argc, entry->argv); 111} 112 113static int 114compare_config_entry(const void *a, const void *b) 115{ 116 struct config_entry const *ent_a = a; 117 struct config_entry const *ent_b = b; 118 return strcasecmp(ent_a->config_keyword, ent_b->config_keyword); 119} 120 121struct config_entry * 122config_find(mu_list_t config, const char *kw) 123{ 124 if (config) { 125 struct config_entry key, *ret; 126 key.argc = 1; 127 key.argv = (char **)&kw; 128 if (mu_list_locate(config, &key, (void **)&ret) == 0) 129 return ret; 130 } 131 return NULL; 132} 133 134const char * 135config_find_value(mu_list_t config, const char *kw) 136{ 137 struct config_entry *ent = config_find(config, kw); 138 if (ent) 139 return ent->config_value; 140 return NULL; 141} 142 143static int 144read_config(mu_list_t config, const char *file_name) 145{ 146 int rc; 147 mu_stream_t str, flt; 148 char *buf = NULL; 149 size_t size = 0, n; 150 static const char *args[] = { "INLINE-COMMENT", "#", "-r" }; 151 152 if ((rc = mu_file_stream_create(&str, file_name, MU_STREAM_READ))) { 153 mu_error(_("cannot open configuration file `%s': %s"), 154 file_name, mu_strerror(rc)); 155 return rc; 156 } 157 158 rc = mu_filter_create_args(&flt, str, 159 "INLINE-COMMENT", 160 MU_ARRAY_SIZE(args), args, 161 MU_FILTER_DECODE, 162 MU_STREAM_READ); 163 mu_stream_unref(str); 164 if (rc) { 165 mu_error (_("cannot open filter stream for `%s': %s"), 166 file_name, mu_strerror (rc)); 167 return rc; 168 } 169 str = flt; 170 171 while (mu_stream_getline(str, &buf, &size, &n) == 0 && n > 0) { 172 struct config_entry *ent; 173 struct mu_wordsplit ws; 174 175 if (mu_wordsplit(buf, &ws, MU_WRDSF_DEFFLAGS)) { 176 mu_error("mu_wordsplit: %s", 177 mu_wordsplit_strerror(&ws)); 178 break; 179 } 180 181 if (ws.ws_wordc) { 182 ent = mu_alloc(sizeof(*ent)); 183 ent->argc = ws.ws_wordc; 184 ent->argv = ws.ws_wordv; 185 mu_list_append(config, ent); 186 ws.ws_wordc = 0; 187 ws.ws_wordv = NULL; 188 } /* FIXME: diagnostics */ 189 mu_wordsplit_free(&ws); 190 } 191 free(buf); 192 mu_stream_close(str); 193 mu_stream_destroy(&str); 194 return 0; 195} 196 197 198static void * 199alloc_config() 200{ 201 mu_list_t config; 202 203 mu_list_create(&config); 204 mu_list_set_destroy_item(config, free_config_entry); 205 mu_list_set_comparator(config, compare_config_entry); 206 return config; 207} 208 209static void 210destroy_config(void *data) 211{ 212 mu_list_t config = data; 213 mu_list_destroy(&config); 214} 215 216MF_DECLARE_DATA(DSPAM_CONFIG, alloc_config, destroy_config) 217 218MF_DSEXP_SUPPRESS([<get_config>],[< 219static mu_list_t 220get_config(eval_environ_t env, mu_list_t config) 221{ 222 /* Initialize dspam library and set up global variables, if 223 needed */ 224 if (!_dspam_initialized) { 225 const char *config_file = MF_VAR_STRING(dspam_config); 226 if (config_file && config_file[0]) 227 read_config(config, config_file); 228 229 MF_ASSERT(libdspam_init(config_find_value(config, 230 "StorageDriver")) 231 == 0, 232 mfe_failure, 233 "libdspam_init failed"); 234 235 dspam_init_driver(NULL); 236 atexit(_dspam_shutdown); 237 _dspam_initialized = 1; 238 239 if (MF_VAR_STRING(dspam_user) == NULL) 240 MF_VAR_SET_STRING(dspam_user, mf_server_user); 241 242 if (MF_VAR_REF(dspam_prec, uint) == 0) 243 MF_VAR_REF(dspam_prec, uint, DEFAULT_DSPAM_PREC); 244 } 245 246 return config; 247} 248>]) 249 250struct keyword_prop { 251 char *name; 252 int len; 253 int flag; 254}; 255 256#define PROP_ATTACH 1 257#define PROP_ALGORITHM 2 258#define PROP_TOKENIZER 3 259#define PROP_PVALUE 4 260 261static struct keyword_prop keyword_prop[] = { 262 { "IgnoreHeader", 0, PROP_ATTACH }, 263 { "MySQL", 5, PROP_ATTACH }, 264 { "PgSQL", 5, PROP_ATTACH }, 265 { "SQLite", 6, PROP_ATTACH }, 266 { "LocalMX", 0, PROP_ATTACH }, 267 { "Storage", 7, PROP_ATTACH }, 268 { "Processor", 9, PROP_ATTACH }, 269 { "Hash", 4, PROP_ATTACH }, 270 { "Algorithm", 0, PROP_ALGORITHM }, 271 { "PValue", 0, PROP_PVALUE }, 272 { "Tokenizer", 0, PROP_TOKENIZER }, 273 { NULL } 274}; 275 276static struct mu_kwd algorithm_kwd[] = { 277 { "graham", DSA_GRAHAM }, 278 { "burton", DSA_BURTON }, 279 { "robinson", DSA_ROBINSON }, 280 { "naive", DSA_NAIVE }, 281 { "chi-square", DSA_CHI_SQUARE }, 282 { NULL } 283}; 284 285static struct mu_kwd pvalue_kwd[] = { 286 { "robinson", DSP_ROBINSON }, 287 { "markov", DSP_MARKOV }, 288 { NULL } 289}; 290 291static struct mu_kwd tokenizer_kwd[] = { 292 { "word", DSZ_WORD }, 293 { "chain", DSZ_CHAIN }, 294 { "chained", DSZ_CHAIN }, 295 { "sbph", DSZ_SBPH }, 296 { "osb", DSZ_OSB }, 297 { NULL } 298}; 299 300static void 301set_context_attributes(DSPAM_CTX *ctx, mu_list_t config, const char *profile, 302 int ignore_tokenizer) 303{ 304 mu_iterator_t itr; 305 int algo = 0; 306 int algo_set = 0; 307 int pvalue = 0; 308 int pvalue_set = 0; 309 int tokenizer = 0; 310 int tokenizer_set = 0; 311 int n; 312 313 if (!profile || !profile[0]) 314 profile = config_find_value(config, "DefaultProfile"); 315 316 mu_list_get_iterator(config, &itr); 317 for (mu_iterator_first(itr); !mu_iterator_is_done(itr); 318 mu_iterator_next(itr)) { 319 struct config_entry *ent; 320 struct keyword_prop *prop; 321 322 mu_iterator_current (itr, (void **)&ent); 323 for (prop = keyword_prop; prop->name; prop++) { 324 char *p; 325 326 if ((prop->len ? 327 strncasecmp(ent->config_keyword, prop->name, 328 prop->len) : 329 strcasecmp(ent->config_keyword, prop->name)) 330 == 0) { 331 switch (prop->flag) { 332 case PROP_ATTACH: 333 dspam_addattribute(ctx, 334 ent->config_keyword, 335 ent->config_value); 336 break; 337 338 case PROP_ALGORITHM: 339 algo_set = 1; 340 if (mu_kwd_xlat_name_ci(algorithm_kwd, 341 ent->config_value, 342 &n) == 0) 343 algo |= n; 344 break; 345 346 case PROP_PVALUE: 347 if (pvalue_set) 348 continue; 349 if (mu_kwd_xlat_name_ci(pvalue_kwd, 350 ent->config_value, 351 &n) == 0) { 352 pvalue = n; 353 pvalue_set = 1; 354 } 355 break; 356 357 case PROP_TOKENIZER: 358 tokenizer_set = 1; 359 if (mu_kwd_xlat_name_ci(tokenizer_kwd, 360 ent->config_value, 361 &n) == 0) 362 tokenizer |= n; 363 break; 364 } 365 } else if (profile && 366 (p = strchr(ent->config_keyword, '.')) && 367 strcasecmp(p + 1, profile) == 0) { 368 size_t len = p - ent->config_keyword; 369 char *key = mu_alloc(len + 1); 370 memcpy(key, ent->config_keyword, len); 371 key[len] = 0; 372 dspam_addattribute(ctx, key, 373 ent->config_value); 374 free(key); 375 } 376 } 377 } 378 mu_iterator_destroy(&itr); 379 380 if (algo_set) 381 ctx->algorithms = algo | (pvalue_set ? pvalue : DSP_GRAHAM); 382 383 if (!ignore_tokenizer && tokenizer_set) 384 ctx->tokenizer = tokenizer; 385 386 if ((ctx->algorithms & DSA_CHI_SQUARE) && 387 !(ctx->algorithms & DSP_ROBINSON)) 388 mu_diag_output(MU_DIAG_WARNING, 389 "Chi-Square algorithm enabled with other " 390 "algorithms: false positives may ensue"); 391} 392 393/* number dspam(number msg, number flags; number class_source) */ 394MF_DSEXP 395MF_DEFUN(dspam, NUMBER, NUMBER nmsg, NUMBER mode_flags, OPTIONAL, NUMBER class_src) 396{ 397 int rc; 398 DSPAM_CTX *ctx; /* DSPAM Context */ 399 int mode; 400 int flags; 401 mu_message_t msg; 402 mu_stream_t msgstr, instr; 403 const char *msgbuf; 404 size_t msgsize; 405 unsigned prec; 406 mu_transport_t trans[2]; 407 mu_list_t config = get_config(env, MF_GET_DATA); 408 int tokenizer; 409 410 /* Prepare message buffer */ 411 msg = bi_message_from_descr(env, nmsg); 412 rc = mu_message_size(msg, &msgsize); 413 MF_ASSERT(rc == 0, 414 mfe_failure, 415 "mu_message_size: %s", mu_strerror(rc)); 416 417 rc = mu_memory_stream_create(&msgstr, MU_STREAM_RDWR); 418 MF_ASSERT(rc == 0, 419 mfe_failure, 420 "mu_static_memory_stream_create: %s", 421 mu_strerror(rc)); 422 MF_DCL_CLEANUP(msgstr, _builtin_stream_cleanup); 423 424 rc = mu_message_get_streamref(msg, &instr); 425 MF_ASSERT(rc == 0, 426 mfe_failure, 427 "mu_message_get_streamref: %s", 428 mu_strerror(rc)); 429 MF_DCL_CLEANUP(instr, _builtin_stream_cleanup); 430 431 rc = mu_stream_copy(msgstr, instr, msgsize, NULL); 432 MF_ASSERT(rc == 0, 433 mfe_failure, 434 "mu_stream_copy: %s", 435 mu_strerror(rc)); 436 437 MF_CLEANUP(instr); 438 439 mu_stream_ioctl(msgstr, MU_IOCTL_TRANSPORT, MU_IOCTL_OP_GET, trans); 440 msgbuf = (const char*)trans[0]; 441 442 /* Prepare DSPAM context */ 443 MF_ASSERT(_builtin_const_to_c(mode_trans, MU_ARRAY_SIZE(mode_trans), 444 mode_flags & _MFL__DSM_MASK, &mode) == 0, 445 mfe_failure, 446 "bad dspam mode"); 447 flags = _builtin_const_to_bitmap(flag_trans, MU_ARRAY_SIZE(flag_trans), 448 mode_flags); 449 450 /* Create the DSPAM context */ 451 ctx = dspam_create(MF_VAR_STRING(dspam_user), 452 MF_VAR_STRING(dspam_group), 453 config_find_value(config, "Home"), mode, 454 flags); 455 MF_ASSERT(ctx != NULL, 456 mfe_failure, 457 "dspam_create failed"); 458 MF_DCL_CLEANUP(ctx, ctx_cleanup); 459 460 /* Use graham and robinson algorithms, graham's p-values */ 461 ctx->algorithms = DSA_GRAHAM | DSA_BURTON | DSP_GRAHAM; 462 463 tokenizer = mode_flags & _MFL__DSZ_MASK; 464 set_context_attributes(ctx, config, MF_VAR_STRING(dspam_profile), 465 tokenizer); 466 467 MF_ASSERT(dspam_attach(ctx, NULL) == 0, 468 mfe_failure, 469 "dspam_attach failed"); 470 471 /* Configure tokenizer */ 472 if (tokenizer) 473 MF_ASSERT(_builtin_const_to_c(tokenizer_trans, 474 MU_ARRAY_SIZE(tokenizer_trans), 475 tokenizer, &ctx->tokenizer) == 0, 476 mfe_failure, 477 "bad dspam tokenizer"); 478 479 /* Set training mode */ 480 MF_ASSERT(_builtin_const_to_c(tmod_trans, MU_ARRAY_SIZE(tmod_trans), 481 mode_flags & _MFL__DST_MASK, &ctx->training_mode) 482 == 0, 483 mfe_failure, 484 "bad dspam training mode"); 485 486 /* Set up classification and source */ 487 if (MF_DEFINED(class_src)) { 488 MF_ASSERT(_builtin_const_to_c(class_trans, 489 MU_ARRAY_SIZE(class_trans), 490 class_src & _MFL__DSR_MASK, 491 &ctx->classification) == 0, 492 mfe_failure, 493 "bad dspam classification flag"); 494 MF_ASSERT(_builtin_const_to_c(source_trans, 495 MU_ARRAY_SIZE(source_trans), 496 class_src & _MFL__DSS_MASK, 497 &ctx->source) == 0, 498 mfe_failure, 499 "bad dspam source flag"); 500 } 501 502 /* Process the message */ 503 MF_ASSERT(dspam_process(ctx, msgbuf) == 0, 504 mfe_failure, 505 "dspam_process failed"); 506 507 rc = MF_VAR_REF(dspam_prec, uint); 508 prec = 1; 509 while (rc--) 510 prec *= 10; 511 MF_VAR_REF(dspam_probability, ulong, ctx->probability * prec); 512 MF_VAR_REF(dspam_confidence, ulong, ctx->confidence * prec); 513 if (flags & DSF_SIGNATURE) { 514 char signame[128]; 515 _ds_create_signature_id(ctx, signame, sizeof(signame)); 516 _ds_set_signature(ctx, ctx->signature, signame); 517 MF_VAR_SET_STRING(dspam_signature, signame); 518 } 519 MF_ASSERT(_builtin_c_to_const(class_trans, 520 MU_ARRAY_SIZE(class_trans), 521 ctx->result, 522 &rc) == 0, 523 mfe_failure, 524 "unrecognized dspam result"); 525 MF_CLEANUP(ctx); 526 527 /* FIXME: Any additional processing? */ 528 529 MF_RETURN(rc); 530} 531END 532 533