1 /*-
2 * Copyright 2021 Vsevolod Stakhov
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <mempool_vars_internal.h>
18 #include "config.h"
19 #include "libserver/url.h"
20 #include "lua/lua_common.h"
21 #include "libserver/cfg_file.h"
22 #include "mime_string.hxx"
23 #include "smtp_parsers.h"
24 #include "message.h"
25 #include "received.hxx"
26 #include "frozen/string.h"
27 #include "frozen/unordered_map.h"
28
29 namespace rspamd::mime {
30
/**
 * Kinds of clauses recognised while splitting a Received header.
 */
enum class received_part_type {
	RSPAMD_RECEIVED_PART_FROM, /* clause introduced by the `from` keyword */
	RSPAMD_RECEIVED_PART_BY, /* clause introduced by the `by` keyword */
	RSPAMD_RECEIVED_PART_FOR, /* clause introduced by the `for` keyword */
	RSPAMD_RECEIVED_PART_WITH, /* clause introduced by the `with` keyword */
	RSPAMD_RECEIVED_PART_ID, /* clause introduced by the `id` keyword */
	RSPAMD_RECEIVED_PART_UNKNOWN, /* any other token; its content is scanned but not stored */
};
39
/**
 * One clause of a Received header: its kind, the plain data that follows the
 * keyword and the parenthesised comments found inside the clause.
 */
struct received_part {
	received_part_type type; /* kind of the clause */
	mime_string data; /* plain (non-comment) content of the clause */
	std::vector<mime_string> comments; /* content of each `(...)` comment, in order of appearance */

	/* Creates an empty part; `data` is built with `received_char_filter` */
	explicit received_part(received_part_type t)
		: type(t),
		  data(received_char_filter) {}
};
49
50 static inline auto
received_part_set_or_append(const gchar * begin,gsize len,mime_string & dest)51 received_part_set_or_append(const gchar *begin,
52 gsize len,
53 mime_string &dest) -> void
54 {
55 if (len == 0) {
56 return;
57 }
58
59 dest.append(begin, len);
60 dest.trim(" \t");
61 }
62
63 static auto
received_process_part(const std::string_view & data,received_part_type type,std::ptrdiff_t & last,received_part & npart)64 received_process_part(const std::string_view &data,
65 received_part_type type,
66 std::ptrdiff_t &last,
67 received_part &npart) -> bool
68 {
69 auto obraces = 0, ebraces = 0;
70 auto seen_tcpinfo = false;
71 enum _parse_state {
72 skip_spaces,
73 in_comment,
74 read_data,
75 read_tcpinfo,
76 all_done
77 } state, next_state;
78
79 /* In this function, we just process comments and data separately */
80 const auto *p = data.data();
81 const auto *end = p + data.size();
82 const auto *c = p;
83
84 state = skip_spaces;
85 next_state = read_data;
86
87 while (p < end) {
88 switch (state) {
89 case skip_spaces:
90 if (!g_ascii_isspace(*p)) {
91 c = p;
92 state = next_state;
93 }
94 else {
95 p++;
96 }
97 break;
98 case in_comment:
99 if (*p == '(') {
100 obraces++;
101 }
102 else if (*p == ')') {
103 ebraces++;
104
105 if (ebraces >= obraces) {
106 if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
107 if (p > c) {
108 npart.comments.emplace_back(received_char_filter);
109 auto &comment = npart.comments.back();
110 received_part_set_or_append(c, p - c,
111 comment);
112 }
113 }
114
115 p++;
116 c = p;
117 state = skip_spaces;
118 next_state = read_data;
119
120 continue;
121 }
122 }
123
124 p++;
125 break;
126 case read_data:
127 if (*p == '(') {
128 if (p > c) {
129 if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
130 received_part_set_or_append(c, p - c,
131 npart.data);
132 }
133 }
134
135 state = in_comment;
136 obraces = 1;
137 ebraces = 0;
138 p++;
139 c = p;
140 }
141 else if (g_ascii_isspace (*p)) {
142 if (p > c) {
143 if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
144 received_part_set_or_append(c, p - c,
145 npart.data);
146 }
147 }
148
149 state = skip_spaces;
150 next_state = read_data;
151 c = p;
152 }
153 else if (*p == ';') {
154 /* It is actually delimiter of date part if not in the comments */
155 if (p > c) {
156 if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
157 received_part_set_or_append(c, p - c,
158 npart.data);
159 }
160 }
161
162 state = all_done;
163 continue;
164 }
165 else if (npart.data.size() > 0) {
166 /* We have already received data and find something with no ( */
167 if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
168 /* Check if we have something special here, such as TCPinfo */
169 if (*c == '[') {
170 state = read_tcpinfo;
171 p++;
172 }
173 else {
174 state = all_done;
175 continue;
176 }
177 }
178 else {
179 state = all_done;
180 continue;
181 }
182 }
183 else {
184 p++;
185 }
186 break;
187 case read_tcpinfo:
188 if (*p == ']') {
189 received_part_set_or_append(c, p - c + 1,
190 npart.data);
191 seen_tcpinfo = TRUE;
192 state = skip_spaces;
193 next_state = read_data;
194 c = p;
195 }
196 p++;
197 break;
198 case all_done:
199 if (p > data.data()) {
200 last = p - data.data();
201 return true;
202 }
203 else {
204 /* Empty element */
205 return false;
206 }
207 break;
208 }
209 }
210
211 /* Leftover */
212 switch (state) {
213 case read_data:
214 if (p > c) {
215 if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
216 received_part_set_or_append(c, p - c,
217 npart.data);
218 }
219
220 last = p - data.data();
221
222 return true;
223 }
224 break;
225 case skip_spaces:
226 if (p > data.data()) {
227 last = p - data.data();
228
229 return true;
230 }
231 default:
232 break;
233 }
234
235 return false;
236 }
237
238 template <std::size_t N>
lit_compare_lowercase(const char lit[N],const char * in)239 constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
240 {
241 for (auto i = 0; i < N; i ++) {
242 if (lc_map[(unsigned char)in[i]] != lit[i]) {
243 return false;
244 }
245 }
246
247 return true;
248 }
249
250 static auto
received_spill(const std::string_view & in,std::ptrdiff_t & date_pos)251 received_spill(const std::string_view &in,
252 std::ptrdiff_t &date_pos) -> std::vector<received_part>
253 {
254 std::vector<received_part> parts;
255 std::ptrdiff_t pos = 0;
256 auto seen_from = false, seen_by = false;
257
258 const auto *p = in.data();
259 const auto *end = p + in.size();
260
261 auto skip_spaces = [&p, end]() {
262 while (p < end && g_ascii_isspace (*p)) {
263 p++;
264 }
265 };
266
267 skip_spaces();
268
269 /* Skip SMTP comments */
270 if (*p == '(') {
271 auto obraces = 0, ebraces = 0;
272
273 while (p < end) {
274 if (*p == ')') {
275 ebraces ++;
276 }
277 else if (*p == '(') {
278 obraces ++;
279 }
280
281 p ++;
282
283 if (obraces == ebraces) {
284 /* Skip spaces after */
285 skip_spaces();
286 break;
287 }
288 }
289 }
290
291 auto len = end - p;
292
293 if (len == 0) {
294 return parts;
295 }
296
297 auto maybe_process_part = [&](received_part_type what) -> bool {
298 parts.emplace_back(what);
299 auto &rcvd_part = parts.back();
300 auto chunk = std::string_view{p, (std::size_t)(end - p)};
301
302 if (!received_process_part(chunk, what, pos, rcvd_part)) {
303 parts.pop_back();
304
305 return false;
306 }
307
308 return true;
309 };
310
311 if (len > 4 && lit_compare_lowercase<4>("from", p)) {
312 p += sizeof("from") - 1;
313
314 /* We can now store from part */
315 if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
316 /* Do not accept malformed from */
317 return {};
318 }
319
320 g_assert (pos != 0);
321 p += pos;
322 len = end > p ? end - p : 0;
323 seen_from = true;
324 }
325
326 if (len > 2 && lit_compare_lowercase<2>("by", p)) {
327 p += sizeof("by") - 1;
328
329 if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
330 return {};
331 }
332
333 g_assert (pos != 0);
334 p += pos;
335 len = end > p ? end - p : 0;
336 seen_by = true;
337 }
338
339 if (!seen_from && !seen_by) {
340 /* Useless received */
341 return {};
342 }
343
344 while (p < end) {
345 bool got_part = false;
346 if (*p == ';') {
347 /* We are at the date separator, stop here */
348 date_pos = p - in.data() + 1;
349 break;
350 }
351 else {
352 if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
353 p += sizeof("with") - 1;
354
355 got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
356 }
357 else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
358 p += sizeof("for") - 1;
359 got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
360 }
361 else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
362 p += sizeof("id") - 1;
363 got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
364 }
365 else {
366 while (p < end) {
367 if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
368 p++;
369 }
370 else {
371 break;
372 }
373 }
374
375 if (p == end) {
376 return {};
377 }
378 else if (*p == ';') {
379 date_pos = p - in.data() + 1;
380 break;
381 }
382 else {
383 got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
384 }
385 }
386
387 if (!got_part) {
388 p++;
389 len = end > p ? end - p : 0;
390 }
391 else {
392 g_assert (pos != 0);
393 p += pos;
394 len = end > p ? end - p : 0;
395 }
396 }
397 }
398
399 return parts;
400 }
401
/*
 * Parse flags passed to rspamd_parse_inet_address_pool for addresses taken
 * from Received headers in this file: RSPAMD_INET_ADDRESS_PARSE_REMOTE
 * combined with RSPAMD_INET_ADDRESS_PARSE_NO_UNIX.
 */
#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
	(rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
404
405 static auto
received_process_rdns(rspamd_mempool_t * pool,const std::string_view & in,mime_string & dest)406 received_process_rdns(rspamd_mempool_t *pool,
407 const std::string_view &in,
408 mime_string &dest) -> bool
409 {
410 auto seen_dot = false;
411
412 const auto *p = in.data();
413 const auto *end = p + in.size();
414
415 if (in.empty()) {
416 return false;
417 }
418
419 if (*p == '[' && *(end - 1) == ']' && in.size() > 2) {
420 /* We have enclosed ip address */
421 auto *addr = rspamd_parse_inet_address_pool(p + 1,
422 (end - p) - 2,
423 pool,
424 RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
425
426 if (addr) {
427 const gchar *addr_str;
428
429 if (rspamd_inet_address_get_port(addr) != 0) {
430 addr_str = rspamd_inet_address_to_string_pretty(addr);
431 }
432 else {
433 addr_str = rspamd_inet_address_to_string(addr);
434 }
435
436 dest.assign_copy(std::string_view{addr_str});
437
438 return true;
439 }
440 }
441
442 auto hlen = 0u;
443
444 while (p < end) {
445 if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) {
446 if (*p == '.') {
447 seen_dot = true;
448 }
449
450 hlen++;
451 }
452 else {
453 break;
454 }
455
456 p++;
457 }
458
459 if (hlen > 0) {
460 if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) {
461 /* All data looks like a hostname */
462 dest.assign_copy(std::string_view{in.data(), hlen});
463
464 return true;
465 }
466 }
467
468 return false;
469 }
470
471 static auto
received_process_host_tcpinfo(rspamd_mempool_t * pool,received_header & rh,const std::string_view & in)472 received_process_host_tcpinfo(rspamd_mempool_t *pool,
473 received_header &rh,
474 const std::string_view &in) -> bool
475 {
476 rspamd_inet_addr_t *addr = nullptr;
477 auto ret = false;
478
479 if (in.empty()) {
480 return false;
481 }
482
483 if (in[0] == '[') {
484 /* Likely Exim version */
485
486 auto brace_pos = in.find(']');
487
488 if (brace_pos != std::string_view::npos) {
489 auto substr_addr = in.substr(1, brace_pos - 1);
490 addr = rspamd_parse_inet_address_pool(substr_addr.data(),
491 substr_addr.size(),
492 pool,
493 RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
494
495 if (addr) {
496 rh.addr = addr;
497 rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
498 }
499 }
500 }
501 else {
502 if (g_ascii_isxdigit(in[0])) {
503 /* Try to parse IP address */
504 addr = rspamd_parse_inet_address_pool(in.data(),
505 in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
506 if (addr) {
507 rh.addr = addr;
508 rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
509 }
510 }
511
512 if (!addr) {
513 /* Try canonical Postfix version: rdns [ip] */
514 auto obrace_pos = in.find('[');
515
516 if (obrace_pos != std::string_view::npos) {
517 auto ebrace_pos = in.rfind(']');
518
519 if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) {
520 auto substr_addr = in.substr(obrace_pos + 1,
521 ebrace_pos - obrace_pos - 1);
522 addr = rspamd_parse_inet_address_pool(substr_addr.data(),
523 substr_addr.size(),
524 pool,
525 RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
526
527 if (addr) {
528 rh.addr = addr;
529 rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
530
531 /* Process with rDNS */
532 auto rdns_substr = in.substr(0, obrace_pos);
533
534 if (received_process_rdns(pool,rdns_substr,rh.real_hostname)) {
535 ret = true;
536 }
537 }
538 }
539 }
540 else {
541 /* Hostname or some crap, sigh... */
542 if (received_process_rdns(pool, in, rh.real_hostname)) {
543 ret = true;
544 }
545 }
546 }
547 }
548
549 return ret;
550 }
551
552 static void
received_process_from(rspamd_mempool_t * pool,const received_part & rpart,received_header & rh)553 received_process_from(rspamd_mempool_t *pool,
554 const received_part &rpart,
555 received_header &rh)
556 {
557 if (rpart.data.size() > 0) {
558 /* We have seen multiple cases:
559 * - [ip] (hostname/unknown [real_ip])
560 * - helo (hostname/unknown [real_ip])
561 * - [ip]
562 * - hostname
563 * - hostname ([ip]:port helo=xxx)
564 * Maybe more...
565 */
566 auto seen_ip_in_data = false;
567
568 if (!rpart.comments.empty()) {
569 /* We can have info within comment as part of RFC */
570 received_process_host_tcpinfo(
571 pool, rh,
572 rpart.comments[0].as_view());
573 }
574
575 if (rh.real_ip.size() == 0) {
576 /* Try to do the same with data */
577 if (received_process_host_tcpinfo(
578 pool, rh,
579 rpart.data.as_view())) {
580 seen_ip_in_data = true;
581 }
582 }
583
584 if (!seen_ip_in_data) {
585 if (rh.real_ip.size() != 0) {
586 /* Get anounced hostname (usually helo) */
587 received_process_rdns(pool,
588 rpart.data.as_view(),
589 rh.from_hostname);
590 }
591 else {
592 received_process_host_tcpinfo(pool,
593 rh, rpart.data.as_view());
594 }
595 }
596 }
597 else {
598 /* rpart->dlen = 0 */
599 if (!rpart.comments.empty()) {
600 received_process_host_tcpinfo(
601 pool, rh,
602 rpart.comments[0].as_view());
603 }
604 }
605 }
606
607 static auto
received_header_parse(received_header_chain & chain,rspamd_mempool_t * pool,const std::string_view & in,struct rspamd_mime_header * hdr)608 received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
609 const std::string_view &in,
610 struct rspamd_mime_header *hdr) -> bool
611 {
612 std::ptrdiff_t date_pos = -1;
613
614 static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({
615 {"smtp", received_flags::SMTP},
616 {"esmtp", received_flags::ESMTP},
617 {"esmtpa", received_flags::ESMTPA |
618 received_flags::AUTHENTICATED},
619 {"esmtpsa", received_flags::ESMTPSA |
620 received_flags::SSL |
621 received_flags::AUTHENTICATED},
622 {"esmtps", received_flags::ESMTPS |
623 received_flags::SSL},
624 {"lmtp", received_flags::LMTP},
625 {"imap", received_flags::IMAP},
626 {"imaps", received_flags::IMAP |
627 received_flags::SSL},
628 {"http", received_flags::HTTP},
629 {"https", received_flags::HTTP |
630 received_flags::SSL},
631 {"local", received_flags::LOCAL}
632 });
633
634 auto parts = received_spill(in, date_pos);
635
636 if (parts.empty()) {
637 return false;
638 }
639
640 auto &rh = chain.new_received();
641
642 rh.flags = received_flags::UNKNOWN;
643 rh.hdr = hdr;
644
645 for (const auto &part : parts) {
646 switch (part.type) {
647 case received_part_type::RSPAMD_RECEIVED_PART_FROM:
648 received_process_from(pool, part, rh);
649 break;
650 case received_part_type::RSPAMD_RECEIVED_PART_BY:
651 received_process_rdns(pool,
652 part.data.as_view(),
653 rh.by_hostname);
654 break;
655 case received_part_type::RSPAMD_RECEIVED_PART_WITH:
656 if (part.data.size() > 0) {
657 auto proto_flag_it = protos_map.find(part.data.as_view());
658
659 if (proto_flag_it != protos_map.end()) {
660 rh.flags = proto_flag_it->second;
661 }
662 }
663 break;
664 case received_part_type::RSPAMD_RECEIVED_PART_FOR:
665 rh.for_mbox.assign_copy(part.data);
666 rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
667 rh.for_mbox.size());
668 break;
669 default:
670 /* Do nothing */
671 break;
672 }
673 }
674
675 if (!rh.real_hostname.empty() && rh.from_hostname.empty()) {
676 rh.from_hostname.assign_copy(rh.real_hostname);
677 }
678
679 if (date_pos > 0 && date_pos < in.size()) {
680 auto date_sub = in.substr(date_pos);
681 rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_sub.data(),
682 date_sub.size(), nullptr);
683 }
684
685 return true;
686 }
687
688 static auto
received_maybe_fix_task(struct rspamd_task * task)689 received_maybe_fix_task(struct rspamd_task *task) -> bool
690 {
691 auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
692
693 if (recv_chain_ptr) {
694 auto need_recv_correction = false;
695
696 auto top_recv_maybe = recv_chain_ptr->get_received(0);
697
698 if (top_recv_maybe.has_value()) {
699 auto &top_recv = top_recv_maybe.value().get();
700
701 const auto *raddr = top_recv.addr;
702 if (top_recv.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
703 need_recv_correction = true;
704 }
705 else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
706 if (!raddr) {
707 need_recv_correction = true;
708 }
709 else {
710 if (rspamd_inet_address_compare(raddr, task->from_addr, FALSE) != 0) {
711 need_recv_correction = true;
712 }
713 }
714 }
715
716 if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP)
717 && task->from_addr) {
718 msg_debug_task ("the first received seems to be"
719 " not ours, prepend it with fake one");
720
721 auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head);
722 trecv.flags |= received_flags::ARTIFICIAL;
723
724 if (task->flags & RSPAMD_TASK_FLAG_SSL) {
725 trecv.flags |= received_flags::SSL;
726 }
727
728 if (task->user) {
729 trecv.flags |= received_flags::AUTHENTICATED;
730 }
731
732 trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
733
734 const auto *mta_name = (const char*)rspamd_mempool_get_variable(task->task_pool,
735 RSPAMD_MEMPOOL_MTA_NAME);
736
737 if (mta_name) {
738 trecv.by_hostname.assign_copy(std::string_view(mta_name));
739 }
740 trecv.addr = rspamd_inet_address_copy(task->from_addr);
741
742 if (task->hostname) {
743 trecv.real_hostname.assign_copy(std::string_view(task->hostname));
744 trecv.from_hostname.assign_copy(trecv.real_hostname);
745 }
746
747 return true;
748 }
749
750 /* Extract data from received header if we were not given IP */
751 if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
752 (task->cfg && !task->cfg->ignore_received)) {
753 if (!top_recv.real_ip.empty()) {
754 if (!rspamd_parse_inet_address (&task->from_addr,
755 top_recv.real_ip.data(),
756 top_recv.real_ip.size(),
757 RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
758 msg_warn_task ("cannot get IP from received header: '%s'",
759 top_recv.real_ip.data());
760 task->from_addr = nullptr;
761 }
762 }
763 if (!top_recv.real_hostname.empty()) {
764 task->hostname = top_recv.real_hostname.data();
765 }
766
767 return true;
768 }
769 }
770 }
771
772 return false;
773 }
774
775 static auto
received_export_to_lua(received_header_chain * chain,lua_State * L)776 received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
777 {
778 if (chain == nullptr) {
779 return false;
780 }
781
782 lua_createtable(L, chain->size(), 0);
783
784 auto push_flag = [L](const received_header &rh, received_flags fl, const char *name) {
785 lua_pushboolean(L, !!(rh.flags & fl));
786 lua_setfield(L, -2, name);
787 };
788
789 auto i = 1;
790
791 for (const auto &rh : chain->as_vector()) {
792 lua_createtable (L, 0, 10);
793
794 if (rh.hdr && rh.hdr->decoded) {
795 rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
796 }
797
798 lua_createtable(L, 0, 3);
799 push_flag(rh, received_flags::ARTIFICIAL, "artificial");
800 push_flag(rh, received_flags::AUTHENTICATED, "authenticated");
801 push_flag(rh, received_flags::SSL, "ssl");
802 lua_setfield(L, -2, "flags");
803
804 auto push_nullable_string = [L](const mime_string &st, const char *field) {
805 if (st.empty()) {
806 lua_pushnil(L);
807 }
808 else {
809 lua_pushlstring(L, st.data(), st.size());
810 }
811 lua_setfield(L, -2, field);
812 };
813
814 push_nullable_string(rh.from_hostname, "from_hostname");
815 push_nullable_string(rh.real_hostname, "real_hostname");
816 push_nullable_string(rh.real_ip, "from_ip");
817 push_nullable_string(rh.by_hostname, "by_hostname");
818 push_nullable_string(rh.for_mbox, "for");
819
820 if (rh.addr) {
821 rspamd_lua_ip_push(L, rh.addr);
822 }
823 else {
824 lua_pushnil(L);
825 }
826 lua_setfield(L, -2, "real_ip");
827
828 lua_pushstring(L, received_protocol_to_string(rh.flags));
829 lua_setfield(L, -2, "proto");
830
831 lua_pushinteger(L, rh.timestamp);
832 lua_setfield(L, -2, "timestamp");
833
834 lua_rawseti(L, -2, i++);
835 }
836
837 return true;
838 }
839
840 } // namespace rspamd::mime
841
842 bool
rspamd_received_header_parse(struct rspamd_task * task,const char * data,size_t sz,struct rspamd_mime_header * hdr)843 rspamd_received_header_parse(struct rspamd_task *task,
844 const char *data, size_t sz,
845 struct rspamd_mime_header *hdr)
846 {
847 auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *>
848 (MESSAGE_FIELD(task, received_headers));
849
850 if (recv_chain_ptr == nullptr) {
851 /* This constructor automatically registers dtor in mempool */
852 recv_chain_ptr = new rspamd::mime::received_header_chain(task);
853 MESSAGE_FIELD(task, received_headers) = (void *)recv_chain_ptr;
854 }
855 return rspamd::mime::received_header_parse(*recv_chain_ptr, task->task_pool,
856 std::string_view{data, sz}, hdr);
857 }
858
/**
 * C entry point that forwards to rspamd::mime::received_maybe_fix_task.
 *
 * @param task task to examine
 * @return value returned by rspamd::mime::received_maybe_fix_task
 */
bool
rspamd_received_maybe_fix_task(struct rspamd_task *task)
{
	return rspamd::mime::received_maybe_fix_task(task);
}
864
865 bool
rspamd_received_export_to_lua(struct rspamd_task * task,lua_State * L)866 rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
867 {
868 return rspamd::mime::received_export_to_lua(
869 static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)),
870 L);
871 }
872
873 /* Tests part */
874 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
875 #include "doctest/doctest.h"
876
877 TEST_SUITE("received") {
878 TEST_CASE("parse received")
879 {
880 using namespace std::string_view_literals;
881 using map_type = robin_hood::unordered_flat_map<std::string_view, std::string_view>;
882 std::vector<std::pair<std::string_view, map_type>> cases{
883 // Simple received
884 {"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
885 {
886 {"real_ip", "185.243.30.90"},
887 {"real_hostname", "smtp11.mailtrack.pl"},
888 {"from_hostname", "smtp11.mailtrack.pl"}
889 }
890 },
891 // Real Postfix IPv6 received
892 {"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
893 "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
894 "\t(Client did not present a certificate)\n"
895 "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
896 "\tfor <test@example.com>; Mon, 6 Jul 2015 09:01:20 +0000 (UTC)\n"
897 "\t(envelope-from upwest201diana@outlook.com)"sv,
898 {
899 {"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
900 {"from_hostname", "server.chat-met-vreemden.nl"},
901 {"by_hostname", "mx1.freebsd.org"},
902 {"for_mbox", "<test@example.com>"}
903 }
904 },
905 // Exim IPv4 received
906 {"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
907 " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
908 " (envelope-from <exim-dev-bounces@exim.org>)\n"
909 " id 1fZ55o-0006DP-3H\n"
910 " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
911 {
912 {"from_hostname", "localhost"},
913 {"real_ip", "127.0.0.1"},
914 {"for_mbox", "<xxx@xxx.xxx>"},
915 {"by_hostname", "hummus.csx.cam.ac.uk"},
916 }
917 },
918 // Exim IPv6 received
919 {"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
920 " helo=mx.spodhuis.org)\n"
921 " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
922 " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
923 " id 1fZ55k-0006CO-9M\n"
924 " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
925 {
926 {"from_hostname", "smtp.spodhuis.org"},
927 {"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
928 {"for_mbox", "exim-dev@exim.org"},
929 {"by_hostname", "hummus.csx.cam.ac.uk"},
930 }
931 },
932 // Haraka received
933 {"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
934 "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
935 "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
936 {
937 {"from_hostname", "aaa.cn"},
938 {"real_ip", "1.1.1.1"},
939 {"by_hostname", "localhost.localdomain"},
940 }
941 },
942 // Invalid by
943 {"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
944 "by guovswzqkvry051@sohu.com with gg login "
945 "by AOL 6.0 for Windows US sub 008 SMTP ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
946 {
947 {"from_hostname", "192.83.172.101"},
948 {"real_ip", "192.83.172.101"},
949 }
950 },
951 // Invalid hostinfo
952 {"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
953 " Thu, 26 Sep 2019 11:19:07 +0200"sv,
954 {
955 {"by_hostname", "example.com"},
956 {"from_hostname", "example.com"},
957 {"real_hostname", "example.com"},
958 }
959 },
960 // Different real and announced hostnames + broken crap
961 {"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
962 "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
963 ": <g @yi.br>"sv,
964 {
965 {"real_ip", "1.1.1.1"},
966 {"from_hostname", "171-29.br"},
967 {"real_hostname", "1-1-1-1.z.com.br"},
968 {"by_hostname", "x.com.br"},
969 }
970 },
971 // Different real and announced ips + no hostname
972 {"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
973 {
974 {"real_ip", "127.0.0.2"},
975 {"from_hostname", "127.0.0.1"},
976 {"by_hostname", "smtp.gmail.com"},
977 }
978 },
979 // Different real and hostanes
980 {"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
981 "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
982 {
983 {"real_ip", "185.118.166.127"},
984 {"from_hostname", "185.118.166.127"},
985 {"real_hostname", "steven2.zhou01.pserver.ru"},
986 {"by_hostname", "mail.832zsu.cn"},
987 }
988 },
989 // \0 in received must be filtered
990 {"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
991 {
992 {"real_ip", "185.243.30.90"},
993 {"real_hostname", "smtp11.mailtrack.pl"},
994 {"from_hostname", "smtp11.mailtrack.pl"}
995 }
996 },
997 // No from part
998 {"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
999 {
1000 {"by_hostname", "mail.832zsu.cn"},
1001 }
1002 },
1003 // From part is in the comment
1004 {"(from asterisk@localhost)\n"
1005 " by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
1006 " Thu, 6 Aug 2020 11:50:04 -0500"sv,
1007 {
1008 {"by_hostname", "pbx.xxx.com"},
1009 }
1010 },
1011 };
1012 rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);
1013
1014 for (auto &&c : cases) {
1015 SUBCASE(c.first.data()) {
1016 rspamd::mime::received_header_chain chain;
1017 auto ret = rspamd::mime::received_header_parse(chain, pool,
1018 c.first, nullptr);
1019 CHECK(ret == true);
1020 auto &&rh = chain.get_received(0);
1021 CHECK(rh.has_value());
1022 auto res = rh.value().get().as_map();
1023
1024 for (const auto &expected : c.second) {
1025 CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
1026 CHECK(res[expected.first] == expected.second);
1027 }
1028 for (const auto &existing : res) {
1029 CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
1030 CHECK(c.second[existing.first] == existing.second);
1031 }
1032 }
1033 }
1034
1035 rspamd_mempool_delete(pool);
1036 }
1037 }