1 /*-
2  * Copyright 2021 Vsevolod Stakhov
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *   http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <mempool_vars_internal.h>
18 #include "config.h"
19 #include "libserver/url.h"
20 #include "lua/lua_common.h"
21 #include "libserver/cfg_file.h"
22 #include "mime_string.hxx"
23 #include "smtp_parsers.h"
24 #include "message.h"
25 #include "received.hxx"
26 #include "frozen/string.h"
27 #include "frozen/unordered_map.h"
28 
29 namespace rspamd::mime {
30 
/**
 * Kinds of clauses that the parser distinguishes inside a `Received:` header.
 * Each value tags a `received_part` with the keyword that introduced it.
 */
enum class received_part_type {
	RSPAMD_RECEIVED_PART_FROM, /* clause introduced by `from` */
	RSPAMD_RECEIVED_PART_BY, /* clause introduced by `by` */
	RSPAMD_RECEIVED_PART_FOR, /* clause introduced by `for` */
	RSPAMD_RECEIVED_PART_WITH, /* clause introduced by `with` */
	RSPAMD_RECEIVED_PART_ID, /* clause introduced by `id` */
	RSPAMD_RECEIVED_PART_UNKNOWN, /* any other token; it is scanned but its text is not stored */
};
39 
/**
 * A single clause extracted from a `Received:` header: the clause kind,
 * the data collected for it and the parenthesised comments attached to it.
 */
struct received_part {
	received_part_type type; /* keyword that introduced this clause */
	mime_string data; /* clause payload; constructed with received_char_filter */
	std::vector<mime_string> comments; /* one entry per closed top-level `(...)` comment */

	/* Creates an empty part of kind `t` whose data string uses received_char_filter */
	explicit received_part(received_part_type t)
								  : type(t),
									data(received_char_filter) {}
};
49 
50 static inline auto
received_part_set_or_append(const gchar * begin,gsize len,mime_string & dest)51 received_part_set_or_append(const gchar *begin,
52 							gsize len,
53 							mime_string &dest) -> void
54 {
55 	if (len == 0) {
56 		return;
57 	}
58 
59 	dest.append(begin, len);
60 	dest.trim(" \t");
61 }
62 
/**
 * Scans one clause of a `Received:` header (the text that follows a keyword
 * such as `from` or `by`) and fills `npart` with its data and comments.
 *
 * The scanner is a small state machine:
 *  - skip_spaces:  skips whitespace, then switches to `next_state`;
 *  - read_data:    collects a token until a space, `(` or `;`;
 *  - in_comment:   collects a possibly nested `(...)` comment;
 *  - read_tcpinfo: collects a `[...]` block that follows the data of a
 *                  `from` clause (at most once per clause);
 *  - all_done:     the clause is finished, report how much was consumed.
 *
 * For RSPAMD_RECEIVED_PART_UNKNOWN nothing is stored in `npart`; the text is
 * only scanned.
 *
 * @param data  text of the clause, starting right after the keyword
 * @param type  kind of clause being parsed
 * @param last  on success receives the offset in `data` where scanning stopped
 * @param npart destination for the collected data and comments
 * @return true if something was consumed and `last` was set; false for an
 *         empty element or when input ends inside a comment or `[...]` block
 */
static auto
received_process_part(const std::string_view &data,
					  received_part_type type,
					  std::ptrdiff_t &last,
					  received_part &npart) -> bool
{
	/* Counters of opening/closing parentheses seen inside the current comment */
	auto obraces = 0, ebraces = 0;
	auto seen_tcpinfo = false;
	enum _parse_state {
		skip_spaces,
		in_comment,
		read_data,
		read_tcpinfo,
		all_done
	} state, next_state;

	/* In this function, we just process comments and data separately */
	const auto *p = data.data(); /* current position */
	const auto *end = p + data.size();
	const auto *c = p; /* start of the token/comment being collected */

	state = skip_spaces;
	next_state = read_data;

	while (p < end) {
		switch (state) {
		case skip_spaces:
			if (!g_ascii_isspace(*p)) {
				/* Do not advance: the new state must see this character */
				c = p;
				state = next_state;
			}
			else {
				p++;
			}
			break;
		case in_comment:
			if (*p == '(') {
				obraces++;
			}
			else if (*p == ')') {
				ebraces++;

				if (ebraces >= obraces) {
					/* Outermost comment is closed: store its inner text */
					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
						if (p > c) {
							npart.comments.emplace_back(received_char_filter);
							auto &comment = npart.comments.back();
							received_part_set_or_append(c, p - c,
									comment);
						}
					}

					p++;
					c = p;
					state = skip_spaces;
					next_state = read_data;

					continue;
				}
			}

			p++;
			break;
		case read_data:
			if (*p == '(') {
				/* A comment starts: flush the pending token first */
				if (p > c) {
					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
						received_part_set_or_append(c, p - c,
								npart.data);
					}
				}

				state = in_comment;
				obraces = 1;
				ebraces = 0;
				p++;
				c = p;
			}
			else if (g_ascii_isspace (*p)) {
				/* End of token: flush it and skip the following whitespace */
				if (p > c) {
					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
						received_part_set_or_append(c, p - c,
								npart.data);
					}
				}

				state = skip_spaces;
				next_state = read_data;
				c = p;
			}
			else if (*p == ';') {
				/* It is actually delimiter of date part if not in the comments */
				if (p > c) {
					if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
						received_part_set_or_append(c, p - c,
								npart.data);
					}
				}

				/* `p` stays on the `;` so that the caller can see it */
				state = all_done;
				continue;
			}
			else if (npart.data.size() > 0) {
				/* We have already received data and find something with no ( */
				if (!seen_tcpinfo && type == received_part_type::RSPAMD_RECEIVED_PART_FROM) {
					/* Check if we have something special here, such as TCPinfo */
					if (*c == '[') {
						state = read_tcpinfo;
						p++;
					}
					else {
						state = all_done;
						continue;
					}
				}
				else {
					/* A second token belongs to the next clause: stop here */
					state = all_done;
					continue;
				}
			}
			else {
				p++;
			}
			break;
		case read_tcpinfo:
			if (*p == ']') {
				/* Append the whole `[...]` block, brackets included */
				received_part_set_or_append(c, p - c + 1,
						npart.data);
				seen_tcpinfo = TRUE;
				state = skip_spaces;
				next_state = read_data;
				c = p;
			}
			p++;
			break;
		case all_done:
			if (p > data.data()) {
				last = p - data.data();
				return true;
			}
			else {
				/* Empty element */
				return false;
			}
			break;
		}
	}

	/* Leftover */
	switch (state) {
	case read_data:
		/* Input ended in the middle of a token: flush it */
		if (p > c) {
			if (type != received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN) {
				received_part_set_or_append(c, p - c,
						npart.data);
			}

			last = p - data.data();

			return true;
		}
		break;
	case skip_spaces:
		if (p > data.data()) {
			last = p - data.data();

			return true;
		}
		/* Falls through to `default` when nothing was consumed */
	default:
		/* Unterminated comment or `[...]` block, or nothing consumed */
		break;
	}

	return false;
}
237 
238 template <std::size_t N>
lit_compare_lowercase(const char lit[N],const char * in)239 constexpr auto lit_compare_lowercase(const char lit[N], const char *in) -> bool
240 {
241 	for (auto i = 0; i < N; i ++) {
242 		if (lc_map[(unsigned char)in[i]] != lit[i]) {
243 			return false;
244 		}
245 	}
246 
247 	return true;
248 }
249 
250 static auto
received_spill(const std::string_view & in,std::ptrdiff_t & date_pos)251 received_spill(const std::string_view &in,
252 			   std::ptrdiff_t &date_pos) -> std::vector<received_part>
253 {
254 	std::vector<received_part> parts;
255 	std::ptrdiff_t pos = 0;
256 	auto seen_from = false, seen_by = false;
257 
258 	const auto *p = in.data();
259 	const auto *end = p + in.size();
260 
261 	auto skip_spaces = [&p, end]() {
262 		while (p < end && g_ascii_isspace (*p)) {
263 			p++;
264 		}
265 	};
266 
267 	skip_spaces();
268 
269 	/* Skip SMTP comments */
270 	if (*p == '(') {
271 		auto obraces = 0, ebraces = 0;
272 
273 		while (p < end) {
274 			if (*p == ')') {
275 				ebraces ++;
276 			}
277 			else if (*p == '(') {
278 				obraces ++;
279 			}
280 
281 			p ++;
282 
283 			if (obraces == ebraces) {
284 				/* Skip spaces after  */
285 				skip_spaces();
286 				break;
287 			}
288 		}
289 	}
290 
291 	auto len = end - p;
292 
293 	if (len == 0) {
294 		return parts;
295 	}
296 
297 	auto maybe_process_part = [&](received_part_type what) -> bool {
298 		parts.emplace_back(what);
299 		auto &rcvd_part = parts.back();
300 		auto chunk = std::string_view{p, (std::size_t)(end - p)};
301 
302 		if (!received_process_part(chunk, what, pos, rcvd_part)) {
303 			parts.pop_back();
304 
305 			return false;
306 		}
307 
308 		return true;
309 	};
310 
311 	if (len > 4 && lit_compare_lowercase<4>("from", p)) {
312 		p += sizeof("from") - 1;
313 
314 		/* We can now store from part */
315 		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FROM)) {
316 			/* Do not accept malformed from */
317 			return {};
318 		}
319 
320 		g_assert (pos != 0);
321 		p += pos;
322 		len = end > p ? end - p : 0;
323 		seen_from = true;
324 	}
325 
326 	if (len > 2 && lit_compare_lowercase<2>("by", p)) {
327 		p += sizeof("by") - 1;
328 
329 		if (!maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_BY)) {
330 			return {};
331 		}
332 
333 		g_assert (pos != 0);
334 		p += pos;
335 		len = end > p ? end - p : 0;
336 		seen_by = true;
337 	}
338 
339 	if (!seen_from && !seen_by) {
340 		/* Useless received */
341 		return {};
342 	}
343 
344 	while (p < end) {
345 		bool got_part = false;
346 		if (*p == ';') {
347 			/* We are at the date separator, stop here */
348 			date_pos = p - in.data() + 1;
349 			break;
350 		}
351 		else {
352 			if (len > sizeof("with") && lit_compare_lowercase<4>("with", p)) {
353 				p += sizeof("with") - 1;
354 
355 				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_WITH);
356 			}
357 			else if (len > sizeof("for") && lit_compare_lowercase<3>("for", p)) {
358 				p += sizeof("for") - 1;
359 				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_FOR);
360 			}
361 			else if (len > sizeof("id") && lit_compare_lowercase<2>("id", p)) {
362 				p += sizeof("id") - 1;
363 				got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_ID);
364 			}
365 			else {
366 				while (p < end) {
367 					if (!(g_ascii_isspace (*p) || *p == '(' || *p == ';')) {
368 						p++;
369 					}
370 					else {
371 						break;
372 					}
373 				}
374 
375 				if (p == end) {
376 					return {};
377 				}
378 				else if (*p == ';') {
379 					date_pos = p - in.data() + 1;
380 					break;
381 				}
382 				else {
383 					got_part = maybe_process_part(received_part_type::RSPAMD_RECEIVED_PART_UNKNOWN);
384 				}
385 			}
386 
387 			if (!got_part) {
388 				p++;
389 				len = end > p ? end - p : 0;
390 			}
391 			else {
392 				g_assert (pos != 0);
393 				p += pos;
394 				len = end > p ? end - p : 0;
395 			}
396 		}
397 	}
398 
399 	return parts;
400 }
401 
/*
 * Flags passed to rspamd_parse_inet_address_pool() for every address taken
 * from a Received header: RSPAMD_INET_ADDRESS_PARSE_REMOTE combined with
 * RSPAMD_INET_ADDRESS_PARSE_NO_UNIX.
 */
#define RSPAMD_INET_ADDRESS_PARSE_RECEIVED \
	(rspamd_inet_address_parse_flags)(RSPAMD_INET_ADDRESS_PARSE_REMOTE|RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)
404 
405 static auto
received_process_rdns(rspamd_mempool_t * pool,const std::string_view & in,mime_string & dest)406 received_process_rdns(rspamd_mempool_t *pool,
407 					  const std::string_view &in,
408 					  mime_string &dest) -> bool
409 {
410 	auto seen_dot = false;
411 
412 	const auto *p = in.data();
413 	const auto *end = p + in.size();
414 
415 	if (in.empty()) {
416 		return false;
417 	}
418 
419 	if (*p == '[' && *(end - 1) == ']' && in.size() > 2) {
420 		/* We have enclosed ip address */
421 		auto *addr = rspamd_parse_inet_address_pool(p + 1,
422 				(end - p) - 2,
423 				pool,
424 				RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
425 
426 		if (addr) {
427 			const gchar *addr_str;
428 
429 			if (rspamd_inet_address_get_port(addr) != 0) {
430 				addr_str = rspamd_inet_address_to_string_pretty(addr);
431 			}
432 			else {
433 				addr_str = rspamd_inet_address_to_string(addr);
434 			}
435 
436 			dest.assign_copy(std::string_view{addr_str});
437 
438 			return true;
439 		}
440 	}
441 
442 	auto hlen = 0u;
443 
444 	while (p < end) {
445 		if (!g_ascii_isspace(*p) && rspamd_url_is_domain(*p)) {
446 			if (*p == '.') {
447 				seen_dot = true;
448 			}
449 
450 			hlen++;
451 		}
452 		else {
453 			break;
454 		}
455 
456 		p++;
457 	}
458 
459 	if (hlen > 0) {
460 		if (p == end || (seen_dot && (g_ascii_isspace(*p) || *p == '[' || *p == '('))) {
461 			/* All data looks like a hostname */
462 			dest.assign_copy(std::string_view{in.data(), hlen});
463 
464 			return true;
465 		}
466 	}
467 
468 	return false;
469 }
470 
471 static auto
received_process_host_tcpinfo(rspamd_mempool_t * pool,received_header & rh,const std::string_view & in)472 received_process_host_tcpinfo(rspamd_mempool_t *pool,
473 							  received_header &rh,
474 							  const std::string_view &in) -> bool
475 {
476 	rspamd_inet_addr_t *addr = nullptr;
477 	auto ret = false;
478 
479 	if (in.empty()) {
480 		return false;
481 	}
482 
483 	if (in[0] == '[') {
484 		/* Likely Exim version */
485 
486 		auto brace_pos = in.find(']');
487 
488 		if (brace_pos != std::string_view::npos) {
489 			auto substr_addr = in.substr(1, brace_pos - 1);
490 			addr = rspamd_parse_inet_address_pool(substr_addr.data(),
491 					substr_addr.size(),
492 					pool,
493 					RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
494 
495 			if (addr) {
496 				rh.addr = addr;
497 				rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
498 			}
499 		}
500 	}
501 	else {
502 		if (g_ascii_isxdigit(in[0])) {
503 			/* Try to parse IP address */
504 			addr = rspamd_parse_inet_address_pool(in.data(),
505 					in.size(), pool, RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
506 			if (addr) {
507 				rh.addr = addr;
508 				rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
509 			}
510 		}
511 
512 		if (!addr) {
513 			/* Try canonical Postfix version: rdns [ip] */
514 			auto obrace_pos = in.find('[');
515 
516 			if (obrace_pos != std::string_view::npos) {
517 				auto ebrace_pos = in.rfind(']');
518 
519 				if (ebrace_pos != std::string_view::npos && ebrace_pos > obrace_pos) {
520 					auto substr_addr = in.substr(obrace_pos + 1,
521 							ebrace_pos - obrace_pos - 1);
522 					addr = rspamd_parse_inet_address_pool(substr_addr.data(),
523 							substr_addr.size(),
524 							pool,
525 							RSPAMD_INET_ADDRESS_PARSE_RECEIVED);
526 
527 					if (addr) {
528 						rh.addr = addr;
529 						rh.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(addr)));
530 
531 						/* Process with rDNS */
532 						auto rdns_substr = in.substr(0, obrace_pos);
533 
534 						if (received_process_rdns(pool,rdns_substr,rh.real_hostname)) {
535 							ret = true;
536 						}
537 					}
538 				}
539 			}
540 			else {
541 				/* Hostname or some crap, sigh... */
542 				if (received_process_rdns(pool, in, rh.real_hostname)) {
543 					ret = true;
544 				}
545 			}
546 		}
547 	}
548 
549 	return ret;
550 }
551 
552 static void
received_process_from(rspamd_mempool_t * pool,const received_part & rpart,received_header & rh)553 received_process_from(rspamd_mempool_t *pool,
554 					  const received_part &rpart,
555 					  received_header &rh)
556 {
557 	if (rpart.data.size() > 0) {
558 		/* We have seen multiple cases:
559 		 * - [ip] (hostname/unknown [real_ip])
560 		 * - helo (hostname/unknown [real_ip])
561 		 * - [ip]
562 		 * - hostname
563 		 * - hostname ([ip]:port helo=xxx)
564 		 * Maybe more...
565 		 */
566 		auto seen_ip_in_data = false;
567 
568 		if (!rpart.comments.empty()) {
569 			/* We can have info within comment as part of RFC */
570 			received_process_host_tcpinfo(
571 					pool, rh,
572 					rpart.comments[0].as_view());
573 		}
574 
575 		if (rh.real_ip.size() == 0) {
576 			/* Try to do the same with data */
577 			if (received_process_host_tcpinfo(
578 					pool, rh,
579 					rpart.data.as_view())) {
580 				seen_ip_in_data = true;
581 			}
582 		}
583 
584 		if (!seen_ip_in_data) {
585 			if (rh.real_ip.size() != 0) {
586 				/* Get anounced hostname (usually helo) */
587 				received_process_rdns(pool,
588 						rpart.data.as_view(),
589 						rh.from_hostname);
590 			}
591 			else {
592 				received_process_host_tcpinfo(pool,
593 						rh, rpart.data.as_view());
594 			}
595 		}
596 	}
597 	else {
598 		/* rpart->dlen = 0 */
599 		if (!rpart.comments.empty()) {
600 			received_process_host_tcpinfo(
601 					pool, rh,
602 					rpart.comments[0].as_view());
603 		}
604 	}
605 }
606 
607 static auto
received_header_parse(received_header_chain & chain,rspamd_mempool_t * pool,const std::string_view & in,struct rspamd_mime_header * hdr)608 received_header_parse(received_header_chain &chain, rspamd_mempool_t *pool,
609 					  const std::string_view &in,
610 					  struct rspamd_mime_header *hdr) -> bool
611 {
612 	std::ptrdiff_t date_pos = -1;
613 
614 	static constexpr const auto protos_map = frozen::make_unordered_map<frozen::string, received_flags>({
615 			{"smtp",    received_flags::SMTP},
616 			{"esmtp",   received_flags::ESMTP},
617 			{"esmtpa",  received_flags::ESMTPA |
618 						received_flags::AUTHENTICATED},
619 			{"esmtpsa", received_flags::ESMTPSA |
620 						received_flags::SSL |
621 						received_flags::AUTHENTICATED},
622 			{"esmtps",  received_flags::ESMTPS |
623 						received_flags::SSL},
624 			{"lmtp",    received_flags::LMTP},
625 			{"imap",    received_flags::IMAP},
626 			{"imaps",   received_flags::IMAP |
627 						received_flags::SSL},
628 			{"http",    received_flags::HTTP},
629 			{"https",   received_flags::HTTP |
630 						received_flags::SSL},
631 			{"local",   received_flags::LOCAL}
632 	});
633 
634 	auto parts = received_spill(in, date_pos);
635 
636 	if (parts.empty()) {
637 		return false;
638 	}
639 
640 	auto &rh = chain.new_received();
641 
642 	rh.flags = received_flags::UNKNOWN;
643 	rh.hdr = hdr;
644 
645 	for (const auto &part : parts) {
646 		switch (part.type) {
647 		case received_part_type::RSPAMD_RECEIVED_PART_FROM:
648 			received_process_from(pool, part, rh);
649 			break;
650 		case received_part_type::RSPAMD_RECEIVED_PART_BY:
651 			received_process_rdns(pool,
652 					part.data.as_view(),
653 					rh.by_hostname);
654 			break;
655 		case received_part_type::RSPAMD_RECEIVED_PART_WITH:
656 			if (part.data.size() > 0) {
657 				auto proto_flag_it = protos_map.find(part.data.as_view());
658 
659 				if (proto_flag_it != protos_map.end()) {
660 					rh.flags = proto_flag_it->second;
661 				}
662 			}
663 			break;
664 		case received_part_type::RSPAMD_RECEIVED_PART_FOR:
665 			rh.for_mbox.assign_copy(part.data);
666 			rh.for_addr = rspamd_email_address_from_smtp(rh.for_mbox.data(),
667 					rh.for_mbox.size());
668 			break;
669 		default:
670 			/* Do nothing */
671 			break;
672 		}
673 	}
674 
675 	if (!rh.real_hostname.empty() && rh.from_hostname.empty()) {
676 		rh.from_hostname.assign_copy(rh.real_hostname);
677 	}
678 
679 	if (date_pos > 0 && date_pos < in.size()) {
680 		auto date_sub = in.substr(date_pos);
681 		rh.timestamp = rspamd_parse_smtp_date((const unsigned char*)date_sub.data(),
682 				date_sub.size(), nullptr);
683 	}
684 
685 	return true;
686 }
687 
688 static auto
received_maybe_fix_task(struct rspamd_task * task)689 received_maybe_fix_task(struct rspamd_task *task) -> bool
690 {
691 	auto *recv_chain_ptr = static_cast<received_header_chain *>(MESSAGE_FIELD(task, received_headers));
692 
693 	if (recv_chain_ptr) {
694 		auto need_recv_correction = false;
695 
696 		auto top_recv_maybe = recv_chain_ptr->get_received(0);
697 
698 		if (top_recv_maybe.has_value()) {
699 			auto &top_recv = top_recv_maybe.value().get();
700 
701 			const auto *raddr = top_recv.addr;
702 			if (top_recv.real_ip.size() == 0 || (task->cfg && task->cfg->ignore_received)) {
703 				need_recv_correction = true;
704 			}
705 			else if (!(task->flags & RSPAMD_TASK_FLAG_NO_IP) && task->from_addr) {
706 				if (!raddr) {
707 					need_recv_correction = true;
708 				}
709 				else {
710 					if (rspamd_inet_address_compare(raddr, task->from_addr, FALSE) != 0) {
711 						need_recv_correction = true;
712 					}
713 				}
714 			}
715 
716 			if (need_recv_correction && !(task->flags & RSPAMD_TASK_FLAG_NO_IP)
717 					&& task->from_addr) {
718 				msg_debug_task ("the first received seems to be"
719 								" not ours, prepend it with fake one");
720 
721 				auto &trecv = recv_chain_ptr->new_received(received_header_chain::append_type::append_head);
722 				trecv.flags |= received_flags::ARTIFICIAL;
723 
724 				if (task->flags & RSPAMD_TASK_FLAG_SSL) {
725 					trecv.flags |= received_flags::SSL;
726 				}
727 
728 				if (task->user) {
729 					trecv.flags |= received_flags::AUTHENTICATED;
730 				}
731 
732 				trecv.real_ip.assign_copy(std::string_view(rspamd_inet_address_to_string(task->from_addr)));
733 
734 				const auto *mta_name = (const char*)rspamd_mempool_get_variable(task->task_pool,
735 						RSPAMD_MEMPOOL_MTA_NAME);
736 
737 				if (mta_name) {
738 					trecv.by_hostname.assign_copy(std::string_view(mta_name));
739 				}
740 				trecv.addr = rspamd_inet_address_copy(task->from_addr);
741 
742 				if (task->hostname) {
743 					trecv.real_hostname.assign_copy(std::string_view(task->hostname));
744 					trecv.from_hostname.assign_copy(trecv.real_hostname);
745 				}
746 
747 				return true;
748 			}
749 
750 			/* Extract data from received header if we were not given IP */
751 			if (!need_recv_correction && (task->flags & RSPAMD_TASK_FLAG_NO_IP) &&
752 				(task->cfg && !task->cfg->ignore_received)) {
753 				if (!top_recv.real_ip.empty()) {
754 					if (!rspamd_parse_inet_address (&task->from_addr,
755 							top_recv.real_ip.data(),
756 							top_recv.real_ip.size(),
757 							RSPAMD_INET_ADDRESS_PARSE_NO_UNIX)) {
758 						msg_warn_task ("cannot get IP from received header: '%s'",
759 								top_recv.real_ip.data());
760 						task->from_addr = nullptr;
761 					}
762 				}
763 				if (!top_recv.real_hostname.empty()) {
764 					task->hostname = top_recv.real_hostname.data();
765 				}
766 
767 				return true;
768 			}
769 		}
770 	}
771 
772 	return false;
773 }
774 
775 static auto
received_export_to_lua(received_header_chain * chain,lua_State * L)776 received_export_to_lua(received_header_chain *chain, lua_State *L) -> bool
777 {
778 	if (chain == nullptr) {
779 		return false;
780 	}
781 
782 	lua_createtable(L, chain->size(), 0);
783 
784 	auto push_flag = [L](const received_header &rh, received_flags fl, const char *name) {
785 		lua_pushboolean(L, !!(rh.flags & fl));
786 		lua_setfield(L, -2, name);
787 	};
788 
789 	auto i = 1;
790 
791 	for (const auto &rh : chain->as_vector()) {
792 		lua_createtable (L, 0, 10);
793 
794 		if (rh.hdr && rh.hdr->decoded) {
795 			rspamd_lua_table_set(L, "raw", rh.hdr->decoded);
796 		}
797 
798 		lua_createtable(L, 0, 3);
799 		push_flag(rh, received_flags::ARTIFICIAL, "artificial");
800 		push_flag(rh, received_flags::AUTHENTICATED, "authenticated");
801 		push_flag(rh, received_flags::SSL, "ssl");
802 		lua_setfield(L, -2, "flags");
803 
804 		auto push_nullable_string = [L](const mime_string &st, const char *field) {
805 			if (st.empty()) {
806 				lua_pushnil(L);
807 			}
808 			else {
809 				lua_pushlstring(L, st.data(), st.size());
810 			}
811 			lua_setfield(L, -2, field);
812 		};
813 
814 		push_nullable_string(rh.from_hostname, "from_hostname");
815 		push_nullable_string(rh.real_hostname, "real_hostname");
816 		push_nullable_string(rh.real_ip, "from_ip");
817 		push_nullable_string(rh.by_hostname, "by_hostname");
818 		push_nullable_string(rh.for_mbox, "for");
819 
820 		if (rh.addr) {
821 			rspamd_lua_ip_push(L, rh.addr);
822 		}
823 		else {
824 			lua_pushnil(L);
825 		}
826 		lua_setfield(L, -2, "real_ip");
827 
828 		lua_pushstring(L, received_protocol_to_string(rh.flags));
829 		lua_setfield(L, -2, "proto");
830 
831 		lua_pushinteger(L, rh.timestamp);
832 		lua_setfield(L, -2, "timestamp");
833 
834 		lua_rawseti(L, -2, i++);
835 	}
836 
837 	return true;
838 }
839 
840 } // namespace rspamd::mime
841 
842 bool
rspamd_received_header_parse(struct rspamd_task * task,const char * data,size_t sz,struct rspamd_mime_header * hdr)843 rspamd_received_header_parse(struct rspamd_task *task,
844 							 const char *data, size_t sz,
845 							 struct rspamd_mime_header *hdr)
846 {
847 	auto *recv_chain_ptr = static_cast<rspamd::mime::received_header_chain *>
848 			(MESSAGE_FIELD(task, received_headers));
849 
850 	if (recv_chain_ptr == nullptr) {
851 		/* This constructor automatically registers dtor in mempool */
852 		recv_chain_ptr = new rspamd::mime::received_header_chain(task);
853 		MESSAGE_FIELD(task, received_headers) = (void *)recv_chain_ptr;
854 	}
855 	return rspamd::mime::received_header_parse(*recv_chain_ptr, task->task_pool,
856 			std::string_view{data, sz}, hdr);
857 }
858 
859 bool
rspamd_received_maybe_fix_task(struct rspamd_task * task)860 rspamd_received_maybe_fix_task(struct rspamd_task *task)
861 {
862 	return rspamd::mime::received_maybe_fix_task(task);
863 }
864 
865 bool
rspamd_received_export_to_lua(struct rspamd_task * task,lua_State * L)866 rspamd_received_export_to_lua(struct rspamd_task *task, lua_State *L)
867 {
868 	return rspamd::mime::received_export_to_lua(
869 			static_cast<rspamd::mime::received_header_chain *>(MESSAGE_FIELD(task, received_headers)),
870 			L);
871 }
872 
873 /* Tests part */
874 #define DOCTEST_CONFIG_IMPLEMENTATION_IN_DLL
875 #include "doctest/doctest.h"
876 
TEST_SUITE("received") {
/*
 * Table driven test: each case pairs a raw Received header value with the
 * exact set of fields that the parsed header must expose through as_map().
 */
TEST_CASE("parse received")
{
	using namespace std::string_view_literals;
	using map_type = robin_hood::unordered_flat_map<std::string_view, std::string_view>;
	std::vector<std::pair<std::string_view, map_type>> cases{
			// Simple received
			{"from smtp11.mailtrack.pl (smtp11.mailtrack.pl [185.243.30.90])"sv,
					{
							{"real_ip", "185.243.30.90"},
							{"real_hostname", "smtp11.mailtrack.pl"},
							{"from_hostname", "smtp11.mailtrack.pl"}
					}
			},
			// Real Postfix IPv6 received
			{"from server.chat-met-vreemden.nl (unknown [IPv6:2a01:7c8:aab6:26d:5054:ff:fed1:1da2])\n"
			 "\t(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))\n"
			 "\t(Client did not present a certificate)\n"
			 "\tby mx1.freebsd.org (Postfix) with ESMTPS id CF0171862\n"
			 "\tfor <test@example.com>; Mon,  6 Jul 2015 09:01:20 +0000 (UTC)\n"
			 "\t(envelope-from upwest201diana@outlook.com)"sv,
					{
							{"real_ip", "2a01:7c8:aab6:26d:5054:ff:fed1:1da2"},
							{"from_hostname", "server.chat-met-vreemden.nl"},
							{"by_hostname", "mx1.freebsd.org"},
							{"for_mbox", "<test@example.com>"}
					}
			},
			// Exim IPv4 received
			{"from localhost ([127.0.0.1]:49019 helo=hummus.csx.cam.ac.uk)\n"
			 " by hummus.csx.cam.ac.uk with esmtp (Exim 4.91-pdpfix1)\n"
			 " (envelope-from <exim-dev-bounces@exim.org>)\n"
			 " id 1fZ55o-0006DP-3H\n"
			 " for <xxx@xxx.xxx>; Sat, 30 Jun 2018 02:54:28 +0100"sv,
					{
							{"from_hostname", "localhost"},
							{"real_ip", "127.0.0.1"},
							{"for_mbox", "<xxx@xxx.xxx>"},
							{"by_hostname", "hummus.csx.cam.ac.uk"},
					}
			},
			// Exim IPv6 received
			{"from smtp.spodhuis.org ([2a02:898:31:0:48:4558:736d:7470]:38689\n"
			 " helo=mx.spodhuis.org)\n"
			 " by hummus.csx.cam.ac.uk with esmtpsa (TLSv1.3:TLS_AES_256_GCM_SHA384:256)\n"
			 " (Exim 4.91-pdpfix1+cc) (envelope-from <xxx@exim.org>)\n"
			 " id 1fZ55k-0006CO-9M\n"
			 " for exim-dev@exim.org; Sat, 30 Jun 2018 02:54:24 +0100"sv,
					{
							{"from_hostname", "smtp.spodhuis.org"},
							{"real_ip", "2a02:898:31:0:48:4558:736d:7470"},
							{"for_mbox", "exim-dev@exim.org"},
							{"by_hostname", "hummus.csx.cam.ac.uk"},
					}
			},
			// Haraka received
			{"from aaa.cn ([1.1.1.1]) by localhost.localdomain (Haraka/2.8.18) with "
			 "ESMTPA id 349C9C2B-491A-4925-A687-3EF14038C344.1 envelope-from <huxin@xxx.com> "
			 "(authenticated bits=0); Tue, 03 Jul 2018 14:18:13 +0200"sv,
					{
							{"from_hostname", "aaa.cn"},
							{"real_ip", "1.1.1.1"},
							{"by_hostname", "localhost.localdomain"},
					}
			},
			// Invalid by
			{"from [192.83.172.101] (HELLO 148.251.238.35) (148.251.238.35) "
			 "by guovswzqkvry051@sohu.com with gg login "
			 "by AOL 6.0 for Windows US sub 008 SMTP  ; Tue, 03 Jul 2018 09:01:47 -0300"sv,
					{
							{"from_hostname", "192.83.172.101"},
							{"real_ip", "192.83.172.101"},
					}
			},
			// Invalid hostinfo
			{"from example.com ([]) by example.com with ESMTP id 2019091111 ;"
			 " Thu, 26 Sep 2019 11:19:07 +0200"sv,
					{
							{"by_hostname", "example.com"},
							{"from_hostname", "example.com"},
							{"real_hostname", "example.com"},
					}
			},
			// Different real and announced hostnames + broken crap
			{"from 171-29.br (1-1-1-1.z.com.br [1.1.1.1]) by x.com.br (Postfix) "
			 "with;ESMTP id 44QShF6xj4z1X for <hey@y.br>; Thu, 21 Mar 2019 23:45:46 -0300 "
			 ": <g @yi.br>"sv,
					{
							{"real_ip", "1.1.1.1"},
							{"from_hostname", "171-29.br"},
							{"real_hostname", "1-1-1-1.z.com.br"},
							{"by_hostname", "x.com.br"},
					}
			},
			// Different real and announced ips + no hostname
			{"from [127.0.0.1] ([127.0.0.2]) by smtp.gmail.com with ESMTPSA id xxxololo"sv,
					{
							{"real_ip", "127.0.0.2"},
							{"from_hostname", "127.0.0.1"},
							{"by_hostname", "smtp.gmail.com"},
					}
			},
			// Different real and announced hostnames
			{"from 185.118.166.127 (steven2.zhou01.pserver.ru [185.118.166.127]) "
			 "by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
					{
							{"real_ip", "185.118.166.127"},
							{"from_hostname", "185.118.166.127"},
							{"real_hostname", "steven2.zhou01.pserver.ru"},
							{"by_hostname", "mail.832zsu.cn"},
					}
			},
			// \0 in received must be filtered
			{"from smtp11.mailt\0rack.pl (smtp11.mail\0track.pl [1\085.243.30.90])"sv,
					{
							{"real_ip", "185.243.30.90"},
							{"real_hostname", "smtp11.mailtrack.pl"},
							{"from_hostname", "smtp11.mailtrack.pl"}
					}
			},
			// No from part
			{"by mail.832zsu.cn (Postfix) with ESMTPA id AAD722133E34"sv,
					{
							{"by_hostname", "mail.832zsu.cn"},
					}
			},
			// From part is in the comment
			{"(from asterisk@localhost)\n"
			 "        by pbx.xxx.com (8.14.7/8.14.7/Submit) id 076Go4wD014562;\n"
			 "        Thu, 6 Aug 2020 11:50:04 -0500"sv,
					{
							{"by_hostname", "pbx.xxx.com"},
					}
			},
	};
	rspamd_mempool_t *pool = rspamd_mempool_new_default("rcvd test", 0);

	for (auto &&c : cases) {
		/* One subcase per header, named after the header text itself */
		SUBCASE(c.first.data()) {
			rspamd::mime::received_header_chain chain;
			auto ret = rspamd::mime::received_header_parse(chain, pool,
					c.first, nullptr);
			CHECK(ret == true);
			auto &&rh = chain.get_received(0);
			CHECK(rh.has_value());
			auto res = rh.value().get().as_map();

			/* Every expected field must be present with the expected value */
			for (const auto &expected : c.second) {
				CHECK_MESSAGE(res.contains(expected.first), expected.first.data());
				CHECK(res[expected.first] == expected.second);
			}
			/* ... and the parser must not produce any field that is not expected */
			for (const auto &existing : res) {
				CHECK_MESSAGE(c.second.contains(existing.first), existing.first.data());
				CHECK(c.second[existing.first] == existing.second);
			}
		}
	}

	rspamd_mempool_delete(pool);
}
}