1# Copyright © 2019-2020 CZ.NIC, z. s. p. o.
2# SPDX-License-Identifier: GPL-3.0-or-later
3#
4# This file is part of dns-crawler.
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19import json
20import re
21from copy import deepcopy
22from datetime import datetime
23from socket import gethostname
24
25from rq import get_current_connection
26
27from .config_loader import default_config_filename, load_config
28from .dns_utils import (annotate_dns_algorithm, check_dnssec,
29                        get_local_resolver, get_ns_info, get_record,
30                        get_record_parser, get_txt, parse_dmarc, parse_spf,
31                        parse_tlsa)
32from .geoip_utils import annotate_geoip, init_geoip
33from .hsts_utils import get_hsts_status
34from .ip_utils import get_source_addresses
35from .mail_utils import get_mx_info
36from .web_utils import get_webserver_info
37
38
def get_dns_local(domain, config, local_resolver, geoip_dbs):
    """Collect DNS records for ``domain`` through the local resolver.

    Every lookup is made with ``get_record`` using ``local_resolver``.
    A/AAAA answers are passed through ``annotate_geoip``, TLSA answers
    through ``parse_tlsa``, the SPF TXT entry through ``parse_spf``, the
    ``_dmarc`` TXT answer through ``parse_dmarc`` and DS/DNSKEY answers
    through ``annotate_dns_algorithm``.

    Args:
        domain: Domain name to query.
        config: Crawler configuration; ``config["dns"]["check_www"]`` enables
            the ``www.`` variants and ``config["dns"]["additional"]`` lists
            extra record types to fetch for the domain.
        local_resolver: Resolver handed to ``get_record`` and ``check_dnssec``.
        geoip_dbs: GeoIP databases handed to ``annotate_geoip``.

    Returns:
        A dict keyed by result label (``NS_AUTH``, ``MAIL``, ``WEB4``, ...).
        ``TXT_SPF`` is present only when the TXT lookup returned something
        truthy. Entries for the additional record types are merged in last
        and therefore win over a built-in label of the same name.
    """
    check_www = config["dns"]["check_www"]
    result = {}
    txt = get_record(domain, "TXT", local_resolver)
    result["NS_AUTH"] = get_record(domain, "NS", local_resolver)
    result["MAIL"] = get_record(domain, "MX", local_resolver)
    result["WEB4"] = annotate_geoip(get_record(domain, "A", local_resolver), geoip_dbs)
    if check_www:
        result["WEB4_www"] = annotate_geoip(get_record("www." + domain, "A", local_resolver), geoip_dbs)
    result["WEB6"] = annotate_geoip(get_record(domain, "AAAA", local_resolver), geoip_dbs)
    if check_www:
        result["WEB6_www"] = annotate_geoip(get_record("www." + domain, "AAAA", local_resolver), geoip_dbs)
    # Parse the apex TLSA answer the same way as the www one so both entries
    # share a single format (previously the apex answer was stored unparsed).
    result["WEB_TLSA"] = parse_tlsa(get_record("_443._tcp." + domain, "TLSA", local_resolver))
    if check_www:
        result["WEB_TLSA_www"] = parse_tlsa(get_record("_443._tcp.www." + domain, "TLSA", local_resolver))
    result["TXT"] = txt
    if txt:
        # get_txt receives a deep copy, so the list stored under "TXT" is
        # never the object handed to it.
        result["TXT_SPF"] = parse_spf(get_txt(re.compile('^"?v=spf'), deepcopy(txt)))
    result["TXT_DMARC"] = parse_dmarc(get_record("_dmarc." + domain, "TXT", local_resolver))
    result["TXT_openid"] = get_record("_openid." + domain, "TXT", local_resolver)
    result["DS"] = annotate_dns_algorithm(get_record(domain, "DS", local_resolver), 1)
    result["DNSKEY"] = annotate_dns_algorithm(get_record(domain, "DNSKEY", local_resolver), 2)
    result["DNSSEC"] = check_dnssec(domain, local_resolver)
    additional = {}
    for record in config["dns"]["additional"]:
        values = get_record(domain, record, local_resolver)
        parser = get_record_parser(record)
        # Record types without a registered parser are stored as returned.
        additional[record] = values if parser is None else parser(values)
    return dict(result, **additional)
70
71
def get_dns_auth(domain, nameservers, redis, config, local_resolver, geoip_dbs):
    """Query each authoritative nameserver address via ``get_ns_info``.

    Args:
        domain: Accepted for interface compatibility; not used in the body.
        nameservers: Iterable of dicts whose ``"value"`` entry is a nameserver
            host name, or a falsy value when there are none.
        redis: Redis connection passed to ``get_source_addresses`` and
            ``get_ns_info``.
        config: Crawler configuration; reads ``config["timeouts"]["dns"]``,
            ``config["timeouts"]["cache"]`` and
            ``config["dns"]["auth_chaos_txt"]``.
        local_resolver: Resolver used to look up the nameservers' A/AAAA records.
        geoip_dbs: GeoIP databases passed to ``get_ns_info``.

    Returns:
        ``None`` when ``nameservers`` is empty or falsy; otherwise a list with
        one ``{"ns": name}`` dict per nameserver, extended with ``"ipv4"`` /
        ``"ipv6"`` lists when any address of that family produced a truthy
        ``get_ns_info`` result.
    """
    source_ipv4, source_ipv6 = get_source_addresses(redis=redis, config=config)
    timeout = config["timeouts"]["dns"]
    cache_timeout = config["timeouts"]["cache"]
    chaosrecords = config["dns"]["auth_chaos_txt"]
    if not nameservers:
        return None

    def probe(addresses, source_address):
        # A family is skipped when the lookup gave nothing or when there is
        # no source address for it.
        if addresses is None or source_address is None:
            return []
        collected = []
        for address in addresses:
            info = get_ns_info(address, chaosrecords, geoip_dbs, timeout, cache_timeout, redis)
            if info:
                collected.append(info)
        return collected

    report = []
    for nameserver in nameservers:
        name = nameserver["value"]
        if not name:
            continue
        # Resolve both families first, then probe IPv4 before IPv6.
        v4_addresses = get_record(name, "A", local_resolver)
        v6_addresses = get_record(name, "AAAA", local_resolver)
        v4_infos = probe(v4_addresses, source_ipv4)
        v6_infos = probe(v6_addresses, source_ipv6)
        entry = {"ns": name}
        if v4_infos:
            entry["ipv4"] = v4_infos
        if v6_infos:
            entry["ipv6"] = v6_infos
        report.append(entry)
    return report
107
108
def get_web_status(domain, dns, config, source_ipv4, source_ipv6):
    """Probe the domain's web servers with ``get_webserver_info``.

    An address family is probed only when its ``config["web"]`` flag
    (``check_ipv4`` / ``check_ipv6``) is set and a source address for it is
    available. Within a family, ports 80 and 443 are gated by
    ``check_http`` / ``check_https``; the ``www.`` host is probed as well when
    ``config["dns"]["check_www"]`` is set.

    Args:
        domain: Domain name to probe.
        dns: Local DNS results; reads ``WEB4``, ``WEB6`` and, for the www
            host, ``WEB4_www`` / ``WEB6_www``.
        config: Crawler configuration (also forwarded to ``get_webserver_info``).
        source_ipv4: IPv4 source address, or a falsy value to skip IPv4.
        source_ipv6: IPv6 source address, or a falsy value to skip IPv6.

    Returns:
        A dict holding whichever of these keys were probed, in this order:
        ``WEB4_80``, ``WEB4_80_www``, ``WEB4_443``, ``WEB4_443_www``,
        ``WEB6_80``, ``WEB6_80_www``, ``WEB6_443``, ``WEB6_443_www``.
    """
    # (family digit, enabling flag, source address, extra keyword arguments)
    families = (
        ("4", "check_ipv4", source_ipv4, {}),
        ("6", "check_ipv6", source_ipv6, {"ipv6": True}),
    )
    # (port, enabling flag, extra keyword arguments)
    schemes = (
        ("80", "check_http", {}),
        ("443", "check_https", {"tls": True}),
    )
    status = {}
    for family, family_flag, source, family_options in families:
        if not (config["web"][family_flag] and source):
            continue
        for port, scheme_flag, scheme_options in schemes:
            # Only non-default options are passed on, exactly as spelled out
            # per call site before: nothing, tls, ipv6, or ipv6 + tls.
            options = {**family_options, **scheme_options}
            records_key = f"WEB{family}"
            result_key = f"WEB{family}_{port}"
            if config["web"][scheme_flag]:
                status[result_key] = get_webserver_info(domain, dns[records_key], config, source, **options)
            if config["dns"]["check_www"] and config["web"][scheme_flag]:
                status[f"{result_key}_www"] = get_webserver_info(f"www.{domain}", dns[f"{records_key}_www"],
                                                                 config, source, **options)
    return status
131
132
def process_domain(domain):
    """Run every crawler check for ``domain`` and bundle the results.

    The Redis connection comes from ``get_current_connection`` and the
    configuration from ``load_config``, called with the default config file
    name and this machine's host name.

    Args:
        domain: Domain name to crawl.

    Returns:
        A dict with ``"domain"``, a UTC ``"timestamp"`` formatted as
        ``YYYY-MM-DD HH:MM:SS`` and ``"results"`` holding the ``DNS_LOCAL``,
        ``DNS_AUTH``, ``MAIL``, ``WEB`` and ``HSTS`` sections.
    """
    # Imported here because the module-level import only brings in ``datetime``.
    from datetime import timezone

    redis = get_current_connection()
    config = load_config(default_config_filename, redis, hostname=gethostname())
    source_ipv4, source_ipv6 = get_source_addresses(redis=redis, config=config)
    geoip_dbs = init_geoip(config)
    local_resolver = get_local_resolver(config)
    dns_local = get_dns_local(domain, config, local_resolver, geoip_dbs)
    dns_auth = get_dns_auth(domain, dns_local["NS_AUTH"], redis, config, local_resolver, geoip_dbs)
    # With no MX records, the domain itself is checked as the mail host.
    mail_hosts = dns_local["MAIL"] or [{"value": domain}]
    mail = get_mx_info(mail_hosts, config["mail"]["ports"], config["timeouts"]["mail"],
                       config["mail"]["get_banners"], config["timeouts"]["cache"],
                       local_resolver, redis, source_ipv4, source_ipv6)
    web = get_web_status(domain, dns_local, config, source_ipv4, source_ipv6)
    hsts = get_hsts_status(domain)

    return {
        "domain": domain,
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # "now" formats to exactly the same string.
        "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S"),
        "results": {
            "DNS_LOCAL": dns_local,
            "DNS_AUTH": dns_auth,
            "MAIL": mail,
            "WEB": web,
            "HSTS": hsts
        }
    }
163
164
def get_json_result(domain):
    """Return the ``process_domain`` result for ``domain`` as compact JSON.

    Non-ASCII characters are emitted as-is, separators carry no whitespace,
    and json's circular-reference check is turned off.
    """
    crawl_result = process_domain(domain)
    dump_options = {
        "ensure_ascii": False,
        "check_circular": False,
        "separators": (",", ":"),
    }
    return json.dumps(crawl_result, **dump_options)
167