1// Copyright 2020 Istio Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package dns 16 17import ( 18 "crypto/tls" 19 "crypto/x509" 20 "net" 21 "strings" 22 "sync" 23 "time" 24 25 "github.com/miekg/dns" 26 27 "istio.io/pkg/monitoring" 28 29 "istio.io/pkg/env" 30 "istio.io/pkg/log" 31) 32 33// Based on istio-ecosystem/istio-coredns-plugin 34// Changes from original: 35// - runs inside istiod, using Istio main gRPC server 36// - instead of directly reading from K8S, use istiod store. 37// - removed "log" - switching to istio log. 38// - refactored Query, so both DNS native interface and coredns grpc plugin are implemented 39// - added parts of istio-ecosystem/dns-discovery, to provide in process DNS 40 41// TODO: 42// - add metrics, ideally using same names as kubedns/coredns ( Doug ?) 43// - config options on what suffix to capture in agent 44 45// IstioDNS exposes a DNS interface to internal Istiod service database. 46// This can be used: 47// - as a CoreDNS gRPC plugin 48// - as a DNS-over-TLS resolver, with support for forwarding to k8s or upstream 49// - for debug - a plain DNS-over-UDP interface. 50// 51// The code is currently targeted for Istiod, with a per/pod or per/VM coreDNS 52// forwarding to it, and using the same XDS grpc server and cert. 53// 54// In future we may embed it in istio-agent as well, using XDS to sync the config store. 55type IstioDNS struct { 56 mux *dns.ServeMux 57 58 // local DNS-UDP server. 59 // Active in agent and istiod. 60 server *dns.Server 61 62 // TODO: add a dns-over-TCP server and capture, for istio-agent. 63 64 // local DNS-TLS server. This is active only in istiod. 65 tlsServer *dns.Server 66 67 resolvConfServers []string 68 69 running bool 70 71 client *dns.Client 72 73 // tlsClient is a client initialized with DNS-over-TLS certificates. 74 // If nil, no DNS-TLS requests will be made. 75 tlsClient *dns.Client 76 tlsUpstream string 77 backoff time.Duration 78 79 // m protects pending, conn and outID 80 m sync.Mutex 81 pending map[uint16]chan *dns.Msg 82 conn *dns.Conn 83 // outID is used to match requests to responses in the DNS-TCP. 84 outID uint16 85 dnsTLSSuffix []string 86} 87 88var ( 89 // DNSAddr is the env controlling the DNS-over-TLS server in istiod. 90 // By default will be active, set to empty string to disable DNS functionality. 91 // Do not change in prod - it must match the Service. Used for testing or running 92 // on VMs. 93 DNSAddr = env.RegisterStringVar("DNS_ADDR", ":15053", "DNS listen address") 94 95 // DNSAgentAddr is the listener address on the agent. 96 // This is in the range of hardcoded address used by agent - not customizable 97 // except for tests. 98 // By default will be active, set to empty string to disable DNS functionality. 99 // Iptables interception matches this. 100 DNSAgentAddr = ":15013" 101 102 // DNSTLSEnableAgent activates the DNS-over-TLS in istio-agent. 103 // This will just attempt to connect to Istiod and start the DNS server on the default port - 104 // DNS_CAPTURE controls capturing port 53. 105 // Not using a bool - it's error prone in template, annotations, helm. 106 // For now any non-empty value will enable TLS in the agent - we may further customize 107 // the mode, for example specify DNS-HTTPS vs DNS-TLS 108 DNSTLSEnableAgent = env.RegisterStringVar("DNS_AGENT", "", "DNS-over-TLS upstream server") 109 110 // DNSUpstream allows overriding the upstream server. By default we use [discovery-address]:853 111 // If a secure DNS server is available - set this to point to the server. 112 // It is assumed that the server has certificates signed by Istio. 113 // TODO: add option to indicate the expected root CA, or use of public certs. 114 // TODO: also trust k8s and root CAs 115 // TODO: add https and grpc ( tcp-tls, https are the Net names used in miekg library ) 116 DNSUpstream = env.RegisterStringVar("DNS_SERVER", "", 117 "Protocol and DNS server to use. Currently only tcp-tls: is supported.") 118 119 pendingTLS = monitoring.NewGauge( 120 "dns_tls_pending", 121 "Number of pending DNS-over-TLS requests") 122 123 dnsTLS = monitoring.NewSum("dns_tls_req", "DNS-over-TLS requests") 124) 125 126func InitDNS() *IstioDNS { 127 h := &IstioDNS{ 128 mux: dns.NewServeMux(), 129 pending: map[uint16]chan *dns.Msg{}, 130 backoff: 1 * time.Second, 131 } 132 133 h.mux.Handle(".", h) 134 135 // TODO: use a custom dialer 136 h.client = &dns.Client{Net: "udp"} 137 // Attempt to find the 'upstream' DNS server, used for entries not known by Istiod 138 // That includes external names, may also include stateful sets (not clear we want 139 // to cache the entire database if dns is running in agent). Istiod does have all 140 // endpoints, including stateful sets - and could resolve over TLS - but not 141 // in the initial implementation. 142 // TODO: allow env override 143 dnsConfig, err := dns.ClientConfigFromFile("/etc/resolv.conf") 144 if err != nil { 145 // K8S provides one, as well as most VMs. 146 log.Warna("Unexpected missing resolv.conf - no upstream DNS", err) 147 return h 148 } 149 if dnsConfig != nil && len(dnsConfig.Servers) > 0 { 150 for _, s := range dnsConfig.Servers { 151 h.resolvConfServers = append(h.resolvConfServers, s+":53") 152 } 153 } 154 return h 155} 156 157// InitDNS will create the IstioDNS and initialize the agent: 158// - /etc/resolv.conf will be parsed, and nameservers added to resolvConf list 159// - discoveryAddress is the XDS server address, including port. The DNS-TLS server 160// is by default on the same host, port 853 (standard). 161// - domain is the same as "--domain" passed to agent. 162func InitDNSAgent(discoveryAddress string, domain string, cert []byte, suffixes []string) *IstioDNS { 163 dnsTLSServer, discoveryPort, err := net.SplitHostPort(discoveryAddress) 164 if err != nil { 165 log.Errora("Invalid discovery address, defaulting ", discoveryAddress, " ", err) 166 dnsTLSServer = "istiod.istio-system.svc" 167 } 168 169 dnsDomainL := strings.Split(domain, ".") 170 clusterLocal := "cluster.local" 171 if len(dnsDomainL) > 3 { 172 clusterLocal = strings.Join(dnsDomainL[2:], ".") 173 } 174 suffixes = append(suffixes, clusterLocal+".") 175 176 h := InitDNS() 177 178 h.dnsTLSSuffix = suffixes 179 if dnsTLSServer != "" && cert != nil { 180 certPool := x509.NewCertPool() 181 ok := certPool.AppendCertsFromPEM(cert) 182 if !ok { 183 log.Warna("Failed to load certificate ", cert) 184 } else { 185 h.tlsClient = &dns.Client{ 186 Net: "tcp-tls", 187 TLSConfig: &tls.Config{ 188 RootCAs: certPool, 189 }, 190 DialTimeout: 2 * time.Second, 191 } 192 if strings.HasPrefix(dnsTLSServer, "127.0.0.1") { 193 // test/debug 194 h.tlsClient.TLSConfig.ServerName = "istiod.istio-system.svc" 195 // preserve the port 196 h.tlsUpstream = dnsTLSServer + ":" + discoveryPort 197 } else if DNSUpstream.Get() != "" && 198 strings.HasSuffix(DNSUpstream.Get(), "tcp-tls:") { 199 h.tlsUpstream = strings.TrimPrefix(DNSUpstream.Get(), "tcp-tls:") 200 } else { 201 h.tlsUpstream = dnsTLSServer + ":853" 202 } 203 // Maintain a connection to the TLS server. 204 h.openTLS() 205 } 206 } 207 return h 208} 209 210// StartDNS starts the DNS-over-UDP and DNS-over-TLS. 211func (h *IstioDNS) StartDNS(udpAddr string, tlsListener net.Listener) { 212 var err error 213 if tlsListener != nil { 214 // In istiod, using same TLS certificate as the main server. 215 h.tlsServer = &dns.Server{ 216 Handler: h.mux, 217 IdleTimeout: func() time.Duration { 218 // large timeout 219 return 60 * time.Second 220 }, 221 ReadTimeout: 60 * time.Second, 222 MaxTCPQueries: -1, 223 } 224 h.tlsServer.Listener = tlsListener 225 log.Infoa("Started DNS-TLS", tlsListener.Addr()) 226 go func() { 227 err := h.tlsServer.ActivateAndServe() 228 if err != nil { 229 log.Errora("Failed to activate DNS-TLS ", err) 230 } 231 }() 232 233 } 234 // UDP 235 h.server = &dns.Server{Handler: h.mux} 236 h.server.PacketConn, err = net.ListenPacket("udp", udpAddr) 237 if err != nil { 238 log.Warna("Failed to start DNS UDP", udpAddr, err) 239 } 240 241 log.Infoa("Started DNS ", udpAddr) 242 go func() { 243 err := h.server.ActivateAndServe() 244 if err != nil { 245 log.Errora("Failed to activate DNS-UDP ", err) 246 } 247 }() 248} 249 250// ServerDNS is the implementation of DNS interface 251// 252// - 253func (h *IstioDNS) ServeDNS(w dns.ResponseWriter, r *dns.Msg) { 254 t0 := time.Now() 255 var err error 256 var response *dns.Msg 257 258 useTLS := false 259 if len(h.dnsTLSSuffix) > 0 { 260 for _, q := range r.Question { 261 for _, s := range h.dnsTLSSuffix { 262 if strings.HasSuffix(q.Name, s) { 263 useTLS = true 264 break 265 } 266 } 267 if useTLS { 268 break 269 } 270 } 271 } 272 273 // Nothing found - forward to upstream DNS. 274 // TODO: this is NOT secured - pilot to k8s will need to use TLS or run a local coredns 275 // replica, using k8s plugin ( so this is over localhost ) 276 if !useTLS || h.tlsClient == nil { 277 response, _, err = h.client.Exchange(r, h.resolvConfServers[0]) 278 if err != nil { 279 log.Debuga("DNS error ", r, err) 280 // cResponse will be nil - leave the original response object 281 // TODO: increment counter - not clear if DNS client has metrics. 282 response = new(dns.Msg) 283 response.SetReply(r) 284 response.Rcode = dns.RcodeNameError 285 } 286 if len(response.Answer) == 0 { 287 response.Rcode = dns.RcodeNameError 288 } 289 290 if false { 291 log.Infoa("DNS:", time.Since(t0), r.Question, response.Answer) 292 } 293 err = w.WriteMsg(response) 294 if err != nil { 295 log.Debuga("DNS write error ", r, err) 296 } 297 return 298 } 299 300 h.ServeDNSTLS(w, r) 301} 302 303// Handles a request using DNS-over-TLS. 304func (h *IstioDNS) ServeDNSTLS(w dns.ResponseWriter, r *dns.Msg) { 305 t0 := time.Now() 306 // DNS-over-TLS - using client.Exchange has horrible performance - the 307 // client is not optimized (8ms RTT instead of <1ms). CoreDNS uses a 308 //connection pool, which is also not optimal. The RFC recommends pipelining. 309 310 // By using this code, the latency is around 800us - with ~400 us in istiod 311 origID := r.MsgHdr.Id 312 var key uint16 313 ch := make(chan *dns.Msg) 314 h.m.Lock() 315 h.outID++ 316 key = h.outID 317 // The ID on the TLS connection is different from the one in the UDS. 318 r.MsgHdr.Id = h.outID 319 h.pending[h.outID] = ch 320 pendingTLS.Increment() 321 h.m.Unlock() 322 323 defer func() { 324 h.m.Lock() 325 delete(h.pending, key) 326 pendingTLS.Decrement() 327 h.m.Unlock() 328 }() 329 330 var response *dns.Msg 331 currentConn := h.connTLS() 332 if currentConn == nil { 333 // Not connected - don't write anything, client will retry 334 log.Infoa("DNS timeout, no TLS connection") 335 response = new(dns.Msg) 336 response.SetRcode(r, dns.RcodeServerFailure) 337 response.MsgHdr.Id = origID 338 _ = w.WriteMsg(response) 339 return 340 } 341 342 // TODO: optimize - when close restart immediately the connect, maybe 343 // keep 2-3 connections open ? 344 dnsTLS.Increment() 345 err := currentConn.WriteMsg(r) 346 if err != nil { 347 return 348 } 349 350 to := time.After(2 * time.Second) 351 select { 352 case m := <-ch: 353 m.MsgHdr.Id = origID 354 response = m 355 _ = w.WriteMsg(m) 356 case <-to: 357 return 358 } 359 if false { 360 log.Infoa("DNS-TLS:", time.Since(t0), r.Question, response.Answer) 361 } 362} 363 364func (h *IstioDNS) Close() { 365 h.m.Lock() 366 h.running = false 367 if h.conn != nil { 368 h.conn.Close() 369 } 370 h.m.Unlock() 371} 372 373func (h *IstioDNS) connTLS() *dns.Conn { 374 h.m.Lock() 375 conn := h.conn 376 h.m.Unlock() 377 378 if conn != nil { 379 return conn 380 } 381 // TODO: use a Cond to return when connection succeeded 382 time.Sleep(100 * time.Millisecond) 383 h.m.Lock() 384 conn = h.conn 385 h.m.Unlock() 386 387 return conn 388} 389 390func (h *IstioDNS) openTLS() { 391 h.running = true 392 t0 := time.Now() 393 conn, err := h.tlsClient.Dial(h.tlsUpstream) 394 h.m.Lock() 395 h.conn = conn 396 h.m.Unlock() 397 log.Infoa("DNS: Opened TLS connection to ", h.tlsUpstream, " ", time.Since(t0), err) 398 if err != nil { 399 log.Warna("Initial failure to open DNS-TLS connection, will retry", err) 400 } 401 // Maintain the connection 402 go func() { 403 tclose := time.Now() 404 for h.running { 405 if conn == nil { 406 log.Infoa("DNS: TLS connection reopen ", time.Since(t0)) 407 conn, err = h.tlsClient.Dial(h.tlsUpstream) 408 h.m.Lock() 409 h.conn = conn 410 h.m.Unlock() 411 if err != nil { 412 // TODO: exponential backoff 413 // TODO: if we are not in strict mode, fallback to UDP 414 time.Sleep(h.backoff) 415 if h.backoff < 33*time.Second { 416 h.backoff = 2 * h.backoff 417 } 418 continue 419 } 420 h.backoff = 1 * time.Second 421 log.Infoa("DNS: Opened TLS connection to ", h.tlsUpstream, " ", 422 time.Since(t0), " ", time.Since(tclose), err) 423 } 424 425 msg, err := h.conn.ReadMsg() 426 if err != nil { 427 log.Infoa("DNS read error, reconnect ", err) 428 conn = nil 429 h.m.Lock() 430 h.conn = nil 431 h.m.Unlock() 432 tclose = time.Now() 433 continue 434 } 435 uid := msg.MsgHdr.Id 436 h.m.Lock() 437 pr := h.pending[uid] 438 h.m.Unlock() 439 if pr != nil { 440 pr <- msg 441 } 442 } 443 }() 444} 445