1 /*
2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3 *
4 * Copyright (c) 2007, NLnet Labs. All rights reserved.
5 *
6 * This software is open source.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * Redistributions of source code must retain the above copyright notice,
13 * this list of conditions and the following disclaimer.
14 *
15 * Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 *
19 * Neither the name of the NLNET LABS nor the names of its contributors may
20 * be used to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 /**
37 * \file
38 *
39 * This file contains the infrastructure cache.
40 */
41 #include "config.h"
42 #include "sldns/rrdef.h"
43 #include "sldns/str2wire.h"
44 #include "sldns/sbuffer.h"
45 #include "sldns/wire2str.h"
46 #include "services/cache/infra.h"
47 #include "util/storage/slabhash.h"
48 #include "util/storage/lookup3.h"
49 #include "util/data/dname.h"
50 #include "util/log.h"
51 #include "util/net_help.h"
52 #include "util/config_file.h"
53 #include "iterator/iterator.h"
54
/** Timeout when only a single probe query per IP is allowed. */
#define PROBE_MAXRTO 12000 /* in msec */

/** Number of timeouts for a query type after which the domain can be
 * blocked for that type; even if another type has completely maxed out the
 * rtt, a different type can still send this number of packets (until those
 * all time out too). */
#define TIMEOUT_COUNT_MAX 3

/** Ratelimit value for delegation points; set from cfg->ratelimit when the
 * cache is created or adjusted. */
int infra_dp_ratelimit = 0;

/** Ratelimit value for client ip addresses, in queries per second;
 * set from cfg->ip_ratelimit when the cache is created or adjusted. */
int infra_ip_ratelimit = 0;
69
70 size_t
infra_sizefunc(void * k,void * ATTR_UNUSED (d))71 infra_sizefunc(void* k, void* ATTR_UNUSED(d))
72 {
73 struct infra_key* key = (struct infra_key*)k;
74 return sizeof(*key) + sizeof(struct infra_data) + key->namelen
75 + lock_get_mem(&key->entry.lock);
76 }
77
78 int
infra_compfunc(void * key1,void * key2)79 infra_compfunc(void* key1, void* key2)
80 {
81 struct infra_key* k1 = (struct infra_key*)key1;
82 struct infra_key* k2 = (struct infra_key*)key2;
83 int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
84 if(r != 0)
85 return r;
86 if(k1->namelen != k2->namelen) {
87 if(k1->namelen < k2->namelen)
88 return -1;
89 return 1;
90 }
91 return query_dname_compare(k1->zonename, k2->zonename);
92 }
93
94 void
infra_delkeyfunc(void * k,void * ATTR_UNUSED (arg))95 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
96 {
97 struct infra_key* key = (struct infra_key*)k;
98 if(!key)
99 return;
100 lock_rw_destroy(&key->entry.lock);
101 free(key->zonename);
102 free(key);
103 }
104
/**
 * Delete-data callback for the host cache; frees the struct infra_data.
 * @param d: data to free.
 * @param arg: unused.
 */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* hostdata = (struct infra_data*)d;
	free(hostdata);
}
111
112 size_t
rate_sizefunc(void * k,void * ATTR_UNUSED (d))113 rate_sizefunc(void* k, void* ATTR_UNUSED(d))
114 {
115 struct rate_key* key = (struct rate_key*)k;
116 return sizeof(*key) + sizeof(struct rate_data) + key->namelen
117 + lock_get_mem(&key->entry.lock);
118 }
119
120 int
rate_compfunc(void * key1,void * key2)121 rate_compfunc(void* key1, void* key2)
122 {
123 struct rate_key* k1 = (struct rate_key*)key1;
124 struct rate_key* k2 = (struct rate_key*)key2;
125 if(k1->namelen != k2->namelen) {
126 if(k1->namelen < k2->namelen)
127 return -1;
128 return 1;
129 }
130 return query_dname_compare(k1->name, k2->name);
131 }
132
133 void
rate_delkeyfunc(void * k,void * ATTR_UNUSED (arg))134 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
135 {
136 struct rate_key* key = (struct rate_key*)k;
137 if(!key)
138 return;
139 lock_rw_destroy(&key->entry.lock);
140 free(key->name);
141 free(key);
142 }
143
/**
 * Delete-data callback for the domain rate cache; frees the struct rate_data.
 * @param d: data to free.
 * @param arg: unused.
 */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct rate_data* ratedata = (struct rate_data*)d;
	free(ratedata);
}
150
151 /** find or create element in domainlimit tree */
domain_limit_findcreate(struct infra_cache * infra,char * name)152 static struct domain_limit_data* domain_limit_findcreate(
153 struct infra_cache* infra, char* name)
154 {
155 uint8_t* nm;
156 int labs;
157 size_t nmlen;
158 struct domain_limit_data* d;
159
160 /* parse name */
161 nm = sldns_str2wire_dname(name, &nmlen);
162 if(!nm) {
163 log_err("could not parse %s", name);
164 return NULL;
165 }
166 labs = dname_count_labels(nm);
167
168 /* can we find it? */
169 d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
170 nm, nmlen, labs, LDNS_RR_CLASS_IN);
171 if(d) {
172 free(nm);
173 return d;
174 }
175
176 /* create it */
177 d = (struct domain_limit_data*)calloc(1, sizeof(*d));
178 if(!d) {
179 free(nm);
180 return NULL;
181 }
182 d->node.node.key = &d->node;
183 d->node.name = nm;
184 d->node.len = nmlen;
185 d->node.labs = labs;
186 d->node.dclass = LDNS_RR_CLASS_IN;
187 d->lim = -1;
188 d->below = -1;
189 if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
190 labs, LDNS_RR_CLASS_IN)) {
191 log_err("duplicate element in domainlimit tree");
192 free(nm);
193 free(d);
194 return NULL;
195 }
196 return d;
197 }
198
199 /** insert rate limit configuration into lookup tree */
infra_ratelimit_cfg_insert(struct infra_cache * infra,struct config_file * cfg)200 static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
201 struct config_file* cfg)
202 {
203 struct config_str2list* p;
204 struct domain_limit_data* d;
205 for(p = cfg->ratelimit_for_domain; p; p = p->next) {
206 d = domain_limit_findcreate(infra, p->str);
207 if(!d)
208 return 0;
209 d->lim = atoi(p->str2);
210 }
211 for(p = cfg->ratelimit_below_domain; p; p = p->next) {
212 d = domain_limit_findcreate(infra, p->str);
213 if(!d)
214 return 0;
215 d->below = atoi(p->str2);
216 }
217 return 1;
218 }
219
220 /** setup domain limits tree (0 on failure) */
221 static int
setup_domain_limits(struct infra_cache * infra,struct config_file * cfg)222 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
223 {
224 name_tree_init(&infra->domain_limits);
225 if(!infra_ratelimit_cfg_insert(infra, cfg)) {
226 return 0;
227 }
228 name_tree_init_parents(&infra->domain_limits);
229 return 1;
230 }
231
232 struct infra_cache*
infra_create(struct config_file * cfg)233 infra_create(struct config_file* cfg)
234 {
235 struct infra_cache* infra = (struct infra_cache*)calloc(1,
236 sizeof(struct infra_cache));
237 size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
238 sizeof(struct infra_data)+INFRA_BYTES_NAME);
239 if(!infra) {
240 return NULL;
241 }
242 infra->hosts = slabhash_create(cfg->infra_cache_slabs,
243 INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
244 &infra_delkeyfunc, &infra_deldatafunc, NULL);
245 if(!infra->hosts) {
246 free(infra);
247 return NULL;
248 }
249 infra->host_ttl = cfg->host_ttl;
250 infra->infra_keep_probing = cfg->infra_keep_probing;
251 infra_dp_ratelimit = cfg->ratelimit;
252 infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
253 INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
254 &rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
255 &rate_deldatafunc, NULL);
256 if(!infra->domain_rates) {
257 infra_delete(infra);
258 return NULL;
259 }
260 /* insert config data into ratelimits */
261 if(!setup_domain_limits(infra, cfg)) {
262 infra_delete(infra);
263 return NULL;
264 }
265 infra_ip_ratelimit = cfg->ip_ratelimit;
266 infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
267 INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
268 &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
269 if(!infra->client_ip_rates) {
270 infra_delete(infra);
271 return NULL;
272 }
273 return infra;
274 }
275
276 /** delete domain_limit entries */
domain_limit_free(rbnode_type * n,void * ATTR_UNUSED (arg))277 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
278 {
279 if(n) {
280 free(((struct domain_limit_data*)n)->node.name);
281 free(n);
282 }
283 }
284
285 void
infra_delete(struct infra_cache * infra)286 infra_delete(struct infra_cache* infra)
287 {
288 if(!infra)
289 return;
290 slabhash_delete(infra->hosts);
291 slabhash_delete(infra->domain_rates);
292 traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
293 slabhash_delete(infra->client_ip_rates);
294 free(infra);
295 }
296
297 struct infra_cache*
infra_adjust(struct infra_cache * infra,struct config_file * cfg)298 infra_adjust(struct infra_cache* infra, struct config_file* cfg)
299 {
300 size_t maxmem;
301 if(!infra)
302 return infra_create(cfg);
303 infra->host_ttl = cfg->host_ttl;
304 infra->infra_keep_probing = cfg->infra_keep_probing;
305 infra_dp_ratelimit = cfg->ratelimit;
306 infra_ip_ratelimit = cfg->ip_ratelimit;
307 maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
308 sizeof(struct infra_data)+INFRA_BYTES_NAME);
309 /* divide cachesize by slabs and multiply by slabs, because if the
310 * cachesize is not an even multiple of slabs, that is the resulting
311 * size of the slabhash */
312 if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
313 !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
314 cfg->ratelimit_slabs) ||
315 !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
316 cfg->ip_ratelimit_slabs)) {
317 infra_delete(infra);
318 infra = infra_create(cfg);
319 } else {
320 /* reapply domain limits */
321 traverse_postorder(&infra->domain_limits, domain_limit_free,
322 NULL);
323 if(!setup_domain_limits(infra, cfg)) {
324 infra_delete(infra);
325 return NULL;
326 }
327 }
328 return infra;
329 }
330
331 /** calculate the hash value for a host key
332 * set use_port to a non-0 number to use the port in
333 * the hash calculation; 0 to ignore the port.*/
334 static hashvalue_type
hash_addr(struct sockaddr_storage * addr,socklen_t addrlen,int use_port)335 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
336 int use_port)
337 {
338 hashvalue_type h = 0xab;
339 /* select the pieces to hash, some OS have changing data inside */
340 if(addr_is_ip6(addr, addrlen)) {
341 struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
342 h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
343 if(use_port){
344 h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
345 }
346 h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
347 } else {
348 struct sockaddr_in* in = (struct sockaddr_in*)addr;
349 h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
350 if(use_port){
351 h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
352 }
353 h = hashlittle(&in->sin_addr, INET_SIZE, h);
354 }
355 return h;
356 }
357
358 /** calculate infra hash for a key */
359 static hashvalue_type
hash_infra(struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * name)360 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
361 {
362 return dname_query_hash(name, hash_addr(addr, addrlen, 1));
363 }
364
365 /** lookup version that does not check host ttl (you check it) */
366 struct lruhash_entry*
infra_lookup_nottl(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * name,size_t namelen,int wr)367 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
368 socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
369 {
370 struct infra_key k;
371 k.addrlen = addrlen;
372 memcpy(&k.addr, addr, addrlen);
373 k.namelen = namelen;
374 k.zonename = name;
375 k.entry.hash = hash_infra(addr, addrlen, name);
376 k.entry.key = (void*)&k;
377 k.entry.data = NULL;
378 return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
379 }
380
381 /** init the data elements */
382 static void
data_entry_init(struct infra_cache * infra,struct lruhash_entry * e,time_t timenow)383 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
384 time_t timenow)
385 {
386 struct infra_data* data = (struct infra_data*)e->data;
387 data->ttl = timenow + infra->host_ttl;
388 rtt_init(&data->rtt);
389 data->edns_version = 0;
390 data->edns_lame_known = 0;
391 data->probedelay = 0;
392 data->isdnsseclame = 0;
393 data->rec_lame = 0;
394 data->lame_type_A = 0;
395 data->lame_other = 0;
396 data->timeout_A = 0;
397 data->timeout_AAAA = 0;
398 data->timeout_other = 0;
399 }
400
401 /**
402 * Create and init a new entry for a host
403 * @param infra: infra structure with config parameters.
404 * @param addr: host address.
405 * @param addrlen: length of addr.
406 * @param name: name of zone
407 * @param namelen: length of name.
408 * @param tm: time now.
409 * @return: the new entry or NULL on malloc failure.
410 */
411 static struct lruhash_entry*
new_entry(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * name,size_t namelen,time_t tm)412 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
413 socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
414 {
415 struct infra_data* data;
416 struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
417 if(!key)
418 return NULL;
419 data = (struct infra_data*)malloc(sizeof(struct infra_data));
420 if(!data) {
421 free(key);
422 return NULL;
423 }
424 key->zonename = memdup(name, namelen);
425 if(!key->zonename) {
426 free(key);
427 free(data);
428 return NULL;
429 }
430 key->namelen = namelen;
431 lock_rw_init(&key->entry.lock);
432 key->entry.hash = hash_infra(addr, addrlen, name);
433 key->entry.key = (void*)key;
434 key->entry.data = (void*)data;
435 key->addrlen = addrlen;
436 memcpy(&key->addr, addr, addrlen);
437 data_entry_init(infra, &key->entry, tm);
438 return &key->entry;
439 }
440
441 int
infra_host(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen,time_t timenow,int * edns_vs,uint8_t * edns_lame_known,int * to)442 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
443 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
444 int* edns_vs, uint8_t* edns_lame_known, int* to)
445 {
446 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
447 nm, nmlen, 0);
448 struct infra_data* data;
449 int wr = 0;
450 if(e && ((struct infra_data*)e->data)->ttl < timenow) {
451 /* it expired, try to reuse existing entry */
452 int old = ((struct infra_data*)e->data)->rtt.rto;
453 time_t tprobe = ((struct infra_data*)e->data)->probedelay;
454 uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
455 uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
456 uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
457 lock_rw_unlock(&e->lock);
458 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
459 if(e) {
460 /* if its still there we have a writelock, init */
461 /* re-initialise */
462 /* do not touch lameness, it may be valid still */
463 data_entry_init(infra, e, timenow);
464 wr = 1;
465 /* TOP_TIMEOUT remains on reuse */
466 if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
467 ((struct infra_data*)e->data)->rtt.rto
468 = USEFUL_SERVER_TOP_TIMEOUT;
469 ((struct infra_data*)e->data)->probedelay = tprobe;
470 ((struct infra_data*)e->data)->timeout_A = tA;
471 ((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
472 ((struct infra_data*)e->data)->timeout_other = tother;
473 }
474 }
475 }
476 if(!e) {
477 /* insert new entry */
478 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
479 return 0;
480 data = (struct infra_data*)e->data;
481 *edns_vs = data->edns_version;
482 *edns_lame_known = data->edns_lame_known;
483 *to = rtt_timeout(&data->rtt);
484 slabhash_insert(infra->hosts, e->hash, e, data, NULL);
485 return 1;
486 }
487 /* use existing entry */
488 data = (struct infra_data*)e->data;
489 *edns_vs = data->edns_version;
490 *edns_lame_known = data->edns_lame_known;
491 *to = rtt_timeout(&data->rtt);
492 if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing ||
493 rtt_notimeout(&data->rtt)*4 <= *to)) {
494 /* delay other queries, this is the probe query */
495 if(!wr) {
496 lock_rw_unlock(&e->lock);
497 e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
498 if(!e) { /* flushed from cache real fast, no use to
499 allocate just for the probedelay */
500 return 1;
501 }
502 data = (struct infra_data*)e->data;
503 }
504 /* add 999 to round up the timeout value from msec to sec,
505 * then add a whole second so it is certain that this probe
506 * has timed out before the next is allowed */
507 data->probedelay = timenow + ((*to)+1999)/1000;
508 }
509 lock_rw_unlock(&e->lock);
510 return 1;
511 }
512
513 int
infra_set_lame(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen,time_t timenow,int dnsseclame,int reclame,uint16_t qtype)514 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
515 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
516 int dnsseclame, int reclame, uint16_t qtype)
517 {
518 struct infra_data* data;
519 struct lruhash_entry* e;
520 int needtoinsert = 0;
521 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
522 if(!e) {
523 /* insert it */
524 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
525 log_err("set_lame: malloc failure");
526 return 0;
527 }
528 needtoinsert = 1;
529 } else if( ((struct infra_data*)e->data)->ttl < timenow) {
530 /* expired, reuse existing entry */
531 data_entry_init(infra, e, timenow);
532 }
533 /* got an entry, now set the zone lame */
534 data = (struct infra_data*)e->data;
535 /* merge data (if any) */
536 if(dnsseclame)
537 data->isdnsseclame = 1;
538 if(reclame)
539 data->rec_lame = 1;
540 if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
541 data->lame_type_A = 1;
542 if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A)
543 data->lame_other = 1;
544 /* done */
545 if(needtoinsert)
546 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
547 else { lock_rw_unlock(&e->lock); }
548 return 1;
549 }
550
551 void
infra_update_tcp_works(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen)552 infra_update_tcp_works(struct infra_cache* infra,
553 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
554 size_t nmlen)
555 {
556 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
557 nm, nmlen, 1);
558 struct infra_data* data;
559 if(!e)
560 return; /* doesn't exist */
561 data = (struct infra_data*)e->data;
562 if(data->rtt.rto >= RTT_MAX_TIMEOUT)
563 /* do not disqualify this server altogether, it is better
564 * than nothing */
565 data->rtt.rto = RTT_MAX_TIMEOUT-1000;
566 lock_rw_unlock(&e->lock);
567 }
568
569 int
infra_rtt_update(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen,int qtype,int roundtrip,int orig_rtt,time_t timenow)570 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
571 socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
572 int roundtrip, int orig_rtt, time_t timenow)
573 {
574 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
575 nm, nmlen, 1);
576 struct infra_data* data;
577 int needtoinsert = 0, expired = 0;
578 int rto = 1;
579 time_t oldprobedelay = 0;
580 if(!e) {
581 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
582 return 0;
583 needtoinsert = 1;
584 } else if(((struct infra_data*)e->data)->ttl < timenow) {
585 oldprobedelay = ((struct infra_data*)e->data)->probedelay;
586 data_entry_init(infra, e, timenow);
587 expired = 1;
588 }
589 /* have an entry, update the rtt */
590 data = (struct infra_data*)e->data;
591 if(roundtrip == -1) {
592 if(needtoinsert || expired) {
593 /* timeout on entry that has expired before the timer
594 * keep old timeout from the function caller */
595 data->rtt.rto = orig_rtt;
596 data->probedelay = oldprobedelay;
597 }
598 rtt_lost(&data->rtt, orig_rtt);
599 if(qtype == LDNS_RR_TYPE_A) {
600 if(data->timeout_A < TIMEOUT_COUNT_MAX)
601 data->timeout_A++;
602 } else if(qtype == LDNS_RR_TYPE_AAAA) {
603 if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
604 data->timeout_AAAA++;
605 } else {
606 if(data->timeout_other < TIMEOUT_COUNT_MAX)
607 data->timeout_other++;
608 }
609 } else {
610 /* if we got a reply, but the old timeout was above server
611 * selection height, delete the timeout so the server is
612 * fully available again */
613 if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
614 rtt_init(&data->rtt);
615 rtt_update(&data->rtt, roundtrip);
616 data->probedelay = 0;
617 if(qtype == LDNS_RR_TYPE_A)
618 data->timeout_A = 0;
619 else if(qtype == LDNS_RR_TYPE_AAAA)
620 data->timeout_AAAA = 0;
621 else data->timeout_other = 0;
622 }
623 if(data->rtt.rto > 0)
624 rto = data->rtt.rto;
625
626 if(needtoinsert)
627 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
628 else { lock_rw_unlock(&e->lock); }
629 return rto;
630 }
631
infra_get_host_rto(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen,struct rtt_info * rtt,int * delay,time_t timenow,int * tA,int * tAAAA,int * tother)632 long long infra_get_host_rto(struct infra_cache* infra,
633 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
634 size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
635 int* tA, int* tAAAA, int* tother)
636 {
637 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
638 nm, nmlen, 0);
639 struct infra_data* data;
640 long long ttl = -2;
641 if(!e) return -1;
642 data = (struct infra_data*)e->data;
643 if(data->ttl >= timenow) {
644 ttl = (long long)(data->ttl - timenow);
645 memmove(rtt, &data->rtt, sizeof(*rtt));
646 if(timenow < data->probedelay)
647 *delay = (int)(data->probedelay - timenow);
648 else *delay = 0;
649 }
650 *tA = (int)data->timeout_A;
651 *tAAAA = (int)data->timeout_AAAA;
652 *tother = (int)data->timeout_other;
653 lock_rw_unlock(&e->lock);
654 return ttl;
655 }
656
657 int
infra_edns_update(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * nm,size_t nmlen,int edns_version,time_t timenow)658 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
659 socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
660 time_t timenow)
661 {
662 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
663 nm, nmlen, 1);
664 struct infra_data* data;
665 int needtoinsert = 0;
666 if(!e) {
667 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
668 return 0;
669 needtoinsert = 1;
670 } else if(((struct infra_data*)e->data)->ttl < timenow) {
671 data_entry_init(infra, e, timenow);
672 }
673 /* have an entry, update the rtt, and the ttl */
674 data = (struct infra_data*)e->data;
675 /* do not update if noEDNS and stored is yesEDNS */
676 if(!(edns_version == -1 && (data->edns_version != -1 &&
677 data->edns_lame_known))) {
678 data->edns_version = edns_version;
679 data->edns_lame_known = 1;
680 }
681
682 if(needtoinsert)
683 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
684 else { lock_rw_unlock(&e->lock); }
685 return 1;
686 }
687
688 int
infra_get_lame_rtt(struct infra_cache * infra,struct sockaddr_storage * addr,socklen_t addrlen,uint8_t * name,size_t namelen,uint16_t qtype,int * lame,int * dnsseclame,int * reclame,int * rtt,time_t timenow)689 infra_get_lame_rtt(struct infra_cache* infra,
690 struct sockaddr_storage* addr, socklen_t addrlen,
691 uint8_t* name, size_t namelen, uint16_t qtype,
692 int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
693 {
694 struct infra_data* host;
695 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
696 name, namelen, 0);
697 if(!e)
698 return 0;
699 host = (struct infra_data*)e->data;
700 *rtt = rtt_unclamped(&host->rtt);
701 if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay
702 && infra->infra_keep_probing) {
703 /* single probe, keep probing */
704 if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT)
705 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
706 } else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
707 && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
708 /* single probe for this domain, and we are not probing */
709 /* unless the query type allows a probe to happen */
710 if(qtype == LDNS_RR_TYPE_A) {
711 if(host->timeout_A >= TIMEOUT_COUNT_MAX)
712 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
713 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
714 } else if(qtype == LDNS_RR_TYPE_AAAA) {
715 if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
716 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
717 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
718 } else {
719 if(host->timeout_other >= TIMEOUT_COUNT_MAX)
720 *rtt = USEFUL_SERVER_TOP_TIMEOUT;
721 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
722 }
723 }
724 /* expired entry */
725 if(timenow > host->ttl) {
726
727 /* see if this can be a re-probe of an unresponsive server */
728 /* minus 1000 because that is outside of the RTTBAND, so
729 * blacklisted servers stay blacklisted if this is chosen */
730 if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
731 lock_rw_unlock(&e->lock);
732 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
733 *lame = 0;
734 *dnsseclame = 0;
735 *reclame = 0;
736 return 1;
737 }
738 lock_rw_unlock(&e->lock);
739 return 0;
740 }
741 /* check lameness first */
742 if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
743 lock_rw_unlock(&e->lock);
744 *lame = 1;
745 *dnsseclame = 0;
746 *reclame = 0;
747 return 1;
748 } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
749 lock_rw_unlock(&e->lock);
750 *lame = 1;
751 *dnsseclame = 0;
752 *reclame = 0;
753 return 1;
754 } else if(host->isdnsseclame) {
755 lock_rw_unlock(&e->lock);
756 *lame = 0;
757 *dnsseclame = 1;
758 *reclame = 0;
759 return 1;
760 } else if(host->rec_lame) {
761 lock_rw_unlock(&e->lock);
762 *lame = 0;
763 *dnsseclame = 0;
764 *reclame = 1;
765 return 1;
766 }
767 /* no lameness for this type of query */
768 lock_rw_unlock(&e->lock);
769 *lame = 0;
770 *dnsseclame = 0;
771 *reclame = 0;
772 return 1;
773 }
774
infra_find_ratelimit(struct infra_cache * infra,uint8_t * name,size_t namelen)775 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
776 size_t namelen)
777 {
778 int labs = dname_count_labels(name);
779 struct domain_limit_data* d = (struct domain_limit_data*)
780 name_tree_lookup(&infra->domain_limits, name, namelen, labs,
781 LDNS_RR_CLASS_IN);
782 if(!d) return infra_dp_ratelimit;
783
784 if(d->node.labs == labs && d->lim != -1)
785 return d->lim; /* exact match */
786
787 /* find 'below match' */
788 if(d->node.labs == labs)
789 d = (struct domain_limit_data*)d->node.parent;
790 while(d) {
791 if(d->below != -1)
792 return d->below;
793 d = (struct domain_limit_data*)d->node.parent;
794 }
795 return infra_dp_ratelimit;
796 }
797
ip_rate_sizefunc(void * k,void * ATTR_UNUSED (d))798 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
799 {
800 struct ip_rate_key* key = (struct ip_rate_key*)k;
801 return sizeof(*key) + sizeof(struct ip_rate_data)
802 + lock_get_mem(&key->entry.lock);
803 }
804
ip_rate_compfunc(void * key1,void * key2)805 int ip_rate_compfunc(void* key1, void* key2)
806 {
807 struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
808 struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
809 return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
810 &k2->addr, k2->addrlen);
811 }
812
ip_rate_delkeyfunc(void * k,void * ATTR_UNUSED (arg))813 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
814 {
815 struct ip_rate_key* key = (struct ip_rate_key*)k;
816 if(!key)
817 return;
818 lock_rw_destroy(&key->entry.lock);
819 free(key);
820 }
821
822 /** find data item in array, for write access, caller unlocks */
infra_find_ratedata(struct infra_cache * infra,uint8_t * name,size_t namelen,int wr)823 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
824 uint8_t* name, size_t namelen, int wr)
825 {
826 struct rate_key key;
827 hashvalue_type h = dname_query_hash(name, 0xab);
828 memset(&key, 0, sizeof(key));
829 key.name = name;
830 key.namelen = namelen;
831 key.entry.hash = h;
832 return slabhash_lookup(infra->domain_rates, h, &key, wr);
833 }
834
835 /** find data item in array for ip addresses */
infra_find_ip_ratedata(struct infra_cache * infra,struct comm_reply * repinfo,int wr)836 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
837 struct comm_reply* repinfo, int wr)
838 {
839 struct ip_rate_key key;
840 hashvalue_type h = hash_addr(&(repinfo->addr),
841 repinfo->addrlen, 0);
842 memset(&key, 0, sizeof(key));
843 key.addr = repinfo->addr;
844 key.addrlen = repinfo->addrlen;
845 key.entry.hash = h;
846 return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
847 }
848
849 /** create rate data item for name, number 1 in now */
infra_create_ratedata(struct infra_cache * infra,uint8_t * name,size_t namelen,time_t timenow)850 static void infra_create_ratedata(struct infra_cache* infra,
851 uint8_t* name, size_t namelen, time_t timenow)
852 {
853 hashvalue_type h = dname_query_hash(name, 0xab);
854 struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
855 struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
856 if(!k || !d) {
857 free(k);
858 free(d);
859 return; /* alloc failure */
860 }
861 k->namelen = namelen;
862 k->name = memdup(name, namelen);
863 if(!k->name) {
864 free(k);
865 free(d);
866 return; /* alloc failure */
867 }
868 lock_rw_init(&k->entry.lock);
869 k->entry.hash = h;
870 k->entry.key = k;
871 k->entry.data = d;
872 d->qps[0] = 1;
873 d->timestamp[0] = timenow;
874 slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
875 }
876
877 /** create rate data item for ip address */
infra_ip_create_ratedata(struct infra_cache * infra,struct comm_reply * repinfo,time_t timenow)878 static void infra_ip_create_ratedata(struct infra_cache* infra,
879 struct comm_reply* repinfo, time_t timenow)
880 {
881 hashvalue_type h = hash_addr(&(repinfo->addr),
882 repinfo->addrlen, 0);
883 struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
884 struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
885 if(!k || !d) {
886 free(k);
887 free(d);
888 return; /* alloc failure */
889 }
890 k->addr = repinfo->addr;
891 k->addrlen = repinfo->addrlen;
892 lock_rw_init(&k->entry.lock);
893 k->entry.hash = h;
894 k->entry.key = k;
895 k->entry.data = d;
896 d->qps[0] = 1;
897 d->timestamp[0] = timenow;
898 slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
899 }
900
901 /** Find the second and return its rate counter. If none and should_add, remove
902 * oldest to accommodate. Else return none. */
infra_rate_find_second_or_none(void * data,time_t t,int should_add)903 static int* infra_rate_find_second_or_none(void* data, time_t t, int should_add)
904 {
905 struct rate_data* d = (struct rate_data*)data;
906 int i, oldest;
907 for(i=0; i<RATE_WINDOW; i++) {
908 if(d->timestamp[i] == t)
909 return &(d->qps[i]);
910 }
911 if(!should_add) return NULL;
912 /* remove oldest timestamp, and insert it at t with 0 qps */
913 oldest = 0;
914 for(i=0; i<RATE_WINDOW; i++) {
915 if(d->timestamp[i] < d->timestamp[oldest])
916 oldest = i;
917 }
918 d->timestamp[oldest] = t;
919 d->qps[oldest] = 0;
920 return &(d->qps[oldest]);
921 }
922
/**
 * Return the rate counter for second t, recycling the oldest slot when
 * no slot for t exists yet.
 */
static int* infra_rate_give_second(void* data, time_t t)
{
	int* counter = infra_rate_find_second_or_none(data, t, 1);
	return counter;
}
929
/**
 * Return the rate counter for second t only if a slot for t already
 * exists; no slot is recycled.  The result is NULL when there is no such
 * slot, so the caller has to check for that.
 */
static int* infra_rate_get_second(void* data, time_t t)
{
	int* counter = infra_rate_find_second_or_none(data, t, 0);
	return counter;
}
936
infra_rate_max(void * data,time_t now,int backoff)937 int infra_rate_max(void* data, time_t now, int backoff)
938 {
939 struct rate_data* d = (struct rate_data*)data;
940 int i, max = 0;
941 for(i=0; i<RATE_WINDOW; i++) {
942 if(backoff) {
943 if(now-d->timestamp[i] <= RATE_WINDOW &&
944 d->qps[i] > max) {
945 max = d->qps[i];
946 }
947 } else {
948 if(now == d->timestamp[i]) {
949 return d->qps[i];
950 }
951 }
952 }
953 return max;
954 }
955
infra_ratelimit_inc(struct infra_cache * infra,uint8_t * name,size_t namelen,time_t timenow,int backoff,struct query_info * qinfo,struct comm_reply * replylist)956 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
957 size_t namelen, time_t timenow, int backoff, struct query_info* qinfo,
958 struct comm_reply* replylist)
959 {
960 int lim, max;
961 struct lruhash_entry* entry;
962
963 if(!infra_dp_ratelimit)
964 return 1; /* not enabled */
965
966 /* find ratelimit */
967 lim = infra_find_ratelimit(infra, name, namelen);
968 if(!lim)
969 return 1; /* disabled for this domain */
970
971 /* find or insert ratedata */
972 entry = infra_find_ratedata(infra, name, namelen, 1);
973 if(entry) {
974 int premax = infra_rate_max(entry->data, timenow, backoff);
975 int* cur = infra_rate_give_second(entry->data, timenow);
976 (*cur)++;
977 max = infra_rate_max(entry->data, timenow, backoff);
978 lock_rw_unlock(&entry->lock);
979
980 if(premax <= lim && max > lim) {
981 char buf[257], qnm[257], ts[12], cs[12], ip[128];
982 dname_str(name, buf);
983 dname_str(qinfo->qname, qnm);
984 sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts));
985 sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs));
986 ip[0]=0;
987 if(replylist) {
988 addr_to_str((struct sockaddr_storage *)&replylist->addr,
989 replylist->addrlen, ip, sizeof(ip));
990 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip);
991 } else {
992 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts);
993 }
994 }
995 return (max <= lim);
996 }
997
998 /* create */
999 infra_create_ratedata(infra, name, namelen, timenow);
1000 return (1 <= lim);
1001 }
1002
infra_ratelimit_dec(struct infra_cache * infra,uint8_t * name,size_t namelen,time_t timenow)1003 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
1004 size_t namelen, time_t timenow)
1005 {
1006 struct lruhash_entry* entry;
1007 int* cur;
1008 if(!infra_dp_ratelimit)
1009 return; /* not enabled */
1010 entry = infra_find_ratedata(infra, name, namelen, 1);
1011 if(!entry) return; /* not cached */
1012 cur = infra_rate_get_second(entry->data, timenow);
1013 if(cur == NULL) {
1014 /* our timenow is not available anymore; nothing to decrease */
1015 lock_rw_unlock(&entry->lock);
1016 return;
1017 }
1018 if((*cur) > 0)
1019 (*cur)--;
1020 lock_rw_unlock(&entry->lock);
1021 }
1022
infra_ratelimit_exceeded(struct infra_cache * infra,uint8_t * name,size_t namelen,time_t timenow,int backoff)1023 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
1024 size_t namelen, time_t timenow, int backoff)
1025 {
1026 struct lruhash_entry* entry;
1027 int lim, max;
1028 if(!infra_dp_ratelimit)
1029 return 0; /* not enabled */
1030
1031 /* find ratelimit */
1032 lim = infra_find_ratelimit(infra, name, namelen);
1033 if(!lim)
1034 return 0; /* disabled for this domain */
1035
1036 /* find current rate */
1037 entry = infra_find_ratedata(infra, name, namelen, 0);
1038 if(!entry)
1039 return 0; /* not cached */
1040 max = infra_rate_max(entry->data, timenow, backoff);
1041 lock_rw_unlock(&entry->lock);
1042
1043 return (max >= lim);
1044 }
1045
1046 size_t
infra_get_mem(struct infra_cache * infra)1047 infra_get_mem(struct infra_cache* infra)
1048 {
1049 size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
1050 if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
1051 if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
1052 /* ignore domain_limits because walk through tree is big */
1053 return s;
1054 }
1055
infra_ip_ratelimit_inc(struct infra_cache * infra,struct comm_reply * repinfo,time_t timenow,int backoff,struct sldns_buffer * buffer)1056 int infra_ip_ratelimit_inc(struct infra_cache* infra,
1057 struct comm_reply* repinfo, time_t timenow, int backoff,
1058 struct sldns_buffer* buffer)
1059 {
1060 int max;
1061 struct lruhash_entry* entry;
1062
1063 /* not enabled */
1064 if(!infra_ip_ratelimit) {
1065 return 1;
1066 }
1067 /* find or insert ratedata */
1068 entry = infra_find_ip_ratedata(infra, repinfo, 1);
1069 if(entry) {
1070 int premax = infra_rate_max(entry->data, timenow, backoff);
1071 int* cur = infra_rate_give_second(entry->data, timenow);
1072 (*cur)++;
1073 max = infra_rate_max(entry->data, timenow, backoff);
1074 lock_rw_unlock(&entry->lock);
1075
1076 if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) {
1077 char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12];
1078 addr_to_str((struct sockaddr_storage *)&repinfo->addr,
1079 repinfo->addrlen, client_ip, sizeof(client_ip));
1080 qnm[0]=0;
1081 if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE &&
1082 LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) {
1083 (void)sldns_wire2str_rrquestion_buf(
1084 sldns_buffer_at(buffer, LDNS_HEADER_SIZE),
1085 sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE,
1086 qnm, sizeof(qnm));
1087 if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n')
1088 qnm[strlen(qnm)-1] = 0; /*remove newline*/
1089 if(strchr(qnm, '\t'))
1090 *strchr(qnm, '\t') = ' ';
1091 if(strchr(qnm, '\t'))
1092 *strchr(qnm, '\t') = ' ';
1093 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s",
1094 client_ip, infra_ip_ratelimit, qnm);
1095 } else {
1096 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)",
1097 client_ip, infra_ip_ratelimit);
1098 }
1099 }
1100 return (max <= infra_ip_ratelimit);
1101 }
1102
1103 /* create */
1104 infra_ip_create_ratedata(infra, repinfo, timenow);
1105 return 1;
1106 }
1107