1 /*
2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
3  *
4  * SPDX-License-Identifier: MPL-2.0
5  *
6  * This Source Code Form is subject to the terms of the Mozilla Public
7  * License, v. 2.0. If a copy of the MPL was not distributed with this
8  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
9  *
10  * See the COPYRIGHT file distributed with this work for additional
11  * information regarding copyright ownership.
12  */
13 
14 /*! \file */
15 
16 /*
17  * Rate limit DNS responses.
18  */
19 
20 /* #define ISC_LIST_CHECKINIT */
21 
22 #include <inttypes.h>
23 #include <stdbool.h>
24 
25 #include <isc/mem.h>
26 #include <isc/net.h>
27 #include <isc/netaddr.h>
28 #include <isc/print.h>
29 #include <isc/util.h>
30 
31 #include <dns/log.h>
32 #include <dns/rcode.h>
33 #include <dns/rdataclass.h>
34 #include <dns/rdatatype.h>
35 #include <dns/result.h>
36 #include <dns/rrl.h>
37 #include <dns/view.h>
38 
39 static void
40 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early, char *log_buf,
41 	unsigned int log_buf_len);
42 
43 /*
44  * Get a modulus for a hash function that is tolerably likely to be
45  * relatively prime to most inputs.  Of course, we get a prime for for initial
46  * values not larger than the square of the last prime.  We often get a prime
47  * after that.
48  * This works well in practice for hash tables up to at least 100
49  * times the square of the last prime and better than a multiplicative hash.
50  */
51 static int
hash_divisor(unsigned int initial)52 hash_divisor(unsigned int initial) {
53 	static uint16_t primes[] = {
54 		3,
55 		5,
56 		7,
57 		11,
58 		13,
59 		17,
60 		19,
61 		23,
62 		29,
63 		31,
64 		37,
65 		41,
66 		43,
67 		47,
68 		53,
69 		59,
70 		61,
71 		67,
72 		71,
73 		73,
74 		79,
75 		83,
76 		89,
77 		97,
78 #if 0
79 		101, 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
80 		163, 167, 173, 179, 181, 191, 193, 197, 199, 211, 223, 227,
81 		229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283,
82 		293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367,
83 		373, 379, 383, 389, 397, 401, 409, 419, 421, 431, 433, 439,
84 		443, 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
85 		521, 523, 541, 547, 557, 563, 569, 571, 577, 587, 593, 599,
86 		601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661,
87 		673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751,
88 		757, 761, 769, 773, 787, 797, 809, 811, 821, 823, 827, 829,
89 		839, 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, 919,
90 		929, 937, 941, 947, 953, 967, 971, 977, 983, 991, 997, 1009,
91 #endif /* if 0 */
92 	};
93 	int divisions, tries;
94 	unsigned int result;
95 	uint16_t *pp, p;
96 
97 	result = initial;
98 
99 	if (primes[sizeof(primes) / sizeof(primes[0]) - 1] >= result) {
100 		pp = primes;
101 		while (*pp < result) {
102 			++pp;
103 		}
104 		return (*pp);
105 	}
106 
107 	if ((result & 1) == 0) {
108 		++result;
109 	}
110 
111 	divisions = 0;
112 	tries = 1;
113 	pp = primes;
114 	do {
115 		p = *pp++;
116 		++divisions;
117 		if ((result % p) == 0) {
118 			++tries;
119 			result += 2;
120 			pp = primes;
121 		}
122 	} while (pp < &primes[sizeof(primes) / sizeof(primes[0])]);
123 
124 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) {
125 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
126 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG3,
127 			      "%d hash_divisor() divisions in %d tries"
128 			      " to get %d from %d",
129 			      divisions, tries, result, initial);
130 	}
131 
132 	return (result);
133 }
134 
135 /*
136  * Convert a timestamp to a number of seconds in the past.
137  */
138 static inline int
delta_rrl_time(isc_stdtime_t ts,isc_stdtime_t now)139 delta_rrl_time(isc_stdtime_t ts, isc_stdtime_t now) {
140 	int delta;
141 
142 	delta = now - ts;
143 	if (delta >= 0) {
144 		return (delta);
145 	}
146 
147 	/*
148 	 * The timestamp is in the future.  That future might result from
149 	 * re-ordered requests, because we use timestamps on requests
150 	 * instead of consulting a clock.  Timestamps in the distant future are
151 	 * assumed to result from clock changes.  When the clock changes to
152 	 * the past, make existing timestamps appear to be in the past.
153 	 */
154 	if (delta < -DNS_RRL_MAX_TIME_TRAVEL) {
155 		return (DNS_RRL_FOREVER);
156 	}
157 	return (0);
158 }
159 
160 static inline int
get_age(const dns_rrl_t * rrl,const dns_rrl_entry_t * e,isc_stdtime_t now)161 get_age(const dns_rrl_t *rrl, const dns_rrl_entry_t *e, isc_stdtime_t now) {
162 	if (!e->ts_valid) {
163 		return (DNS_RRL_FOREVER);
164 	}
165 	return (delta_rrl_time(e->ts + rrl->ts_bases[e->ts_gen], now));
166 }
167 
168 static inline void
set_age(dns_rrl_t * rrl,dns_rrl_entry_t * e,isc_stdtime_t now)169 set_age(dns_rrl_t *rrl, dns_rrl_entry_t *e, isc_stdtime_t now) {
170 	dns_rrl_entry_t *e_old;
171 	unsigned int ts_gen;
172 	int i, ts;
173 
174 	ts_gen = rrl->ts_gen;
175 	ts = now - rrl->ts_bases[ts_gen];
176 	if (ts < 0) {
177 		if (ts < -DNS_RRL_MAX_TIME_TRAVEL) {
178 			ts = DNS_RRL_FOREVER;
179 		} else {
180 			ts = 0;
181 		}
182 	}
183 
184 	/*
185 	 * Make a new timestamp base if the current base is too old.
186 	 * All entries older than DNS_RRL_MAX_WINDOW seconds are ancient,
187 	 * useless history.  Their timestamps can be treated as if they are
188 	 * all the same.
189 	 * We only do arithmetic on more recent timestamps, so bases for
190 	 * older timestamps can be recycled provided the old timestamps are
191 	 * marked as ancient history.
192 	 * This loop is almost always very short because most entries are
193 	 * recycled after one second and any entries that need to be marked
194 	 * are older than (DNS_RRL_TS_BASES)*DNS_RRL_MAX_TS seconds.
195 	 */
196 	if (ts >= DNS_RRL_MAX_TS) {
197 		ts_gen = (ts_gen + 1) % DNS_RRL_TS_BASES;
198 		for (e_old = ISC_LIST_TAIL(rrl->lru), i = 0;
199 		     e_old != NULL && (e_old->ts_gen == ts_gen ||
200 				       !ISC_LINK_LINKED(e_old, hlink));
201 		     e_old = ISC_LIST_PREV(e_old, lru), ++i)
202 		{
203 			e_old->ts_valid = false;
204 		}
205 		if (i != 0) {
206 			isc_log_write(
207 				dns_lctx, DNS_LOGCATEGORY_RRL,
208 				DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
209 				"rrl new time base scanned %d entries"
210 				" at %d for %d %d %d %d",
211 				i, now, rrl->ts_bases[ts_gen],
212 				rrl->ts_bases[(ts_gen + 1) % DNS_RRL_TS_BASES],
213 				rrl->ts_bases[(ts_gen + 2) % DNS_RRL_TS_BASES],
214 				rrl->ts_bases[(ts_gen + 3) % DNS_RRL_TS_BASES]);
215 		}
216 		rrl->ts_gen = ts_gen;
217 		rrl->ts_bases[ts_gen] = now;
218 		ts = 0;
219 	}
220 
221 	e->ts_gen = ts_gen;
222 	e->ts = ts;
223 	e->ts_valid = true;
224 }
225 
226 static isc_result_t
expand_entries(dns_rrl_t * rrl,int newsize)227 expand_entries(dns_rrl_t *rrl, int newsize) {
228 	unsigned int bsize;
229 	dns_rrl_block_t *b;
230 	dns_rrl_entry_t *e;
231 	double rate;
232 	int i;
233 
234 	if (rrl->num_entries + newsize >= rrl->max_entries &&
235 	    rrl->max_entries != 0) {
236 		newsize = rrl->max_entries - rrl->num_entries;
237 		if (newsize <= 0) {
238 			return (ISC_R_SUCCESS);
239 		}
240 	}
241 
242 	/*
243 	 * Log expansions so that the user can tune max-table-size
244 	 * and min-table-size.
245 	 */
246 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && rrl->hash != NULL) {
247 		rate = rrl->probes;
248 		if (rrl->searches != 0) {
249 			rate /= rrl->searches;
250 		}
251 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
252 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
253 			      "increase from %d to %d RRL entries with"
254 			      " %d bins; average search length %.1f",
255 			      rrl->num_entries, rrl->num_entries + newsize,
256 			      rrl->hash->length, rate);
257 	}
258 
259 	bsize = sizeof(dns_rrl_block_t) +
260 		(newsize - 1) * sizeof(dns_rrl_entry_t);
261 	b = isc_mem_get(rrl->mctx, bsize);
262 	memset(b, 0, bsize);
263 	b->size = bsize;
264 
265 	e = b->entries;
266 	for (i = 0; i < newsize; ++i, ++e) {
267 		ISC_LINK_INIT(e, hlink);
268 		ISC_LIST_INITANDAPPEND(rrl->lru, e, lru);
269 	}
270 	rrl->num_entries += newsize;
271 	ISC_LIST_INITANDAPPEND(rrl->blocks, b, link);
272 
273 	return (ISC_R_SUCCESS);
274 }
275 
276 static inline dns_rrl_bin_t *
get_bin(dns_rrl_hash_t * hash,unsigned int hval)277 get_bin(dns_rrl_hash_t *hash, unsigned int hval) {
278 	INSIST(hash != NULL);
279 	return (&hash->bins[hval % hash->length]);
280 }
281 
282 static void
free_old_hash(dns_rrl_t * rrl)283 free_old_hash(dns_rrl_t *rrl) {
284 	dns_rrl_hash_t *old_hash;
285 	dns_rrl_bin_t *old_bin;
286 	dns_rrl_entry_t *e, *e_next;
287 
288 	old_hash = rrl->old_hash;
289 	for (old_bin = &old_hash->bins[0];
290 	     old_bin < &old_hash->bins[old_hash->length]; ++old_bin)
291 	{
292 		for (e = ISC_LIST_HEAD(*old_bin); e != NULL; e = e_next) {
293 			e_next = ISC_LIST_NEXT(e, hlink);
294 			ISC_LINK_INIT(e, hlink);
295 		}
296 	}
297 
298 	isc_mem_put(rrl->mctx, old_hash,
299 		    sizeof(*old_hash) +
300 			    (old_hash->length - 1) * sizeof(old_hash->bins[0]));
301 	rrl->old_hash = NULL;
302 }
303 
304 static isc_result_t
expand_rrl_hash(dns_rrl_t * rrl,isc_stdtime_t now)305 expand_rrl_hash(dns_rrl_t *rrl, isc_stdtime_t now) {
306 	dns_rrl_hash_t *hash;
307 	int old_bins, new_bins, hsize;
308 	double rate;
309 
310 	if (rrl->old_hash != NULL) {
311 		free_old_hash(rrl);
312 	}
313 
314 	/*
315 	 * Most searches fail and so go to the end of the chain.
316 	 * Use a small hash table load factor.
317 	 */
318 	old_bins = (rrl->hash == NULL) ? 0 : rrl->hash->length;
319 	new_bins = old_bins / 8 + old_bins;
320 	if (new_bins < rrl->num_entries) {
321 		new_bins = rrl->num_entries;
322 	}
323 	new_bins = hash_divisor(new_bins);
324 
325 	hsize = sizeof(dns_rrl_hash_t) + (new_bins - 1) * sizeof(hash->bins[0]);
326 	hash = isc_mem_get(rrl->mctx, hsize);
327 	memset(hash, 0, hsize);
328 	hash->length = new_bins;
329 	rrl->hash_gen ^= 1;
330 	hash->gen = rrl->hash_gen;
331 
332 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP) && old_bins != 0) {
333 		rate = rrl->probes;
334 		if (rrl->searches != 0) {
335 			rate /= rrl->searches;
336 		}
337 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
338 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP,
339 			      "increase from %d to %d RRL bins for"
340 			      " %d entries; average search length %.1f",
341 			      old_bins, new_bins, rrl->num_entries, rate);
342 	}
343 
344 	rrl->old_hash = rrl->hash;
345 	if (rrl->old_hash != NULL) {
346 		rrl->old_hash->check_time = now;
347 	}
348 	rrl->hash = hash;
349 
350 	return (ISC_R_SUCCESS);
351 }
352 
353 static void
ref_entry(dns_rrl_t * rrl,dns_rrl_entry_t * e,int probes,isc_stdtime_t now)354 ref_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, int probes, isc_stdtime_t now) {
355 	/*
356 	 * Make the entry most recently used.
357 	 */
358 	if (ISC_LIST_HEAD(rrl->lru) != e) {
359 		if (e == rrl->last_logged) {
360 			rrl->last_logged = ISC_LIST_PREV(e, lru);
361 		}
362 		ISC_LIST_UNLINK(rrl->lru, e, lru);
363 		ISC_LIST_PREPEND(rrl->lru, e, lru);
364 	}
365 
366 	/*
367 	 * Expand the hash table if it is time and necessary.
368 	 * This will leave the newly referenced entry in a chain in the
369 	 * old hash table.  It will migrate to the new hash table the next
370 	 * time it is used or be cut loose when the old hash table is destroyed.
371 	 */
372 	rrl->probes += probes;
373 	++rrl->searches;
374 	if (rrl->searches > 100 &&
375 	    delta_rrl_time(rrl->hash->check_time, now) > 1) {
376 		if (rrl->probes / rrl->searches > 2) {
377 			expand_rrl_hash(rrl, now);
378 		}
379 		rrl->hash->check_time = now;
380 		rrl->probes = 0;
381 		rrl->searches = 0;
382 	}
383 }
384 
385 static inline bool
key_cmp(const dns_rrl_key_t * a,const dns_rrl_key_t * b)386 key_cmp(const dns_rrl_key_t *a, const dns_rrl_key_t *b) {
387 	if (memcmp(a, b, sizeof(dns_rrl_key_t)) == 0) {
388 		return (true);
389 	}
390 	return (false);
391 }
392 
393 static inline uint32_t
hash_key(const dns_rrl_key_t * key)394 hash_key(const dns_rrl_key_t *key) {
395 	uint32_t hval;
396 	int i;
397 
398 	hval = key->w[0];
399 	for (i = sizeof(key->w) / sizeof(key->w[0]) - 1; i >= 0; --i) {
400 		hval = key->w[i] + (hval << 1);
401 	}
402 	return (hval);
403 }
404 
405 /*
406  * Construct the hash table key.
407  * Use a hash of the DNS query name to save space in the database.
408  * Collisions result in legitimate rate limiting responses for one
409  * query name also limiting responses for other names to the
410  * same client.  This is rare and benign enough given the large
411  * space costs compared to keeping the entire name in the database
412  * entry or the time costs of dynamic allocation.
413  */
414 static void
make_key(const dns_rrl_t * rrl,dns_rrl_key_t * key,const isc_sockaddr_t * client_addr,dns_rdatatype_t qtype,const dns_name_t * qname,dns_rdataclass_t qclass,dns_rrl_rtype_t rtype)415 make_key(const dns_rrl_t *rrl, dns_rrl_key_t *key,
416 	 const isc_sockaddr_t *client_addr, dns_rdatatype_t qtype,
417 	 const dns_name_t *qname, dns_rdataclass_t qclass,
418 	 dns_rrl_rtype_t rtype) {
419 	dns_name_t base;
420 	dns_offsets_t base_offsets;
421 	int labels, i;
422 
423 	memset(key, 0, sizeof(*key));
424 
425 	key->s.rtype = rtype;
426 	if (rtype == DNS_RRL_RTYPE_QUERY) {
427 		key->s.qtype = qtype;
428 		key->s.qclass = qclass & 0xff;
429 	} else if (rtype == DNS_RRL_RTYPE_REFERRAL ||
430 		   rtype == DNS_RRL_RTYPE_NODATA) {
431 		/*
432 		 * Because there is no qtype in the empty answer sections of
433 		 * referral and NODATA responses, count them as the same.
434 		 */
435 		key->s.qclass = qclass & 0xff;
436 	}
437 
438 	if (qname != NULL && qname->labels != 0) {
439 		/*
440 		 * Ignore the first label of wildcards.
441 		 */
442 		if ((qname->attributes & DNS_NAMEATTR_WILDCARD) != 0 &&
443 		    (labels = dns_name_countlabels(qname)) > 1)
444 		{
445 			dns_name_init(&base, base_offsets);
446 			dns_name_getlabelsequence(qname, 1, labels - 1, &base);
447 			key->s.qname_hash = dns_name_fullhash(&base, false);
448 		} else {
449 			key->s.qname_hash = dns_name_fullhash(qname, false);
450 		}
451 	}
452 
453 	switch (client_addr->type.sa.sa_family) {
454 	case AF_INET:
455 		key->s.ip[0] = (client_addr->type.sin.sin_addr.s_addr &
456 				rrl->ipv4_mask);
457 		break;
458 	case AF_INET6:
459 		key->s.ipv6 = true;
460 		memmove(key->s.ip, &client_addr->type.sin6.sin6_addr,
461 			sizeof(key->s.ip));
462 		for (i = 0; i < DNS_RRL_MAX_PREFIX / 32; ++i) {
463 			key->s.ip[i] &= rrl->ipv6_mask[i];
464 		}
465 		break;
466 	}
467 }
468 
469 static inline dns_rrl_rate_t *
get_rate(dns_rrl_t * rrl,dns_rrl_rtype_t rtype)470 get_rate(dns_rrl_t *rrl, dns_rrl_rtype_t rtype) {
471 	switch (rtype) {
472 	case DNS_RRL_RTYPE_QUERY:
473 		return (&rrl->responses_per_second);
474 	case DNS_RRL_RTYPE_REFERRAL:
475 		return (&rrl->referrals_per_second);
476 	case DNS_RRL_RTYPE_NODATA:
477 		return (&rrl->nodata_per_second);
478 	case DNS_RRL_RTYPE_NXDOMAIN:
479 		return (&rrl->nxdomains_per_second);
480 	case DNS_RRL_RTYPE_ERROR:
481 		return (&rrl->errors_per_second);
482 	case DNS_RRL_RTYPE_ALL:
483 		return (&rrl->all_per_second);
484 	default:
485 		INSIST(0);
486 		ISC_UNREACHABLE();
487 	}
488 }
489 
490 static int
response_balance(dns_rrl_t * rrl,const dns_rrl_entry_t * e,int age)491 response_balance(dns_rrl_t *rrl, const dns_rrl_entry_t *e, int age) {
492 	dns_rrl_rate_t *ratep;
493 	int balance, rate;
494 
495 	if (e->key.s.rtype == DNS_RRL_RTYPE_TCP) {
496 		rate = 1;
497 	} else {
498 		ratep = get_rate(rrl, e->key.s.rtype);
499 		rate = ratep->scaled;
500 	}
501 
502 	balance = e->responses + age * rate;
503 	if (balance > rate) {
504 		balance = rate;
505 	}
506 	return (balance);
507 }
508 
509 /*
510  * Search for an entry for a response and optionally create it.
511  */
512 static dns_rrl_entry_t *
get_entry(dns_rrl_t * rrl,const isc_sockaddr_t * client_addr,dns_rdataclass_t qclass,dns_rdatatype_t qtype,const dns_name_t * qname,dns_rrl_rtype_t rtype,isc_stdtime_t now,bool create,char * log_buf,unsigned int log_buf_len)513 get_entry(dns_rrl_t *rrl, const isc_sockaddr_t *client_addr,
514 	  dns_rdataclass_t qclass, dns_rdatatype_t qtype,
515 	  const dns_name_t *qname, dns_rrl_rtype_t rtype, isc_stdtime_t now,
516 	  bool create, char *log_buf, unsigned int log_buf_len) {
517 	dns_rrl_key_t key;
518 	uint32_t hval;
519 	dns_rrl_entry_t *e;
520 	dns_rrl_hash_t *hash;
521 	dns_rrl_bin_t *new_bin, *old_bin;
522 	int probes, age;
523 
524 	make_key(rrl, &key, client_addr, qtype, qname, qclass, rtype);
525 	hval = hash_key(&key);
526 
527 	/*
528 	 * Look for the entry in the current hash table.
529 	 */
530 	new_bin = get_bin(rrl->hash, hval);
531 	probes = 1;
532 	e = ISC_LIST_HEAD(*new_bin);
533 	while (e != NULL) {
534 		if (key_cmp(&e->key, &key)) {
535 			ref_entry(rrl, e, probes, now);
536 			return (e);
537 		}
538 		++probes;
539 		e = ISC_LIST_NEXT(e, hlink);
540 	}
541 
542 	/*
543 	 * Look in the old hash table.
544 	 */
545 	if (rrl->old_hash != NULL) {
546 		old_bin = get_bin(rrl->old_hash, hval);
547 		e = ISC_LIST_HEAD(*old_bin);
548 		while (e != NULL) {
549 			if (key_cmp(&e->key, &key)) {
550 				ISC_LIST_UNLINK(*old_bin, e, hlink);
551 				ISC_LIST_PREPEND(*new_bin, e, hlink);
552 				e->hash_gen = rrl->hash_gen;
553 				ref_entry(rrl, e, probes, now);
554 				return (e);
555 			}
556 			e = ISC_LIST_NEXT(e, hlink);
557 		}
558 
559 		/*
560 		 * Discard previous hash table when all of its entries are old.
561 		 */
562 		age = delta_rrl_time(rrl->old_hash->check_time, now);
563 		if (age > rrl->window) {
564 			free_old_hash(rrl);
565 		}
566 	}
567 
568 	if (!create) {
569 		return (NULL);
570 	}
571 
572 	/*
573 	 * The entry does not exist, so create it by finding a free entry.
574 	 * Keep currently penalized and logged entries.
575 	 * Try to make more entries if none are idle.
576 	 * Steal the oldest entry if we cannot create more.
577 	 */
578 	for (e = ISC_LIST_TAIL(rrl->lru); e != NULL; e = ISC_LIST_PREV(e, lru))
579 	{
580 		if (!ISC_LINK_LINKED(e, hlink)) {
581 			break;
582 		}
583 		age = get_age(rrl, e, now);
584 		if (age <= 1) {
585 			e = NULL;
586 			break;
587 		}
588 		if (!e->logged && response_balance(rrl, e, age) > 0) {
589 			break;
590 		}
591 	}
592 	if (e == NULL) {
593 		expand_entries(rrl, ISC_MIN((rrl->num_entries + 1) / 2, 1000));
594 		e = ISC_LIST_TAIL(rrl->lru);
595 	}
596 	if (e->logged) {
597 		log_end(rrl, e, true, log_buf, log_buf_len);
598 	}
599 	if (ISC_LINK_LINKED(e, hlink)) {
600 		if (e->hash_gen == rrl->hash_gen) {
601 			hash = rrl->hash;
602 		} else {
603 			hash = rrl->old_hash;
604 		}
605 		old_bin = get_bin(hash, hash_key(&e->key));
606 		ISC_LIST_UNLINK(*old_bin, e, hlink);
607 	}
608 	ISC_LIST_PREPEND(*new_bin, e, hlink);
609 	e->hash_gen = rrl->hash_gen;
610 	e->key = key;
611 	e->ts_valid = false;
612 	ref_entry(rrl, e, probes, now);
613 	return (e);
614 }
615 
616 static void
debit_log(const dns_rrl_entry_t * e,int age,const char * action)617 debit_log(const dns_rrl_entry_t *e, int age, const char *action) {
618 	char buf[sizeof("age=2147483647")];
619 	const char *age_str;
620 
621 	if (age == DNS_RRL_FOREVER) {
622 		age_str = "";
623 	} else {
624 		snprintf(buf, sizeof(buf), "age=%d", age);
625 		age_str = buf;
626 	}
627 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL, DNS_LOGMODULE_REQUEST,
628 		      DNS_RRL_LOG_DEBUG3, "rrl %08x %6s  responses=%-3d %s",
629 		      hash_key(&e->key), age_str, e->responses, action);
630 }
631 
632 static inline dns_rrl_result_t
debit_rrl_entry(dns_rrl_t * rrl,dns_rrl_entry_t * e,double qps,double scale,const isc_sockaddr_t * client_addr,isc_stdtime_t now,char * log_buf,unsigned int log_buf_len)633 debit_rrl_entry(dns_rrl_t *rrl, dns_rrl_entry_t *e, double qps, double scale,
634 		const isc_sockaddr_t *client_addr, isc_stdtime_t now,
635 		char *log_buf, unsigned int log_buf_len) {
636 	int rate, new_rate, slip, new_slip, age, log_secs, min;
637 	dns_rrl_rate_t *ratep;
638 	dns_rrl_entry_t const *credit_e;
639 
640 	/*
641 	 * Pick the rate counter.
642 	 * Optionally adjust the rate by the estimated query/second rate.
643 	 */
644 	ratep = get_rate(rrl, e->key.s.rtype);
645 	rate = ratep->r;
646 	if (rate == 0) {
647 		return (DNS_RRL_RESULT_OK);
648 	}
649 
650 	if (scale < 1.0) {
651 		/*
652 		 * The limit for clients that have used TCP is not scaled.
653 		 */
654 		credit_e = get_entry(rrl, client_addr, 0, dns_rdatatype_none,
655 				     NULL, DNS_RRL_RTYPE_TCP, now, false,
656 				     log_buf, log_buf_len);
657 		if (credit_e != NULL) {
658 			age = get_age(rrl, e, now);
659 			if (age < rrl->window) {
660 				scale = 1.0;
661 			}
662 		}
663 	}
664 	if (scale < 1.0) {
665 		new_rate = (int)(rate * scale);
666 		if (new_rate < 1) {
667 			new_rate = 1;
668 		}
669 		if (ratep->scaled != new_rate) {
670 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
671 				      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
672 				      "%d qps scaled %s by %.2f"
673 				      " from %d to %d",
674 				      (int)qps, ratep->str, scale, rate,
675 				      new_rate);
676 			rate = new_rate;
677 			ratep->scaled = rate;
678 		}
679 	}
680 
681 	min = -rrl->window * rate;
682 
683 	/*
684 	 * Treat time jumps into the recent past as no time.
685 	 * Treat entries older than the window as if they were just created
686 	 * Credit other entries.
687 	 */
688 	age = get_age(rrl, e, now);
689 	if (age > 0) {
690 		/*
691 		 * Credit tokens earned during elapsed time.
692 		 */
693 		if (age > rrl->window) {
694 			e->responses = rate;
695 			e->slip_cnt = 0;
696 		} else {
697 			e->responses += rate * age;
698 			if (e->responses > rate) {
699 				e->responses = rate;
700 				e->slip_cnt = 0;
701 			}
702 		}
703 		/*
704 		 * Find the seconds since last log message without overflowing
705 		 * small counter.  This counter is reset when an entry is
706 		 * created.  It is not necessarily reset when some requests
707 		 * are answered provided other requests continue to be dropped
708 		 * or slipped.  This can happen when the request rate is just
709 		 * at the limit.
710 		 */
711 		if (e->logged) {
712 			log_secs = e->log_secs;
713 			log_secs += age;
714 			if (log_secs > DNS_RRL_MAX_LOG_SECS || log_secs < 0) {
715 				log_secs = DNS_RRL_MAX_LOG_SECS;
716 			}
717 			e->log_secs = log_secs;
718 		}
719 	}
720 	set_age(rrl, e, now);
721 
722 	/*
723 	 * Debit the entry for this response.
724 	 */
725 	if (--e->responses >= 0) {
726 		if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) {
727 			debit_log(e, age, "");
728 		}
729 		return (DNS_RRL_RESULT_OK);
730 	}
731 
732 	if (e->responses < min) {
733 		e->responses = min;
734 	}
735 
736 	/*
737 	 * Drop this response unless it should slip or leak.
738 	 */
739 	slip = rrl->slip.r;
740 	if (slip > 2 && scale < 1.0) {
741 		new_slip = (int)(slip * scale);
742 		if (new_slip < 2) {
743 			new_slip = 2;
744 		}
745 		if (rrl->slip.scaled != new_slip) {
746 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
747 				      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1,
748 				      "%d qps scaled slip"
749 				      " by %.2f from %d to %d",
750 				      (int)qps, scale, slip, new_slip);
751 			slip = new_slip;
752 			rrl->slip.scaled = slip;
753 		}
754 	}
755 	if (slip != 0 && e->key.s.rtype != DNS_RRL_RTYPE_ALL) {
756 		if (e->slip_cnt++ == 0) {
757 			if ((int)e->slip_cnt >= slip) {
758 				e->slip_cnt = 0;
759 			}
760 			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) {
761 				debit_log(e, age, "slip");
762 			}
763 			return (DNS_RRL_RESULT_SLIP);
764 		} else if ((int)e->slip_cnt >= slip) {
765 			e->slip_cnt = 0;
766 		}
767 	}
768 
769 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG3)) {
770 		debit_log(e, age, "drop");
771 	}
772 	return (DNS_RRL_RESULT_DROP);
773 }
774 
775 static inline dns_rrl_qname_buf_t *
get_qname(dns_rrl_t * rrl,const dns_rrl_entry_t * e)776 get_qname(dns_rrl_t *rrl, const dns_rrl_entry_t *e) {
777 	dns_rrl_qname_buf_t *qbuf;
778 
779 	qbuf = rrl->qnames[e->log_qname];
780 	if (qbuf == NULL || qbuf->e != e) {
781 		return (NULL);
782 	}
783 	return (qbuf);
784 }
785 
786 static inline void
free_qname(dns_rrl_t * rrl,dns_rrl_entry_t * e)787 free_qname(dns_rrl_t *rrl, dns_rrl_entry_t *e) {
788 	dns_rrl_qname_buf_t *qbuf;
789 
790 	qbuf = get_qname(rrl, e);
791 	if (qbuf != NULL) {
792 		qbuf->e = NULL;
793 		ISC_LIST_APPEND(rrl->qname_free, qbuf, link);
794 	}
795 }
796 
797 static void
add_log_str(isc_buffer_t * lb,const char * str,unsigned int str_len)798 add_log_str(isc_buffer_t *lb, const char *str, unsigned int str_len) {
799 	isc_region_t region;
800 
801 	isc_buffer_availableregion(lb, &region);
802 	if (str_len >= region.length) {
803 		if (region.length == 0U) {
804 			return;
805 		}
806 		str_len = region.length;
807 	}
808 	memmove(region.base, str, str_len);
809 	isc_buffer_add(lb, str_len);
810 }
811 
/*
 * Append the string constant 's' to log buffer 'eb'.  The length is
 * taken with sizeof, so 's' must be a string literal or a char array,
 * never a pointer.
 */
#define ADD_LOG_CSTR(eb, s) add_log_str((eb), (s), sizeof(s) - 1)
813 
814 /*
815  * Build strings for the logs
816  */
817 static void
make_log_buf(dns_rrl_t * rrl,dns_rrl_entry_t * e,const char * str1,const char * str2,bool plural,const dns_name_t * qname,bool save_qname,dns_rrl_result_t rrl_result,isc_result_t resp_result,char * log_buf,unsigned int log_buf_len)818 make_log_buf(dns_rrl_t *rrl, dns_rrl_entry_t *e, const char *str1,
819 	     const char *str2, bool plural, const dns_name_t *qname,
820 	     bool save_qname, dns_rrl_result_t rrl_result,
821 	     isc_result_t resp_result, char *log_buf,
822 	     unsigned int log_buf_len) {
823 	isc_buffer_t lb;
824 	dns_rrl_qname_buf_t *qbuf;
825 	isc_netaddr_t cidr;
826 	char strbuf[ISC_MAX(sizeof("/123"), sizeof("  (12345678)"))];
827 	const char *rstr;
828 	isc_result_t msg_result;
829 
830 	if (log_buf_len <= 1) {
831 		if (log_buf_len == 1) {
832 			log_buf[0] = '\0';
833 		}
834 		return;
835 	}
836 	isc_buffer_init(&lb, log_buf, log_buf_len - 1);
837 
838 	if (str1 != NULL) {
839 		add_log_str(&lb, str1, strlen(str1));
840 	}
841 	if (str2 != NULL) {
842 		add_log_str(&lb, str2, strlen(str2));
843 	}
844 
845 	switch (rrl_result) {
846 	case DNS_RRL_RESULT_OK:
847 		break;
848 	case DNS_RRL_RESULT_DROP:
849 		ADD_LOG_CSTR(&lb, "drop ");
850 		break;
851 	case DNS_RRL_RESULT_SLIP:
852 		ADD_LOG_CSTR(&lb, "slip ");
853 		break;
854 	default:
855 		INSIST(0);
856 		ISC_UNREACHABLE();
857 	}
858 
859 	switch (e->key.s.rtype) {
860 	case DNS_RRL_RTYPE_QUERY:
861 		break;
862 	case DNS_RRL_RTYPE_REFERRAL:
863 		ADD_LOG_CSTR(&lb, "referral ");
864 		break;
865 	case DNS_RRL_RTYPE_NODATA:
866 		ADD_LOG_CSTR(&lb, "NODATA ");
867 		break;
868 	case DNS_RRL_RTYPE_NXDOMAIN:
869 		ADD_LOG_CSTR(&lb, "NXDOMAIN ");
870 		break;
871 	case DNS_RRL_RTYPE_ERROR:
872 		if (resp_result == ISC_R_SUCCESS) {
873 			ADD_LOG_CSTR(&lb, "error ");
874 		} else {
875 			rstr = isc_result_totext(resp_result);
876 			add_log_str(&lb, rstr, strlen(rstr));
877 			ADD_LOG_CSTR(&lb, " error ");
878 		}
879 		break;
880 	case DNS_RRL_RTYPE_ALL:
881 		ADD_LOG_CSTR(&lb, "all ");
882 		break;
883 	default:
884 		INSIST(0);
885 		ISC_UNREACHABLE();
886 	}
887 
888 	if (plural) {
889 		ADD_LOG_CSTR(&lb, "responses to ");
890 	} else {
891 		ADD_LOG_CSTR(&lb, "response to ");
892 	}
893 
894 	memset(&cidr, 0, sizeof(cidr));
895 	if (e->key.s.ipv6) {
896 		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv6_prefixlen);
897 		cidr.family = AF_INET6;
898 		memset(&cidr.type.in6, 0, sizeof(cidr.type.in6));
899 		memmove(&cidr.type.in6, e->key.s.ip, sizeof(e->key.s.ip));
900 	} else {
901 		snprintf(strbuf, sizeof(strbuf), "/%d", rrl->ipv4_prefixlen);
902 		cidr.family = AF_INET;
903 		cidr.type.in.s_addr = e->key.s.ip[0];
904 	}
905 	msg_result = isc_netaddr_totext(&cidr, &lb);
906 	if (msg_result != ISC_R_SUCCESS) {
907 		ADD_LOG_CSTR(&lb, "?");
908 	}
909 	add_log_str(&lb, strbuf, strlen(strbuf));
910 
911 	if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY ||
912 	    e->key.s.rtype == DNS_RRL_RTYPE_REFERRAL ||
913 	    e->key.s.rtype == DNS_RRL_RTYPE_NODATA ||
914 	    e->key.s.rtype == DNS_RRL_RTYPE_NXDOMAIN)
915 	{
916 		qbuf = get_qname(rrl, e);
917 		if (save_qname && qbuf == NULL && qname != NULL &&
918 		    dns_name_isabsolute(qname)) {
919 			/*
920 			 * Capture the qname for the "stop limiting" message.
921 			 */
922 			qbuf = ISC_LIST_TAIL(rrl->qname_free);
923 			if (qbuf != NULL) {
924 				ISC_LIST_UNLINK(rrl->qname_free, qbuf, link);
925 			} else if (rrl->num_qnames < DNS_RRL_QNAMES) {
926 				qbuf = isc_mem_get(rrl->mctx, sizeof(*qbuf));
927 				{
928 					memset(qbuf, 0, sizeof(*qbuf));
929 					ISC_LINK_INIT(qbuf, link);
930 					qbuf->index = rrl->num_qnames;
931 					rrl->qnames[rrl->num_qnames++] = qbuf;
932 				}
933 			}
934 			if (qbuf != NULL) {
935 				e->log_qname = qbuf->index;
936 				qbuf->e = e;
937 				dns_fixedname_init(&qbuf->qname);
938 				dns_name_copynf(qname, dns_fixedname_name(
939 							       &qbuf->qname));
940 			}
941 		}
942 		if (qbuf != NULL) {
943 			qname = dns_fixedname_name(&qbuf->qname);
944 		}
945 		if (qname != NULL) {
946 			ADD_LOG_CSTR(&lb, " for ");
947 			(void)dns_name_totext(qname, true, &lb);
948 		} else {
949 			ADD_LOG_CSTR(&lb, " for (?)");
950 		}
951 		if (e->key.s.rtype != DNS_RRL_RTYPE_NXDOMAIN) {
952 			ADD_LOG_CSTR(&lb, " ");
953 			(void)dns_rdataclass_totext(e->key.s.qclass, &lb);
954 			if (e->key.s.rtype == DNS_RRL_RTYPE_QUERY) {
955 				ADD_LOG_CSTR(&lb, " ");
956 				(void)dns_rdatatype_totext(e->key.s.qtype, &lb);
957 			}
958 		}
959 		snprintf(strbuf, sizeof(strbuf), "  (%08" PRIx32 ")",
960 			 e->key.s.qname_hash);
961 		add_log_str(&lb, strbuf, strlen(strbuf));
962 	}
963 
964 	/*
965 	 * We saved room for '\0'.
966 	 */
967 	log_buf[isc_buffer_usedlength(&lb)] = '\0';
968 }
969 
970 static void
log_end(dns_rrl_t * rrl,dns_rrl_entry_t * e,bool early,char * log_buf,unsigned int log_buf_len)971 log_end(dns_rrl_t *rrl, dns_rrl_entry_t *e, bool early, char *log_buf,
972 	unsigned int log_buf_len) {
973 	if (e->logged) {
974 		make_log_buf(rrl, e, early ? "*" : NULL,
975 			     rrl->log_only ? "would stop limiting "
976 					   : "stop limiting ",
977 			     true, NULL, false, DNS_RRL_RESULT_OK,
978 			     ISC_R_SUCCESS, log_buf, log_buf_len);
979 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
980 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "%s",
981 			      log_buf);
982 		free_qname(rrl, e);
983 		e->logged = false;
984 		--rrl->num_logged;
985 	}
986 }
987 
988 /*
989  * Log messages for streams that have stopped being rate limited.
990  */
991 static void
log_stops(dns_rrl_t * rrl,isc_stdtime_t now,int limit,char * log_buf,unsigned int log_buf_len)992 log_stops(dns_rrl_t *rrl, isc_stdtime_t now, int limit, char *log_buf,
993 	  unsigned int log_buf_len) {
994 	dns_rrl_entry_t *e;
995 	int age;
996 
997 	for (e = rrl->last_logged; e != NULL; e = ISC_LIST_PREV(e, lru)) {
998 		if (!e->logged) {
999 			continue;
1000 		}
1001 		if (now != 0) {
1002 			age = get_age(rrl, e, now);
1003 			if (age < DNS_RRL_STOP_LOG_SECS ||
1004 			    response_balance(rrl, e, age) < 0) {
1005 				break;
1006 			}
1007 		}
1008 
1009 		log_end(rrl, e, now == 0, log_buf, log_buf_len);
1010 		if (rrl->num_logged <= 0) {
1011 			break;
1012 		}
1013 
1014 		/*
1015 		 * Too many messages could stall real work.
1016 		 */
1017 		if (--limit < 0) {
1018 			rrl->last_logged = ISC_LIST_PREV(e, lru);
1019 			return;
1020 		}
1021 	}
1022 	if (e == NULL) {
1023 		INSIST(rrl->num_logged == 0);
1024 		rrl->log_stops_time = now;
1025 	}
1026 	rrl->last_logged = e;
1027 }
1028 
1029 /*
1030  * Main rate limit interface.
1031  */
1032 dns_rrl_result_t
dns_rrl(dns_view_t * view,const isc_sockaddr_t * client_addr,bool is_tcp,dns_rdataclass_t qclass,dns_rdatatype_t qtype,const dns_name_t * qname,isc_result_t resp_result,isc_stdtime_t now,bool wouldlog,char * log_buf,unsigned int log_buf_len)1033 dns_rrl(dns_view_t *view, const isc_sockaddr_t *client_addr, bool is_tcp,
1034 	dns_rdataclass_t qclass, dns_rdatatype_t qtype, const dns_name_t *qname,
1035 	isc_result_t resp_result, isc_stdtime_t now, bool wouldlog,
1036 	char *log_buf, unsigned int log_buf_len) {
1037 	dns_rrl_t *rrl;
1038 	dns_rrl_rtype_t rtype;
1039 	dns_rrl_entry_t *e;
1040 	isc_netaddr_t netclient;
1041 	int secs;
1042 	double qps, scale;
1043 	int exempt_match;
1044 	isc_result_t result;
1045 	dns_rrl_result_t rrl_result;
1046 
1047 	INSIST(log_buf != NULL && log_buf_len > 0);
1048 
1049 	rrl = view->rrl;
1050 	if (rrl->exempt != NULL) {
1051 		isc_netaddr_fromsockaddr(&netclient, client_addr);
1052 		result = dns_acl_match(&netclient, NULL, rrl->exempt,
1053 				       &view->aclenv, &exempt_match, NULL);
1054 		if (result == ISC_R_SUCCESS && exempt_match > 0) {
1055 			return (DNS_RRL_RESULT_OK);
1056 		}
1057 	}
1058 
1059 	LOCK(&rrl->lock);
1060 
1061 	/*
1062 	 * Estimate total query per second rate when scaling by qps.
1063 	 */
1064 	if (rrl->qps_scale == 0) {
1065 		qps = 0.0;
1066 		scale = 1.0;
1067 	} else {
1068 		++rrl->qps_responses;
1069 		secs = delta_rrl_time(rrl->qps_time, now);
1070 		if (secs <= 0) {
1071 			qps = rrl->qps;
1072 		} else {
1073 			qps = (1.0 * rrl->qps_responses) / secs;
1074 			if (secs >= rrl->window) {
1075 				if (isc_log_wouldlog(dns_lctx,
1076 						     DNS_RRL_LOG_DEBUG3)) {
1077 					isc_log_write(dns_lctx,
1078 						      DNS_LOGCATEGORY_RRL,
1079 						      DNS_LOGMODULE_REQUEST,
1080 						      DNS_RRL_LOG_DEBUG3,
1081 						      "%d responses/%d seconds"
1082 						      " = %d qps",
1083 						      rrl->qps_responses, secs,
1084 						      (int)qps);
1085 				}
1086 				rrl->qps = qps;
1087 				rrl->qps_responses = 0;
1088 				rrl->qps_time = now;
1089 			} else if (qps < rrl->qps) {
1090 				qps = rrl->qps;
1091 			}
1092 		}
1093 		scale = rrl->qps_scale / qps;
1094 	}
1095 
1096 	/*
1097 	 * Do maintenance once per second.
1098 	 */
1099 	if (rrl->num_logged > 0 && rrl->log_stops_time != now) {
1100 		log_stops(rrl, now, 8, log_buf, log_buf_len);
1101 	}
1102 
1103 	/*
1104 	 * Notice TCP responses when scaling limits by qps.
1105 	 * Do not try to rate limit TCP responses.
1106 	 */
1107 	if (is_tcp) {
1108 		if (scale < 1.0) {
1109 			e = get_entry(rrl, client_addr, 0, dns_rdatatype_none,
1110 				      NULL, DNS_RRL_RTYPE_TCP, now, true,
1111 				      log_buf, log_buf_len);
1112 			if (e != NULL) {
1113 				e->responses = -(rrl->window + 1);
1114 				set_age(rrl, e, now);
1115 			}
1116 		}
1117 		UNLOCK(&rrl->lock);
1118 		return (ISC_R_SUCCESS);
1119 	}
1120 
1121 	/*
1122 	 * Find the right kind of entry, creating it if necessary.
1123 	 * If that is impossible, then nothing more can be done
1124 	 */
1125 	switch (resp_result) {
1126 	case ISC_R_SUCCESS:
1127 		rtype = DNS_RRL_RTYPE_QUERY;
1128 		break;
1129 	case DNS_R_DELEGATION:
1130 		rtype = DNS_RRL_RTYPE_REFERRAL;
1131 		break;
1132 	case DNS_R_NXRRSET:
1133 		rtype = DNS_RRL_RTYPE_NODATA;
1134 		break;
1135 	case DNS_R_NXDOMAIN:
1136 		rtype = DNS_RRL_RTYPE_NXDOMAIN;
1137 		break;
1138 	default:
1139 		rtype = DNS_RRL_RTYPE_ERROR;
1140 		break;
1141 	}
1142 	e = get_entry(rrl, client_addr, qclass, qtype, qname, rtype, now, true,
1143 		      log_buf, log_buf_len);
1144 	if (e == NULL) {
1145 		UNLOCK(&rrl->lock);
1146 		return (DNS_RRL_RESULT_OK);
1147 	}
1148 
1149 	if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1150 		/*
1151 		 * Do not worry about speed or releasing the lock.
1152 		 * This message appears before messages from debit_rrl_entry().
1153 		 */
1154 		make_log_buf(rrl, e, "consider limiting ", NULL, false, qname,
1155 			     false, DNS_RRL_RESULT_OK, resp_result, log_buf,
1156 			     log_buf_len);
1157 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1158 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DEBUG1, "%s",
1159 			      log_buf);
1160 	}
1161 
1162 	rrl_result = debit_rrl_entry(rrl, e, qps, scale, client_addr, now,
1163 				     log_buf, log_buf_len);
1164 
1165 	if (rrl->all_per_second.r != 0) {
1166 		/*
1167 		 * We must debit the all-per-second token bucket if we have
1168 		 * an all-per-second limit for the IP address.
1169 		 * The all-per-second limit determines the log message
1170 		 * when both limits are hit.
1171 		 * The response limiting must continue if the
1172 		 * all-per-second limiting lapses.
1173 		 */
1174 		dns_rrl_entry_t *e_all;
1175 		dns_rrl_result_t rrl_all_result;
1176 
1177 		e_all = get_entry(rrl, client_addr, 0, dns_rdatatype_none, NULL,
1178 				  DNS_RRL_RTYPE_ALL, now, true, log_buf,
1179 				  log_buf_len);
1180 		if (e_all == NULL) {
1181 			UNLOCK(&rrl->lock);
1182 			return (DNS_RRL_RESULT_OK);
1183 		}
1184 		rrl_all_result = debit_rrl_entry(rrl, e_all, qps, scale,
1185 						 client_addr, now, log_buf,
1186 						 log_buf_len);
1187 		if (rrl_all_result != DNS_RRL_RESULT_OK) {
1188 			e = e_all;
1189 			rrl_result = rrl_all_result;
1190 			if (isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DEBUG1)) {
1191 				make_log_buf(rrl, e,
1192 					     "prefer all-per-second limiting ",
1193 					     NULL, true, qname, false,
1194 					     DNS_RRL_RESULT_OK, resp_result,
1195 					     log_buf, log_buf_len);
1196 				isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1197 					      DNS_LOGMODULE_REQUEST,
1198 					      DNS_RRL_LOG_DEBUG1, "%s",
1199 					      log_buf);
1200 			}
1201 		}
1202 	}
1203 
1204 	if (rrl_result == DNS_RRL_RESULT_OK) {
1205 		UNLOCK(&rrl->lock);
1206 		return (DNS_RRL_RESULT_OK);
1207 	}
1208 
1209 	/*
1210 	 * Log occasionally in the rate-limit category.
1211 	 */
1212 	if ((!e->logged || e->log_secs >= DNS_RRL_MAX_LOG_SECS) &&
1213 	    isc_log_wouldlog(dns_lctx, DNS_RRL_LOG_DROP))
1214 	{
1215 		make_log_buf(rrl, e, rrl->log_only ? "would " : NULL,
1216 			     e->logged ? "continue limiting " : "limit ", true,
1217 			     qname, true, DNS_RRL_RESULT_OK, resp_result,
1218 			     log_buf, log_buf_len);
1219 		if (!e->logged) {
1220 			e->logged = true;
1221 			if (++rrl->num_logged <= 1) {
1222 				rrl->last_logged = e;
1223 			}
1224 		}
1225 		e->log_secs = 0;
1226 
1227 		/*
1228 		 * Avoid holding the lock.
1229 		 */
1230 		if (!wouldlog) {
1231 			UNLOCK(&rrl->lock);
1232 			e = NULL;
1233 		}
1234 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_RRL,
1235 			      DNS_LOGMODULE_REQUEST, DNS_RRL_LOG_DROP, "%s",
1236 			      log_buf);
1237 	}
1238 
1239 	/*
1240 	 * Make a log message for the caller.
1241 	 */
1242 	if (wouldlog) {
1243 		make_log_buf(rrl, e,
1244 			     rrl->log_only ? "would rate limit "
1245 					   : "rate limit ",
1246 			     NULL, false, qname, false, rrl_result, resp_result,
1247 			     log_buf, log_buf_len);
1248 	}
1249 
1250 	if (e != NULL) {
1251 		/*
1252 		 * Do not save the qname unless we might need it for
1253 		 * the ending log message.
1254 		 */
1255 		if (!e->logged) {
1256 			free_qname(rrl, e);
1257 		}
1258 		UNLOCK(&rrl->lock);
1259 	}
1260 
1261 	return (rrl_result);
1262 }
1263 
1264 void
dns_rrl_view_destroy(dns_view_t * view)1265 dns_rrl_view_destroy(dns_view_t *view) {
1266 	dns_rrl_t *rrl;
1267 	dns_rrl_block_t *b;
1268 	dns_rrl_hash_t *h;
1269 	char log_buf[DNS_RRL_LOG_BUF_LEN];
1270 	int i;
1271 
1272 	rrl = view->rrl;
1273 	if (rrl == NULL) {
1274 		return;
1275 	}
1276 	view->rrl = NULL;
1277 
1278 	/*
1279 	 * Assume the caller takes care of locking the view and anything else.
1280 	 */
1281 
1282 	if (rrl->num_logged > 0) {
1283 		log_stops(rrl, 0, INT32_MAX, log_buf, sizeof(log_buf));
1284 	}
1285 
1286 	for (i = 0; i < DNS_RRL_QNAMES; ++i) {
1287 		if (rrl->qnames[i] == NULL) {
1288 			break;
1289 		}
1290 		isc_mem_put(rrl->mctx, rrl->qnames[i], sizeof(*rrl->qnames[i]));
1291 	}
1292 
1293 	if (rrl->exempt != NULL) {
1294 		dns_acl_detach(&rrl->exempt);
1295 	}
1296 
1297 	isc_mutex_destroy(&rrl->lock);
1298 
1299 	while (!ISC_LIST_EMPTY(rrl->blocks)) {
1300 		b = ISC_LIST_HEAD(rrl->blocks);
1301 		ISC_LIST_UNLINK(rrl->blocks, b, link);
1302 		isc_mem_put(rrl->mctx, b, b->size);
1303 	}
1304 
1305 	h = rrl->hash;
1306 	if (h != NULL) {
1307 		isc_mem_put(rrl->mctx, h,
1308 			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1309 	}
1310 
1311 	h = rrl->old_hash;
1312 	if (h != NULL) {
1313 		isc_mem_put(rrl->mctx, h,
1314 			    sizeof(*h) + (h->length - 1) * sizeof(h->bins[0]));
1315 	}
1316 
1317 	isc_mem_putanddetach(&rrl->mctx, rrl, sizeof(*rrl));
1318 }
1319 
1320 isc_result_t
dns_rrl_init(dns_rrl_t ** rrlp,dns_view_t * view,int min_entries)1321 dns_rrl_init(dns_rrl_t **rrlp, dns_view_t *view, int min_entries) {
1322 	dns_rrl_t *rrl;
1323 	isc_result_t result;
1324 
1325 	*rrlp = NULL;
1326 
1327 	rrl = isc_mem_get(view->mctx, sizeof(*rrl));
1328 	memset(rrl, 0, sizeof(*rrl));
1329 	isc_mem_attach(view->mctx, &rrl->mctx);
1330 	isc_mutex_init(&rrl->lock);
1331 	isc_stdtime_get(&rrl->ts_bases[0]);
1332 
1333 	view->rrl = rrl;
1334 
1335 	result = expand_entries(rrl, min_entries);
1336 	if (result != ISC_R_SUCCESS) {
1337 		dns_rrl_view_destroy(view);
1338 		return (result);
1339 	}
1340 	result = expand_rrl_hash(rrl, 0);
1341 	if (result != ISC_R_SUCCESS) {
1342 		dns_rrl_view_destroy(view);
1343 		return (result);
1344 	}
1345 
1346 	*rrlp = rrl;
1347 	return (ISC_R_SUCCESS);
1348 }
1349