1 /*
2  * edns-subnet/subnetmod.c - edns subnet module. Must be called before validator
3  * and iterator.
4  *
5  * Copyright (c) 2013, NLnet Labs. All rights reserved.
6  *
7  * This software is open source.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  *
16  * Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  *
20  * Neither the name of the NLNET LABS nor the names of its contributors may
21  * be used to endorse or promote products derived from this software without
22  * specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  */
36  /**
37  * \file
38  * subnet module for unbound.
39  */
40 
41 #include "config.h"
42 
43 #ifdef CLIENT_SUBNET /* keeps splint happy */
44 
45 #include "edns-subnet/subnetmod.h"
46 #include "edns-subnet/edns-subnet.h"
47 #include "edns-subnet/addrtree.h"
48 #include "edns-subnet/subnet-whitelist.h"
49 
50 #include "services/mesh.h"
51 #include "services/cache/dns.h"
52 #include "util/module.h"
53 #include "util/regional.h"
54 #include "util/storage/slabhash.h"
55 #include "util/config_file.h"
56 #include "util/data/msgreply.h"
57 #include "sldns/sbuffer.h"
58 #include "sldns/wire2str.h"
59 #include "iterator/iter_utils.h"
60 
61 /** externally called */
62 void
63 subnet_data_delete(void *d, void *ATTR_UNUSED(arg))
64 {
65 	struct subnet_msg_cache_data *r;
66 	r = (struct subnet_msg_cache_data*)d;
67 	addrtree_delete(r->tree4);
68 	addrtree_delete(r->tree6);
69 	free(r);
70 }
71 
72 /** externally called */
73 size_t
74 msg_cache_sizefunc(void *k, void *d)
75 {
76 	struct msgreply_entry *q = (struct msgreply_entry*)k;
77 	struct subnet_msg_cache_data *r = (struct subnet_msg_cache_data*)d;
78 	size_t s = sizeof(struct msgreply_entry)
79 		+ sizeof(struct subnet_msg_cache_data)
80 		+ q->key.qname_len + lock_get_mem(&q->entry.lock);
81 	s += addrtree_size(r->tree4);
82 	s += addrtree_size(r->tree6);
83 	return s;
84 }
85 
86 /** new query for ecs module */
87 static int
88 subnet_new_qstate(struct module_qstate *qstate, int id)
89 {
90 	struct subnet_qstate *sq = (struct subnet_qstate*)regional_alloc(
91 		qstate->region, sizeof(struct subnet_qstate));
92 	if(!sq)
93 		return 0;
94 	qstate->minfo[id] = sq;
95 	memset(sq, 0, sizeof(*sq));
96 	sq->started_no_cache_store = qstate->no_cache_store;
97 	sq->started_no_cache_lookup = qstate->no_cache_lookup;
98 	return 1;
99 }
100 
101 /** Add ecs struct to edns list, after parsing it to wire format. */
102 void
103 subnet_ecs_opt_list_append(struct ecs_data* ecs, struct edns_option** list,
104 	struct module_qstate *qstate, struct regional *region)
105 {
106 	size_t sn_octs, sn_octs_remainder;
107 	sldns_buffer* buf = qstate->env->scratch_buffer;
108 
109 	if(ecs->subnet_validdata) {
110 		log_assert(ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 ||
111 			ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6);
112 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP4 ||
113 			ecs->subnet_source_mask <=  INET_SIZE*8);
114 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP6 ||
115 			ecs->subnet_source_mask <= INET6_SIZE*8);
116 
117 		sn_octs = ecs->subnet_source_mask / 8;
118 		sn_octs_remainder =
119 			(size_t)((ecs->subnet_source_mask % 8)>0?1:0);
120 
121 		log_assert(sn_octs + sn_octs_remainder <= INET6_SIZE);
122 
123 		sldns_buffer_clear(buf);
124 		sldns_buffer_write_u16(buf, ecs->subnet_addr_fam);
125 		sldns_buffer_write_u8(buf, ecs->subnet_source_mask);
126 		sldns_buffer_write_u8(buf, ecs->subnet_scope_mask);
127 		sldns_buffer_write(buf, ecs->subnet_addr, sn_octs);
128 		if(sn_octs_remainder)
129 			sldns_buffer_write_u8(buf, ecs->subnet_addr[sn_octs] &
130 				~(0xFF >> (ecs->subnet_source_mask % 8)));
131 		sldns_buffer_flip(buf);
132 
133 		edns_opt_list_append(list,
134 				qstate->env->cfg->client_subnet_opcode,
135 				sn_octs + sn_octs_remainder + 4,
136 				sldns_buffer_begin(buf), region);
137 	}
138 }
139 
140 int ecs_whitelist_check(struct query_info* qinfo,
141 	uint16_t ATTR_UNUSED(flags), struct module_qstate* qstate,
142 	struct sockaddr_storage* addr, socklen_t addrlen,
143 	uint8_t* ATTR_UNUSED(zone), size_t ATTR_UNUSED(zonelen),
144 	struct regional *region, int id, void* ATTR_UNUSED(cbargs))
145 {
146 	struct subnet_qstate *sq;
147 	struct subnet_env *sn_env;
148 
149 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
150 		return 1;
151 	sn_env = (struct subnet_env*)qstate->env->modinfo[id];
152 
153 	/* Cache by default, might be disabled after parsing EDNS option
154 	 * received from nameserver. */
155 	if(!iter_stub_fwd_no_cache(qstate, &qstate->qinfo, NULL, NULL)) {
156 		qstate->no_cache_store = 0;
157 	}
158 
159 	if(sq->ecs_server_out.subnet_validdata && ((sq->subnet_downstream &&
160 		qstate->env->cfg->client_subnet_always_forward) ||
161 		ecs_is_whitelisted(sn_env->whitelist,
162 		addr, addrlen, qinfo->qname, qinfo->qname_len,
163 		qinfo->qclass))) {
164 		/* Address on whitelist or client query contains ECS option, we
165 		 * want to sent out ECS. Only add option if it is not already
166 		 * set. */
167 		if(!edns_opt_list_find(qstate->edns_opts_back_out,
168 			qstate->env->cfg->client_subnet_opcode)) {
169 			subnet_ecs_opt_list_append(&sq->ecs_server_out,
170 				&qstate->edns_opts_back_out, qstate, region);
171 		}
172 		sq->subnet_sent = 1;
173 	}
174 	else {
175 		/* Outgoing ECS option is set, but we don't want to sent it to
176 		 * this address, remove option. */
177 		if(edns_opt_list_find(qstate->edns_opts_back_out,
178 			qstate->env->cfg->client_subnet_opcode)) {
179 			edns_opt_list_remove(&qstate->edns_opts_back_out,
180 				qstate->env->cfg->client_subnet_opcode);
181 		}
182 		sq->subnet_sent = 0;
183 	}
184 	return 1;
185 }
186 
187 
188 void
189 subnet_markdel(void* key)
190 {
191 	struct msgreply_entry *e = (struct msgreply_entry*)key;
192 	e->key.qtype = 0;
193 	e->key.qclass = 0;
194 }
195 
196 int
197 subnetmod_init(struct module_env *env, int id)
198 {
199 	struct subnet_env *sn_env = (struct subnet_env*)calloc(1,
200 		sizeof(struct subnet_env));
201 	if(!sn_env) {
202 		log_err("malloc failure");
203 		return 0;
204 	}
205 	alloc_init(&sn_env->alloc, NULL, 0);
206 	env->modinfo[id] = (void*)sn_env;
207 
208 	/* Warn that serve-expired and prefetch do not work with the subnet
209 	 * module cache. */
210 	if(env->cfg->serve_expired)
211 		log_warn(
212 			"subnetcache: serve-expired is set but not working "
213 			"for data originating from the subnet module cache.");
214 	if(env->cfg->prefetch)
215 		log_warn(
216 			"subnetcache: prefetch is set but not working "
217 			"for data originating from the subnet module cache.");
218 	/* Copy msg_cache settings */
219 	sn_env->subnet_msg_cache = slabhash_create(env->cfg->msg_cache_slabs,
220 		HASH_DEFAULT_STARTARRAY, env->cfg->msg_cache_size,
221 		msg_cache_sizefunc, query_info_compare, query_entry_delete,
222 		subnet_data_delete, NULL);
223 	slabhash_setmarkdel(sn_env->subnet_msg_cache, &subnet_markdel);
224 	if(!sn_env->subnet_msg_cache) {
225 		log_err("subnetcache: could not create cache");
226 		free(sn_env);
227 		env->modinfo[id] = NULL;
228 		return 0;
229 	}
230 	/* whitelist for edns subnet capable servers */
231 	sn_env->whitelist = ecs_whitelist_create();
232 	if(!sn_env->whitelist ||
233 		!ecs_whitelist_apply_cfg(sn_env->whitelist, env->cfg)) {
234 		log_err("subnetcache: could not create ECS whitelist");
235 		slabhash_delete(sn_env->subnet_msg_cache);
236 		free(sn_env);
237 		env->modinfo[id] = NULL;
238 		return 0;
239 	}
240 
241 	verbose(VERB_QUERY, "subnetcache: option registered (%d)",
242 		env->cfg->client_subnet_opcode);
243 	/* Create new mesh state for all queries. */
244 	env->unique_mesh = 1;
245 	if(!edns_register_option(env->cfg->client_subnet_opcode,
246 		env->cfg->client_subnet_always_forward /* bypass cache */,
247 		1 /* no aggregation */, env)) {
248 		log_err("subnetcache: could not register opcode");
249 		ecs_whitelist_delete(sn_env->whitelist);
250 		slabhash_delete(sn_env->subnet_msg_cache);
251 		free(sn_env);
252 		env->modinfo[id] = NULL;
253 		return 0;
254 	}
255 	inplace_cb_register((void*)ecs_whitelist_check, inplace_cb_query, NULL,
256 		env, id);
257 	inplace_cb_register((void*)ecs_edns_back_parsed,
258 		inplace_cb_edns_back_parsed, NULL, env, id);
259 	inplace_cb_register((void*)ecs_query_response,
260 		inplace_cb_query_response, NULL, env, id);
261 	lock_rw_init(&sn_env->biglock);
262 	return 1;
263 }
264 
265 void
266 subnetmod_deinit(struct module_env *env, int id)
267 {
268 	struct subnet_env *sn_env;
269 	if(!env || !env->modinfo[id])
270 		return;
271 	sn_env = (struct subnet_env*)env->modinfo[id];
272 	lock_rw_destroy(&sn_env->biglock);
273 	inplace_cb_delete(env, inplace_cb_edns_back_parsed, id);
274 	inplace_cb_delete(env, inplace_cb_query, id);
275 	inplace_cb_delete(env, inplace_cb_query_response, id);
276 	ecs_whitelist_delete(sn_env->whitelist);
277 	slabhash_delete(sn_env->subnet_msg_cache);
278 	alloc_clear(&sn_env->alloc);
279 	free(sn_env);
280 	env->modinfo[id] = NULL;
281 }
282 
283 /** Tells client that upstream has no/improper support */
284 static void
285 cp_edns_bad_response(struct ecs_data *target, struct ecs_data *source)
286 {
287 	target->subnet_scope_mask  = 0;
288 	target->subnet_source_mask = source->subnet_source_mask;
289 	target->subnet_addr_fam    = source->subnet_addr_fam;
290 	memcpy(target->subnet_addr, source->subnet_addr, INET6_SIZE);
291 	target->subnet_validdata = 1;
292 }
293 
294 static void
295 delfunc(void *envptr, void *elemptr) {
296 	struct reply_info *elem = (struct reply_info *)elemptr;
297 	struct subnet_env *env = (struct subnet_env *)envptr;
298 	reply_info_parsedelete(elem, &env->alloc);
299 }
300 
301 static size_t
302 sizefunc(void *elemptr) {
303 	struct reply_info *elem  = (struct reply_info *)elemptr;
304 	return sizeof (struct reply_info) - sizeof (struct rrset_ref)
305 		+ elem->rrset_count * sizeof (struct rrset_ref)
306 		+ elem->rrset_count * sizeof (struct ub_packed_rrset_key *);
307 }
308 
309 /**
310  * Select tree from cache entry based on edns data.
311  * If for address family not present it will create a new one.
312  * NULL on failure to create. */
313 static struct addrtree*
314 get_tree(struct subnet_msg_cache_data *data, struct ecs_data *edns,
315 	struct subnet_env *env, struct config_file* cfg)
316 {
317 	struct addrtree *tree;
318 	if (edns->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
319 		if (!data->tree4)
320 			data->tree4 = addrtree_create(
321 				cfg->max_client_subnet_ipv4, &delfunc,
322 				&sizefunc, env, cfg->max_ecs_tree_size_ipv4);
323 		tree = data->tree4;
324 	} else {
325 		if (!data->tree6)
326 			data->tree6 = addrtree_create(
327 				cfg->max_client_subnet_ipv6, &delfunc,
328 				&sizefunc, env, cfg->max_ecs_tree_size_ipv6);
329 		tree = data->tree6;
330 	}
331 	return tree;
332 }
333 
334 static void
335 update_cache(struct module_qstate *qstate, int id)
336 {
337 	struct msgreply_entry *mrep_entry;
338 	struct addrtree *tree;
339 	struct reply_info *rep;
340 	struct query_info qinf;
341 	struct subnet_env *sne = qstate->env->modinfo[id];
342 	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
343 	struct slabhash *subnet_msg_cache = sne->subnet_msg_cache;
344 	struct ecs_data *edns = &sq->ecs_client_in;
345 	size_t i;
346 	int only_match_scope_zero;
347 
348 	/* We already calculated hash upon lookup (lookup_and_reply) if we were
349 	 * allowed to look in the ECS cache */
350 	hashvalue_type h = qstate->minfo[id] &&
351 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash_calculated?
352 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash :
353 		query_info_hash(&qstate->qinfo, qstate->query_flags);
354 	/* Step 1, general qinfo lookup */
355 	struct lruhash_entry* lru_entry = slabhash_lookup(subnet_msg_cache, h,
356 		&qstate->qinfo, 1);
357 	int need_to_insert = (lru_entry == NULL);
358 	if (!lru_entry) {
359 		void* data = calloc(1,
360 			sizeof(struct subnet_msg_cache_data));
361 		if(!data) {
362 			log_err("malloc failed");
363 			return;
364 		}
365 		qinf = qstate->qinfo;
366 		qinf.qname = memdup(qstate->qinfo.qname,
367 			qstate->qinfo.qname_len);
368 		if(!qinf.qname) {
369 			free(data);
370 			log_err("memdup failed");
371 			return;
372 		}
373 		mrep_entry = query_info_entrysetup(&qinf, data, h);
374 		free(qinf.qname); /* if qname 'consumed', it is set to NULL */
375 		if (!mrep_entry) {
376 			free(data);
377 			log_err("query_info_entrysetup failed");
378 			return;
379 		}
380 		lru_entry = &mrep_entry->entry;
381 		lock_rw_wrlock(&lru_entry->lock);
382 	}
383 	/* lru_entry->lock is locked regardless of how we got here,
384 	 * either from the slabhash_lookup, or above in the new allocated */
385 	/* Step 2, find the correct tree */
386 	if (!(tree = get_tree(lru_entry->data, edns, sne, qstate->env->cfg))) {
387 		lock_rw_unlock(&lru_entry->lock);
388 		log_err("subnetcache: cache insertion failed");
389 		return;
390 	}
391 	lock_quick_lock(&sne->alloc.lock);
392 	rep = reply_info_copy(qstate->return_msg->rep, &sne->alloc, NULL);
393 	lock_quick_unlock(&sne->alloc.lock);
394 	if (!rep) {
395 		lock_rw_unlock(&lru_entry->lock);
396 		log_err("subnetcache: cache insertion failed");
397 		return;
398 	}
399 
400 	/* store RRsets */
401 	for(i=0; i<rep->rrset_count; i++) {
402 		rep->ref[i].key = rep->rrsets[i];
403 		rep->ref[i].id = rep->rrsets[i]->id;
404 	}
405 	reply_info_set_ttls(rep, *qstate->env->now);
406 	rep->flags |= (BIT_RA | BIT_QR); /* fix flags to be sensible for */
407 	rep->flags &= ~(BIT_AA | BIT_CD);/* a reply based on the cache   */
408 	if(edns->subnet_source_mask == 0 && edns->subnet_scope_mask == 0)
409 		only_match_scope_zero = 1;
410 	else only_match_scope_zero = 0;
411 	addrtree_insert(tree, (addrkey_t*)edns->subnet_addr,
412 		edns->subnet_source_mask, sq->max_scope, rep,
413 		rep->ttl, *qstate->env->now, only_match_scope_zero);
414 
415 	lock_rw_unlock(&lru_entry->lock);
416 	if (need_to_insert) {
417 		slabhash_insert(subnet_msg_cache, h, lru_entry, lru_entry->data,
418 			NULL);
419 	}
420 }
421 
422 /** Lookup in cache and reply true iff reply is sent. */
423 static int
424 lookup_and_reply(struct module_qstate *qstate, int id, struct subnet_qstate *sq, int prefetch)
425 {
426 	struct lruhash_entry *e;
427 	struct module_env *env = qstate->env;
428 	struct subnet_env *sne = (struct subnet_env*)env->modinfo[id];
429 	hashvalue_type h = query_info_hash(&qstate->qinfo, qstate->query_flags);
430 	struct subnet_msg_cache_data *data;
431 	struct ecs_data *ecs = &sq->ecs_client_in;
432 	struct addrtree *tree;
433 	struct addrnode *node;
434 	uint8_t scope;
435 
436 	memset(&sq->ecs_client_out, 0, sizeof(sq->ecs_client_out));
437 
438 	if (sq) {
439 		sq->qinfo_hash = h; /* Might be useful on cache miss */
440 		sq->qinfo_hash_calculated = 1;
441 	}
442 	e = slabhash_lookup(sne->subnet_msg_cache, h, &qstate->qinfo, 1);
443 	if (!e) return 0; /* qinfo not in cache */
444 	data = e->data;
445 	tree = (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4)?
446 		data->tree4 : data->tree6;
447 	if (!tree) { /* qinfo in cache but not for this family */
448 		lock_rw_unlock(&e->lock);
449 		return 0;
450 	}
451 	node = addrtree_find(tree, (addrkey_t*)ecs->subnet_addr,
452 		ecs->subnet_source_mask, *env->now);
453 	if (!node) { /* plain old cache miss */
454 		lock_rw_unlock(&e->lock);
455 		return 0;
456 	}
457 
458 	qstate->return_msg = tomsg(NULL, &qstate->qinfo,
459 		(struct reply_info *)node->elem, qstate->region, *env->now, 0,
460 		env->scratch);
461 	scope = (uint8_t)node->scope;
462 	lock_rw_unlock(&e->lock);
463 
464 	if (!qstate->return_msg) { /* Failed allocation or expired TTL */
465 		return 0;
466 	}
467 
468 	if (sq->subnet_downstream) { /* relay to interested client */
469 		sq->ecs_client_out.subnet_scope_mask = scope;
470 		sq->ecs_client_out.subnet_addr_fam = ecs->subnet_addr_fam;
471 		sq->ecs_client_out.subnet_source_mask = ecs->subnet_source_mask;
472 		memcpy(&sq->ecs_client_out.subnet_addr, &ecs->subnet_addr,
473 			INET6_SIZE);
474 		sq->ecs_client_out.subnet_validdata = 1;
475 	}
476 
477 	if (prefetch && *qstate->env->now >= ((struct reply_info *)node->elem)->prefetch_ttl) {
478 		qstate->need_refetch = 1;
479 	}
480 	return 1;
481 }
482 
/**
 * Test first bits of addresses for equality. Caller is responsible
 * for making sure that both a and b are at least (net+7)/8 octets long.
 * Only the first net bits take part in the comparison; when net is not a
 * multiple of 8, the remaining low bits of the last octet are ignored.
 * @param a: first address.
 * @param b: second address.
 * @param net: Number of bits to test.
 * @return: 1 if equal, 0 otherwise.
 */
static int
common_prefix(uint8_t *a, uint8_t *b, uint8_t net)
{
	size_t n = (size_t)net / 8;
	unsigned int rem = (unsigned int)net % 8;
	uint8_t mask;

	if(memcmp(a, b, n) != 0)
		return 0;
	if(rem == 0)
		return 1;
	/* the top 'rem' bits of the partial octet */
	mask = (uint8_t)(0xFF << (8 - rem));
	return ((a[n] ^ b[n]) & mask) == 0;
}
497 
498 static enum module_ext_state
499 eval_response(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
500 {
501 	struct subnet_env *sne = qstate->env->modinfo[id];
502 
503 	struct ecs_data *c_in  = &sq->ecs_client_in; /* rcvd from client */
504 	struct ecs_data *c_out = &sq->ecs_client_out;/* will send to client */
505 	struct ecs_data *s_in  = &sq->ecs_server_in; /* rcvd from auth */
506 	struct ecs_data *s_out = &sq->ecs_server_out;/* sent to auth */
507 
508 	memset(c_out, 0, sizeof(*c_out));
509 
510 	if (!qstate->return_msg) {
511 		/* already an answer and its not a message, but retain
512 		 * the actual rcode, instead of module_error, so send
513 		 * module_finished */
514 		return module_finished;
515 	}
516 
517 	/* We have not asked for subnet data */
518 	if (!sq->subnet_sent) {
519 		if (s_in->subnet_validdata)
520 			verbose(VERB_QUERY, "subnetcache: received spurious data");
521 		if (sq->subnet_downstream) /* Copy back to client */
522 			cp_edns_bad_response(c_out, c_in);
523 		return module_finished;
524 	}
525 
526 	/* subnet sent but nothing came back */
527 	if (!s_in->subnet_validdata) {
528 		/* The authority indicated no support for edns subnet. As a
529 		 * consequence the answer ended up in the regular cache. It
530 		 * is still useful to put it in the edns subnet cache for
531 		 * when a client explicitly asks for subnet specific answer. */
532 		verbose(VERB_QUERY, "subnetcache: Authority indicates no support");
533 		if(!sq->started_no_cache_store) {
534 			lock_rw_wrlock(&sne->biglock);
535 			update_cache(qstate, id);
536 			lock_rw_unlock(&sne->biglock);
537 		}
538 		if (sq->subnet_downstream)
539 			cp_edns_bad_response(c_out, c_in);
540 		return module_finished;
541 	}
542 
543 	/* Being here means we have asked for and got a subnet specific
544 	 * answer. Also, the answer from the authority is not yet cached
545 	 * anywhere. */
546 
547 	/* can we accept response? */
548 	if(s_out->subnet_addr_fam != s_in->subnet_addr_fam ||
549 		s_out->subnet_source_mask != s_in->subnet_source_mask ||
550 		!common_prefix(s_out->subnet_addr, s_in->subnet_addr,
551 			s_out->subnet_source_mask))
552 	{
553 		/* we can not accept, restart query without option */
554 		verbose(VERB_QUERY, "subnetcache: forged data");
555 		s_out->subnet_validdata = 0;
556 		(void)edns_opt_list_remove(&qstate->edns_opts_back_out,
557 			qstate->env->cfg->client_subnet_opcode);
558 		sq->subnet_sent = 0;
559 		return module_restart_next;
560 	}
561 
562 	lock_rw_wrlock(&sne->biglock);
563 	if(!sq->started_no_cache_store) {
564 		update_cache(qstate, id);
565 	}
566 	sne->num_msg_nocache++;
567 	lock_rw_unlock(&sne->biglock);
568 
569 	if (sq->subnet_downstream) {
570 		/* Client wants to see the answer, echo option back
571 		 * and adjust the scope. */
572 		c_out->subnet_addr_fam = c_in->subnet_addr_fam;
573 		c_out->subnet_source_mask = c_in->subnet_source_mask;
574 		memcpy(&c_out->subnet_addr, &c_in->subnet_addr, INET6_SIZE);
575 		c_out->subnet_scope_mask = sq->max_scope;
576 		/* Limit scope returned to client to scope used for caching. */
577 		if(c_out->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
578 			if(c_out->subnet_scope_mask >
579 				qstate->env->cfg->max_client_subnet_ipv4) {
580 				c_out->subnet_scope_mask =
581 					qstate->env->cfg->max_client_subnet_ipv4;
582 			}
583 		}
584 		else if(c_out->subnet_scope_mask >
585 				qstate->env->cfg->max_client_subnet_ipv6) {
586 				c_out->subnet_scope_mask =
587 					qstate->env->cfg->max_client_subnet_ipv6;
588 		}
589 		c_out->subnet_validdata = 1;
590 	}
591 	return module_finished;
592 }
593 
594 /** Parse EDNS opt data containing ECS */
595 static int
596 parse_subnet_option(struct edns_option* ecs_option, struct ecs_data* ecs)
597 {
598 	memset(ecs, 0, sizeof(*ecs));
599 	if (ecs_option->opt_len < 4)
600 		return 0;
601 
602 	ecs->subnet_addr_fam = sldns_read_uint16(ecs_option->opt_data);
603 	ecs->subnet_source_mask = ecs_option->opt_data[2];
604 	ecs->subnet_scope_mask = ecs_option->opt_data[3];
605 	/* remaining bytes indicate address */
606 
607 	/* validate input*/
608 	/* option length matches calculated length? */
609 	if (ecs_option->opt_len != (size_t)((ecs->subnet_source_mask+7)/8 + 4))
610 		return 0;
611 	if (ecs_option->opt_len - 4 > INET6_SIZE || ecs_option->opt_len == 0)
612 		return 0;
613 	if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
614 		if (ecs->subnet_source_mask > 32 || ecs->subnet_scope_mask > 32)
615 			return 0;
616 	} else if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6) {
617 		if (ecs->subnet_source_mask > 128 ||
618 			ecs->subnet_scope_mask > 128)
619 			return 0;
620 	} else
621 		return 0;
622 
623 	/* valid ECS data, write to ecs_data */
624 	if (copy_clear(ecs->subnet_addr, INET6_SIZE, ecs_option->opt_data + 4,
625 		ecs_option->opt_len - 4, ecs->subnet_source_mask))
626 		return 0;
627 	ecs->subnet_validdata = 1;
628 	return 1;
629 }
630 
631 void
632 subnet_option_from_ss(struct sockaddr_storage *ss, struct ecs_data* ecs,
633 	struct config_file* cfg)
634 {
635 	void* sinaddr;
636 
637 	/* Construct subnet option from original query */
638 	if(((struct sockaddr_in*)ss)->sin_family == AF_INET) {
639 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv4;
640 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP4;
641 		sinaddr = &((struct sockaddr_in*)ss)->sin_addr;
642 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
643 			(uint8_t *)sinaddr, INET_SIZE,
644 			ecs->subnet_source_mask)) {
645 			ecs->subnet_validdata = 1;
646 		}
647 	}
648 #ifdef INET6
649 	else {
650 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv6;
651 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP6;
652 		sinaddr = &((struct sockaddr_in6*)ss)->sin6_addr;
653 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
654 			(uint8_t *)sinaddr, INET6_SIZE,
655 			ecs->subnet_source_mask)) {
656 			ecs->subnet_validdata = 1;
657 		}
658 	}
659 #else
660 			/* We don't know how to handle ip6, just pass */
661 #endif /* INET6 */
662 }
663 
664 int
665 ecs_query_response(struct module_qstate* qstate, struct dns_msg* response,
666 	int id, void* ATTR_UNUSED(cbargs))
667 {
668 	struct subnet_qstate *sq;
669 
670 	if(!response || !(sq=(struct subnet_qstate*)qstate->minfo[id]))
671 		return 1;
672 
673 	if(sq->subnet_sent &&
674 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_REFUSED) {
675 		/* REFUSED response to ECS query, remove ECS option. */
676 		edns_opt_list_remove(&qstate->edns_opts_back_out,
677 			qstate->env->cfg->client_subnet_opcode);
678 		sq->subnet_sent = 0;
679 		memset(&sq->ecs_server_out, 0, sizeof(sq->ecs_server_out));
680 	} else if (!sq->track_max_scope &&
681 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_NOERROR &&
682 		response->rep->an_numrrsets > 0
683 		) {
684 		struct ub_packed_rrset_key* s = response->rep->rrsets[0];
685 		if(ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME &&
686 			query_dname_compare(qstate->qinfo.qname,
687 			s->rk.dname) == 0) {
688 			/* CNAME response for QNAME. From now on keep track of
689 			 * longest received ECS prefix for all queries on this
690 			 * qstate. */
691 			sq->track_max_scope = 1;
692 		}
693 	}
694 	return 1;
695 }
696 
697 /** verbose print edns subnet option in pretty print */
698 static void
699 subnet_log_print(const char* s, struct edns_option* ecs_opt)
700 {
701 	if(verbosity >= VERB_ALGO) {
702 		char buf[256];
703 		char* str = buf;
704 		size_t str_len = sizeof(buf);
705 		if(!ecs_opt) {
706 			verbose(VERB_ALGO, "%s (null)", s);
707 			return;
708 		}
709 		(void)sldns_wire2str_edns_subnet_print(&str, &str_len,
710 			ecs_opt->opt_data, ecs_opt->opt_len);
711 		verbose(VERB_ALGO, "%s %s", s, buf);
712 	}
713 }
714 
715 int
716 ecs_edns_back_parsed(struct module_qstate* qstate, int id,
717 	void* ATTR_UNUSED(cbargs))
718 {
719 	struct subnet_qstate *sq;
720 	struct edns_option* ecs_opt;
721 
722 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
723 		return 1;
724 	if((ecs_opt = edns_opt_list_find(
725 		qstate->edns_opts_back_in,
726 		qstate->env->cfg->client_subnet_opcode)) &&
727 		parse_subnet_option(ecs_opt, &sq->ecs_server_in) &&
728 		sq->subnet_sent && sq->ecs_server_in.subnet_validdata) {
729 			subnet_log_print("answer has edns subnet", ecs_opt);
730 			/* Only skip global cache store if we sent an ECS option
731 			 * and received one back. Answers from non-whitelisted
732 			 * servers will end up in global cache. Answers for
733 			 * queries with 0 source will not (unless nameserver
734 			 * does not support ECS). */
735 			qstate->no_cache_store = 1;
736 			if(!sq->track_max_scope || (sq->track_max_scope &&
737 				sq->ecs_server_in.subnet_scope_mask >
738 				sq->max_scope))
739 				sq->max_scope = sq->ecs_server_in.subnet_scope_mask;
740 	}
741 
742 	return 1;
743 }
744 
745 void
746 subnetmod_operate(struct module_qstate *qstate, enum module_ev event,
747 	int id, struct outbound_entry* outbound)
748 {
749 	struct subnet_env *sne = qstate->env->modinfo[id];
750 	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
751 
752 	verbose(VERB_QUERY, "subnetcache[module %d] operate: extstate:%s "
753 		"event:%s", id, strextstate(qstate->ext_state[id]),
754 		strmodulevent(event));
755 	log_query_info(VERB_QUERY, "subnetcache operate: query", &qstate->qinfo);
756 
757 	if((event == module_event_new || event == module_event_pass) &&
758 		sq == NULL) {
759 		struct edns_option* ecs_opt;
760 		if(!subnet_new_qstate(qstate, id)) {
761 			qstate->return_msg = NULL;
762 			qstate->ext_state[id] = module_finished;
763 			return;
764 		}
765 
766 		sq = (struct subnet_qstate*)qstate->minfo[id];
767 
768 		if((ecs_opt = edns_opt_list_find(
769 			qstate->edns_opts_front_in,
770 			qstate->env->cfg->client_subnet_opcode))) {
771 			if(!parse_subnet_option(ecs_opt, &sq->ecs_client_in)) {
772 				/* Wrongly formatted ECS option. RFC mandates to
773 				 * return FORMERROR. */
774 				qstate->return_rcode = LDNS_RCODE_FORMERR;
775 				qstate->ext_state[id] = module_finished;
776 				return;
777 			}
778 			subnet_log_print("query has edns subnet", ecs_opt);
779 			sq->subnet_downstream = 1;
780 		}
781 		else if(qstate->mesh_info->reply_list) {
782 			subnet_option_from_ss(
783 				&qstate->mesh_info->reply_list->query_reply.client_addr,
784 				&sq->ecs_client_in, qstate->env->cfg);
785 		}
786 		else if(qstate->client_addr.ss_family != AF_UNSPEC) {
787 			subnet_option_from_ss(
788 				&qstate->client_addr,
789 				&sq->ecs_client_in, qstate->env->cfg);
790 		}
791 
792 		if(sq->ecs_client_in.subnet_validdata == 0) {
793 			/* No clients are interested in result or we could not
794 			 * parse it, we don't do client subnet */
795 			sq->ecs_server_out.subnet_validdata = 0;
796 			verbose(VERB_ALGO, "subnetcache: pass to next module");
797 			qstate->ext_state[id] = module_wait_module;
798 			return;
799 		}
800 
801 		/* Limit to minimum allowed source mask */
802 		if(sq->ecs_client_in.subnet_source_mask != 0 && (
803 			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 &&
804 			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv4) ||
805 			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6 &&
806 			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv6))) {
807 				qstate->return_rcode = LDNS_RCODE_REFUSED;
808 				qstate->ext_state[id] = module_finished;
809 				return;
810 		}
811 
812 		if(!sq->started_no_cache_lookup && !qstate->blacklist) {
813 			lock_rw_wrlock(&sne->biglock);
814 			if(qstate->mesh_info->reply_list &&
815 				lookup_and_reply(qstate, id, sq,
816 				qstate->env->cfg->prefetch)) {
817 				sne->num_msg_cache++;
818 				lock_rw_unlock(&sne->biglock);
819 				verbose(VERB_QUERY, "subnetcache: answered from cache");
820 				qstate->ext_state[id] = module_finished;
821 
822 				subnet_ecs_opt_list_append(&sq->ecs_client_out,
823 					&qstate->edns_opts_front_out, qstate,
824 					qstate->region);
825 				if(verbosity >= VERB_ALGO) {
826 					subnet_log_print("reply has edns subnet",
827 						edns_opt_list_find(
828 						qstate->edns_opts_front_out,
829 						qstate->env->cfg->
830 						client_subnet_opcode));
831 				}
832 				return;
833 			}
834 			lock_rw_unlock(&sne->biglock);
835 		}
836 
837 		sq->ecs_server_out.subnet_addr_fam =
838 			sq->ecs_client_in.subnet_addr_fam;
839 		sq->ecs_server_out.subnet_source_mask =
840 			sq->ecs_client_in.subnet_source_mask;
841 		/* Limit source prefix to configured maximum */
842 		if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4
843 			&& sq->ecs_server_out.subnet_source_mask >
844 			qstate->env->cfg->max_client_subnet_ipv4)
845 			sq->ecs_server_out.subnet_source_mask =
846 				qstate->env->cfg->max_client_subnet_ipv4;
847 		else if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6
848 			&& sq->ecs_server_out.subnet_source_mask >
849 			qstate->env->cfg->max_client_subnet_ipv6)
850 			sq->ecs_server_out.subnet_source_mask =
851 				qstate->env->cfg->max_client_subnet_ipv6;
852 		/* Safe to copy completely, even if the source is limited by the
853 		 * configuration. subnet_ecs_opt_list_append() will limit the address.
854 		 * */
855 		memcpy(&sq->ecs_server_out.subnet_addr,
856 			sq->ecs_client_in.subnet_addr, INET6_SIZE);
857 		sq->ecs_server_out.subnet_scope_mask = 0;
858 		sq->ecs_server_out.subnet_validdata = 1;
859 		if(sq->ecs_server_out.subnet_source_mask != 0 &&
860 			qstate->env->cfg->client_subnet_always_forward &&
861 			sq->subnet_downstream)
862 			/* ECS specific data required, do not look at the global
863 			 * cache in other modules. */
864 			qstate->no_cache_lookup = 1;
865 
866 		/* pass request to next module */
867 		verbose(VERB_ALGO,
868 			"subnetcache: not found in cache. pass to next module");
869 		qstate->ext_state[id] = module_wait_module;
870 		return;
871 	}
872 	/* Query handed back by next module, we have a 'final' answer */
873 	if(sq && event == module_event_moddone) {
874 		qstate->ext_state[id] = eval_response(qstate, id, sq);
875 		if(qstate->ext_state[id] == module_finished &&
876 			qstate->return_msg) {
877 			subnet_ecs_opt_list_append(&sq->ecs_client_out,
878 				&qstate->edns_opts_front_out, qstate,
879 				qstate->region);
880 			if(verbosity >= VERB_ALGO) {
881 				subnet_log_print("reply has edns subnet",
882 					edns_opt_list_find(
883 					qstate->edns_opts_front_out,
884 					qstate->env->cfg->
885 					client_subnet_opcode));
886 			}
887 		}
888 		qstate->no_cache_store = sq->started_no_cache_store;
889 		qstate->no_cache_lookup = sq->started_no_cache_lookup;
890 		return;
891 	}
892 	if(sq && outbound) {
893 		return;
894 	}
895 	/* We are being revisited */
896 	if(event == module_event_pass || event == module_event_new) {
897 		/* Just pass it on, we already did the work */
898 		verbose(VERB_ALGO, "subnetcache: pass to next module");
899 		qstate->ext_state[id] = module_wait_module;
900 		return;
901 	}
902 	if(!sq && (event == module_event_moddone)) {
903 		/* during priming, module done but we never started */
904 		qstate->ext_state[id] = module_finished;
905 		return;
906 	}
907 	log_err("subnetcache: bad event %s", strmodulevent(event));
908 	qstate->ext_state[id] = module_error;
909 	return;
910 }
911 
912 void
913 subnetmod_clear(struct module_qstate *ATTR_UNUSED(qstate),
914 	int ATTR_UNUSED(id))
915 {
916 	/* qstate has no data outside region */
917 }
918 
919 void
920 subnetmod_inform_super(struct module_qstate *ATTR_UNUSED(qstate),
921 	int ATTR_UNUSED(id), struct module_qstate *ATTR_UNUSED(super))
922 {
923 	/* Not used */
924 }
925 
926 size_t
927 subnetmod_get_mem(struct module_env *env, int id)
928 {
929 	struct subnet_env *sn_env = env->modinfo[id];
930 	if (!sn_env) return 0;
931 	return sizeof(*sn_env) +
932 		slabhash_get_mem(sn_env->subnet_msg_cache) +
933 		ecs_whitelist_get_mem(sn_env->whitelist);
934 }
935 
936 /**
937  * The module function block
938  */
939 static struct module_func_block subnetmod_block = {
940 	"subnetcache", &subnetmod_init, &subnetmod_deinit, &subnetmod_operate,
941 	&subnetmod_inform_super, &subnetmod_clear, &subnetmod_get_mem
942 };
943 
944 struct module_func_block*
945 subnetmod_get_funcblock(void)
946 {
947 	return &subnetmod_block;
948 }
949 
/**
 * Wrapper that exposes the static sizefunc to the unit tests.
 * @param elemptr: passed to sizefunc unchanged.
 * @return what sizefunc returns.
 */
size_t
unittest_wrapper_subnetmod_sizefunc(void *elemptr)
{
	size_t elem_size = sizefunc(elemptr);

	return elem_size;
}
956 
957 #endif  /* CLIENT_SUBNET */
958