1 /*
2  * edns-subnet/subnetmod.c - edns subnet module. Must be called before validator
3  * and iterator.
4  *
5  * Copyright (c) 2013, NLnet Labs. All rights reserved.
6  *
7  * This software is open source.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  *
16  * Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  *
20  * Neither the name of the NLNET LABS nor the names of its contributors may
21  * be used to endorse or promote products derived from this software without
22  * specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
28  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
30  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  */
36  /**
37  * \file
38  * subnet module for unbound.
39  */
40 
41 #include "config.h"
42 
43 #ifdef CLIENT_SUBNET /* keeps splint happy */
44 
45 #include "edns-subnet/subnetmod.h"
46 #include "edns-subnet/edns-subnet.h"
47 #include "edns-subnet/addrtree.h"
48 #include "edns-subnet/subnet-whitelist.h"
49 
50 #include "services/mesh.h"
51 #include "services/cache/dns.h"
52 #include "util/module.h"
53 #include "util/regional.h"
54 #include "util/storage/slabhash.h"
55 #include "util/config_file.h"
56 #include "util/data/msgreply.h"
57 #include "sldns/sbuffer.h"
58 #include "iterator/iter_utils.h"
59 
60 /** externally called */
61 void
62 subnet_data_delete(void *d, void *ATTR_UNUSED(arg))
63 {
64 	struct subnet_msg_cache_data *r;
65 	r = (struct subnet_msg_cache_data*)d;
66 	addrtree_delete(r->tree4);
67 	addrtree_delete(r->tree6);
68 	free(r);
69 }
70 
71 /** externally called */
72 size_t
73 msg_cache_sizefunc(void *k, void *d)
74 {
75 	struct msgreply_entry *q = (struct msgreply_entry*)k;
76 	struct subnet_msg_cache_data *r = (struct subnet_msg_cache_data*)d;
77 	size_t s = sizeof(struct msgreply_entry)
78 		+ sizeof(struct subnet_msg_cache_data)
79 		+ q->key.qname_len + lock_get_mem(&q->entry.lock);
80 	s += addrtree_size(r->tree4);
81 	s += addrtree_size(r->tree6);
82 	return s;
83 }
84 
85 /** new query for ecs module */
86 static int
87 subnet_new_qstate(struct module_qstate *qstate, int id)
88 {
89 	struct subnet_qstate *sq = (struct subnet_qstate*)regional_alloc(
90 		qstate->region, sizeof(struct subnet_qstate));
91 	if(!sq)
92 		return 0;
93 	qstate->minfo[id] = sq;
94 	memset(sq, 0, sizeof(*sq));
95 	sq->started_no_cache_store = qstate->no_cache_store;
96 	sq->started_no_cache_lookup = qstate->no_cache_lookup;
97 	return 1;
98 }
99 
100 /** Add ecs struct to edns list, after parsing it to wire format. */
101 void
102 subnet_ecs_opt_list_append(struct ecs_data* ecs, struct edns_option** list,
103 	struct module_qstate *qstate, struct regional *region)
104 {
105 	size_t sn_octs, sn_octs_remainder;
106 	sldns_buffer* buf = qstate->env->scratch_buffer;
107 
108 	if(ecs->subnet_validdata) {
109 		log_assert(ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 ||
110 			ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6);
111 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP4 ||
112 			ecs->subnet_source_mask <=  INET_SIZE*8);
113 		log_assert(ecs->subnet_addr_fam != EDNSSUBNET_ADDRFAM_IP6 ||
114 			ecs->subnet_source_mask <= INET6_SIZE*8);
115 
116 		sn_octs = ecs->subnet_source_mask / 8;
117 		sn_octs_remainder =
118 			(size_t)((ecs->subnet_source_mask % 8)>0?1:0);
119 
120 		log_assert(sn_octs + sn_octs_remainder <= INET6_SIZE);
121 
122 		sldns_buffer_clear(buf);
123 		sldns_buffer_write_u16(buf, ecs->subnet_addr_fam);
124 		sldns_buffer_write_u8(buf, ecs->subnet_source_mask);
125 		sldns_buffer_write_u8(buf, ecs->subnet_scope_mask);
126 		sldns_buffer_write(buf, ecs->subnet_addr, sn_octs);
127 		if(sn_octs_remainder)
128 			sldns_buffer_write_u8(buf, ecs->subnet_addr[sn_octs] &
129 				~(0xFF >> (ecs->subnet_source_mask % 8)));
130 		sldns_buffer_flip(buf);
131 
132 		edns_opt_list_append(list,
133 				qstate->env->cfg->client_subnet_opcode,
134 				sn_octs + sn_octs_remainder + 4,
135 				sldns_buffer_begin(buf), region);
136 	}
137 }
138 
139 int ecs_whitelist_check(struct query_info* qinfo,
140 	uint16_t ATTR_UNUSED(flags), struct module_qstate* qstate,
141 	struct sockaddr_storage* addr, socklen_t addrlen,
142 	uint8_t* ATTR_UNUSED(zone), size_t ATTR_UNUSED(zonelen),
143 	struct regional *region, int id, void* ATTR_UNUSED(cbargs))
144 {
145 	struct subnet_qstate *sq;
146 	struct subnet_env *sn_env;
147 
148 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
149 		return 1;
150 	sn_env = (struct subnet_env*)qstate->env->modinfo[id];
151 
152 	/* Cache by default, might be disabled after parsing EDNS option
153 	 * received from nameserver. */
154 	if(!iter_stub_fwd_no_cache(qstate, &qstate->qinfo, NULL, NULL)) {
155 		qstate->no_cache_store = 0;
156 	}
157 
158 	if(sq->ecs_server_out.subnet_validdata && ((sq->subnet_downstream &&
159 		qstate->env->cfg->client_subnet_always_forward) ||
160 		ecs_is_whitelisted(sn_env->whitelist,
161 		addr, addrlen, qinfo->qname, qinfo->qname_len,
162 		qinfo->qclass))) {
163 		/* Address on whitelist or client query contains ECS option, we
164 		 * want to sent out ECS. Only add option if it is not already
165 		 * set. */
166 		if(!edns_opt_list_find(qstate->edns_opts_back_out,
167 			qstate->env->cfg->client_subnet_opcode)) {
168 			subnet_ecs_opt_list_append(&sq->ecs_server_out,
169 				&qstate->edns_opts_back_out, qstate, region);
170 		}
171 		sq->subnet_sent = 1;
172 	}
173 	else {
174 		/* Outgoing ECS option is set, but we don't want to sent it to
175 		 * this address, remove option. */
176 		if(edns_opt_list_find(qstate->edns_opts_back_out,
177 			qstate->env->cfg->client_subnet_opcode)) {
178 			edns_opt_list_remove(&qstate->edns_opts_back_out,
179 				qstate->env->cfg->client_subnet_opcode);
180 		}
181 		sq->subnet_sent = 0;
182 	}
183 	return 1;
184 }
185 
186 
187 void
188 subnet_markdel(void* key)
189 {
190 	struct msgreply_entry *e = (struct msgreply_entry*)key;
191 	e->key.qtype = 0;
192 	e->key.qclass = 0;
193 }
194 
195 int
196 subnetmod_init(struct module_env *env, int id)
197 {
198 	struct subnet_env *sn_env = (struct subnet_env*)calloc(1,
199 		sizeof(struct subnet_env));
200 	if(!sn_env) {
201 		log_err("malloc failure");
202 		return 0;
203 	}
204 	alloc_init(&sn_env->alloc, NULL, 0);
205 	env->modinfo[id] = (void*)sn_env;
206 	/* Copy msg_cache settings */
207 	sn_env->subnet_msg_cache = slabhash_create(env->cfg->msg_cache_slabs,
208 		HASH_DEFAULT_STARTARRAY, env->cfg->msg_cache_size,
209 		msg_cache_sizefunc, query_info_compare, query_entry_delete,
210 		subnet_data_delete, NULL);
211 	slabhash_setmarkdel(sn_env->subnet_msg_cache, &subnet_markdel);
212 	if(!sn_env->subnet_msg_cache) {
213 		log_err("subnetcache: could not create cache");
214 		free(sn_env);
215 		env->modinfo[id] = NULL;
216 		return 0;
217 	}
218 	/* whitelist for edns subnet capable servers */
219 	sn_env->whitelist = ecs_whitelist_create();
220 	if(!sn_env->whitelist ||
221 		!ecs_whitelist_apply_cfg(sn_env->whitelist, env->cfg)) {
222 		log_err("subnetcache: could not create ECS whitelist");
223 		slabhash_delete(sn_env->subnet_msg_cache);
224 		free(sn_env);
225 		env->modinfo[id] = NULL;
226 		return 0;
227 	}
228 
229 	verbose(VERB_QUERY, "subnetcache: option registered (%d)",
230 		env->cfg->client_subnet_opcode);
231 	/* Create new mesh state for all queries. */
232 	env->unique_mesh = 1;
233 	if(!edns_register_option(env->cfg->client_subnet_opcode,
234 		env->cfg->client_subnet_always_forward /* bypass cache */,
235 		1 /* no aggregation */, env)) {
236 		log_err("subnetcache: could not register opcode");
237 		ecs_whitelist_delete(sn_env->whitelist);
238 		slabhash_delete(sn_env->subnet_msg_cache);
239 		free(sn_env);
240 		env->modinfo[id] = NULL;
241 		return 0;
242 	}
243 	inplace_cb_register((void*)ecs_whitelist_check, inplace_cb_query, NULL,
244 		env, id);
245 	inplace_cb_register((void*)ecs_edns_back_parsed,
246 		inplace_cb_edns_back_parsed, NULL, env, id);
247 	inplace_cb_register((void*)ecs_query_response,
248 		inplace_cb_query_response, NULL, env, id);
249 	lock_rw_init(&sn_env->biglock);
250 	return 1;
251 }
252 
253 void
254 subnetmod_deinit(struct module_env *env, int id)
255 {
256 	struct subnet_env *sn_env;
257 	if(!env || !env->modinfo[id])
258 		return;
259 	sn_env = (struct subnet_env*)env->modinfo[id];
260 	lock_rw_destroy(&sn_env->biglock);
261 	inplace_cb_delete(env, inplace_cb_edns_back_parsed, id);
262 	inplace_cb_delete(env, inplace_cb_query, id);
263 	inplace_cb_delete(env, inplace_cb_query_response, id);
264 	ecs_whitelist_delete(sn_env->whitelist);
265 	slabhash_delete(sn_env->subnet_msg_cache);
266 	alloc_clear(&sn_env->alloc);
267 	free(sn_env);
268 	env->modinfo[id] = NULL;
269 }
270 
271 /** Tells client that upstream has no/improper support */
272 static void
273 cp_edns_bad_response(struct ecs_data *target, struct ecs_data *source)
274 {
275 	target->subnet_scope_mask  = 0;
276 	target->subnet_source_mask = source->subnet_source_mask;
277 	target->subnet_addr_fam    = source->subnet_addr_fam;
278 	memcpy(target->subnet_addr, source->subnet_addr, INET6_SIZE);
279 	target->subnet_validdata = 1;
280 }
281 
282 static void
283 delfunc(void *envptr, void *elemptr) {
284 	struct reply_info *elem = (struct reply_info *)elemptr;
285 	struct subnet_env *env = (struct subnet_env *)envptr;
286 	reply_info_parsedelete(elem, &env->alloc);
287 }
288 
289 static size_t
290 sizefunc(void *elemptr) {
291 	struct reply_info *elem  = (struct reply_info *)elemptr;
292 	return sizeof (struct reply_info) - sizeof (struct rrset_ref)
293 		+ elem->rrset_count * sizeof (struct rrset_ref)
294 		+ elem->rrset_count * sizeof (struct ub_packed_rrset_key *);
295 }
296 
297 /**
298  * Select tree from cache entry based on edns data.
299  * If for address family not present it will create a new one.
300  * NULL on failure to create. */
301 static struct addrtree*
302 get_tree(struct subnet_msg_cache_data *data, struct ecs_data *edns,
303 	struct subnet_env *env, struct config_file* cfg)
304 {
305 	struct addrtree *tree;
306 	if (edns->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
307 		if (!data->tree4)
308 			data->tree4 = addrtree_create(
309 				cfg->max_client_subnet_ipv4, &delfunc,
310 				&sizefunc, env, cfg->max_ecs_tree_size_ipv4);
311 		tree = data->tree4;
312 	} else {
313 		if (!data->tree6)
314 			data->tree6 = addrtree_create(
315 				cfg->max_client_subnet_ipv6, &delfunc,
316 				&sizefunc, env, cfg->max_ecs_tree_size_ipv6);
317 		tree = data->tree6;
318 	}
319 	return tree;
320 }
321 
322 static void
323 update_cache(struct module_qstate *qstate, int id)
324 {
325 	struct msgreply_entry *mrep_entry;
326 	struct addrtree *tree;
327 	struct reply_info *rep;
328 	struct query_info qinf;
329 	struct subnet_env *sne = qstate->env->modinfo[id];
330 	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];
331 	struct slabhash *subnet_msg_cache = sne->subnet_msg_cache;
332 	struct ecs_data *edns = &sq->ecs_client_in;
333 	size_t i;
334 
335 	/* We already calculated hash upon lookup (lookup_and_reply) if we were
336 	 * allowed to look in the ECS cache */
337 	hashvalue_type h = qstate->minfo[id] &&
338 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash_calculated?
339 		((struct subnet_qstate*)qstate->minfo[id])->qinfo_hash :
340 		query_info_hash(&qstate->qinfo, qstate->query_flags);
341 	/* Step 1, general qinfo lookup */
342 	struct lruhash_entry *lru_entry = slabhash_lookup(subnet_msg_cache, h,
343 		&qstate->qinfo, 1);
344 	int need_to_insert = (lru_entry == NULL);
345 	if (!lru_entry) {
346 		void* data = calloc(1,
347 			sizeof(struct subnet_msg_cache_data));
348 		if(!data) {
349 			log_err("malloc failed");
350 			return;
351 		}
352 		qinf = qstate->qinfo;
353 		qinf.qname = memdup(qstate->qinfo.qname,
354 			qstate->qinfo.qname_len);
355 		if(!qinf.qname) {
356 			free(data);
357 			log_err("memdup failed");
358 			return;
359 		}
360 		mrep_entry = query_info_entrysetup(&qinf, data, h);
361 		free(qinf.qname); /* if qname 'consumed', it is set to NULL */
362 		if (!mrep_entry) {
363 			free(data);
364 			log_err("query_info_entrysetup failed");
365 			return;
366 		}
367 		lru_entry = &mrep_entry->entry;
368 		lock_rw_wrlock(&lru_entry->lock);
369 	}
370 	/* lru_entry->lock is locked regardless of how we got here,
371 	 * either from the slabhash_lookup, or above in the new allocated */
372 	/* Step 2, find the correct tree */
373 	if (!(tree = get_tree(lru_entry->data, edns, sne, qstate->env->cfg))) {
374 		lock_rw_unlock(&lru_entry->lock);
375 		log_err("subnetcache: cache insertion failed");
376 		return;
377 	}
378 	lock_quick_lock(&sne->alloc.lock);
379 	rep = reply_info_copy(qstate->return_msg->rep, &sne->alloc, NULL);
380 	lock_quick_unlock(&sne->alloc.lock);
381 	if (!rep) {
382 		lock_rw_unlock(&lru_entry->lock);
383 		log_err("subnetcache: cache insertion failed");
384 		return;
385 	}
386 
387 	/* store RRsets */
388 	for(i=0; i<rep->rrset_count; i++) {
389 		rep->ref[i].key = rep->rrsets[i];
390 		rep->ref[i].id = rep->rrsets[i]->id;
391 	}
392 	reply_info_set_ttls(rep, *qstate->env->now);
393 	rep->flags |= (BIT_RA | BIT_QR); /* fix flags to be sensible for */
394 	rep->flags &= ~(BIT_AA | BIT_CD);/* a reply based on the cache   */
395 	addrtree_insert(tree, (addrkey_t*)edns->subnet_addr,
396 		edns->subnet_source_mask, sq->max_scope, rep,
397 		rep->ttl, *qstate->env->now);
398 
399 	lock_rw_unlock(&lru_entry->lock);
400 	if (need_to_insert) {
401 		slabhash_insert(subnet_msg_cache, h, lru_entry, lru_entry->data,
402 			NULL);
403 	}
404 }
405 
406 /** Lookup in cache and reply true iff reply is sent. */
407 static int
408 lookup_and_reply(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
409 {
410 	struct lruhash_entry *e;
411 	struct module_env *env = qstate->env;
412 	struct subnet_env *sne = (struct subnet_env*)env->modinfo[id];
413 	hashvalue_type h = query_info_hash(&qstate->qinfo, qstate->query_flags);
414 	struct subnet_msg_cache_data *data;
415 	struct ecs_data *ecs = &sq->ecs_client_in;
416 	struct addrtree *tree;
417 	struct addrnode *node;
418 	uint8_t scope;
419 
420 	memset(&sq->ecs_client_out, 0, sizeof(sq->ecs_client_out));
421 
422 	if (sq) {
423 		sq->qinfo_hash = h; /* Might be useful on cache miss */
424 		sq->qinfo_hash_calculated = 1;
425 	}
426 	e = slabhash_lookup(sne->subnet_msg_cache, h, &qstate->qinfo, 1);
427 	if (!e) return 0; /* qinfo not in cache */
428 	data = e->data;
429 	tree = (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4)?
430 		data->tree4 : data->tree6;
431 	if (!tree) { /* qinfo in cache but not for this family */
432 		lock_rw_unlock(&e->lock);
433 		return 0;
434 	}
435 	node = addrtree_find(tree, (addrkey_t*)ecs->subnet_addr,
436 		ecs->subnet_source_mask, *env->now);
437 	if (!node) { /* plain old cache miss */
438 		lock_rw_unlock(&e->lock);
439 		return 0;
440 	}
441 
442 	qstate->return_msg = tomsg(NULL, &qstate->qinfo,
443 		(struct reply_info *)node->elem, qstate->region, *env->now, 0,
444 		env->scratch);
445 	scope = (uint8_t)node->scope;
446 	lock_rw_unlock(&e->lock);
447 
448 	if (!qstate->return_msg) { /* Failed allocation or expired TTL */
449 		return 0;
450 	}
451 
452 	if (sq->subnet_downstream) { /* relay to interested client */
453 		sq->ecs_client_out.subnet_scope_mask = scope;
454 		sq->ecs_client_out.subnet_addr_fam = ecs->subnet_addr_fam;
455 		sq->ecs_client_out.subnet_source_mask = ecs->subnet_source_mask;
456 		memcpy(&sq->ecs_client_out.subnet_addr, &ecs->subnet_addr,
457 			INET6_SIZE);
458 		sq->ecs_client_out.subnet_validdata = 1;
459 	}
460 	return 1;
461 }
462 
/**
 * Test first bits of addresses for equality. Caller is responsible
 * for making sure that both a and b are at least (net+7)/8 octets long.
 * Only the first net bits are compared; when net is not a multiple of 8
 * the remaining low bits of the last octet are ignored.
 * @param a: first address.
 * @param b: second address.
 * @param net: Number of bits to test.
 * @return: 1 if equal, 0 otherwise.
 */
static int
common_prefix(uint8_t *a, uint8_t *b, uint8_t net)
{
	size_t n = (size_t)net / 8;
	unsigned int rem = (unsigned int)net % 8;
	uint8_t mask;

	if(memcmp(a, b, n) != 0)
		return 0;
	if(rem == 0)
		return 1; /* whole octets only, do not touch a[n] */
	/* keep the top rem bits of the partial octet */
	mask = (uint8_t)(0xFF << (8 - rem));
	return ((a[n] ^ b[n]) & mask) == 0;
}
477 
/**
 * Evaluate the answer handed back by the next module and decide what to
 * tell the client and what to cache.
 * Clears sq->ecs_client_out and fills it in when the client sent an ECS
 * option itself (sq->subnet_downstream).
 * @param qstate: query state.
 * @param id: module id.
 * @param sq: subnet query state.
 * @return module_finished, or module_restart_next when the ECS data
 *	received from the authority does not match what was sent; in that
 *	case the option is removed so the query is retried without it.
 */
static enum module_ext_state
eval_response(struct module_qstate *qstate, int id, struct subnet_qstate *sq)
{
	struct subnet_env *sne = qstate->env->modinfo[id];

	struct ecs_data *c_in  = &sq->ecs_client_in; /* rcvd from client */
	struct ecs_data *c_out = &sq->ecs_client_out;/* will send to client */
	struct ecs_data *s_in  = &sq->ecs_server_in; /* rcvd from auth */
	struct ecs_data *s_out = &sq->ecs_server_out;/* sent to auth */

	memset(c_out, 0, sizeof(*c_out));

	if (!qstate->return_msg) {
		/* already an answer and its not a message, but retain
		 * the actual rcode, instead of module_error, so send
		 * module_finished */
		return module_finished;
	}

	/* We have not asked for subnet data */
	if (!sq->subnet_sent) {
		if (s_in->subnet_validdata)
			verbose(VERB_QUERY, "subnetcache: received spurious data");
		if (sq->subnet_downstream) /* Copy back to client */
			cp_edns_bad_response(c_out, c_in);
		return module_finished;
	}

	/* subnet sent but nothing came back */
	if (!s_in->subnet_validdata) {
		/* The authority indicated no support for edns subnet. As a
		 * consequence the answer ended up in the regular cache. It
		 * is still useful to put it in the edns subnet cache for
		 * when a client explicitly asks for subnet specific answer. */
		verbose(VERB_QUERY, "subnetcache: Authority indicates no support");
		/* honour the no_cache_store flag the query started with */
		if(!sq->started_no_cache_store) {
			lock_rw_wrlock(&sne->biglock);
			update_cache(qstate, id);
			lock_rw_unlock(&sne->biglock);
		}
		if (sq->subnet_downstream)
			cp_edns_bad_response(c_out, c_in);
		return module_finished;
	}

	/* Being here means we have asked for and got a subnet specific
	 * answer. Also, the answer from the authority is not yet cached
	 * anywhere. */

	/* can we accept response? */
	/* family, source mask and address prefix must match what we sent */
	if(s_out->subnet_addr_fam != s_in->subnet_addr_fam ||
		s_out->subnet_source_mask != s_in->subnet_source_mask ||
		!common_prefix(s_out->subnet_addr, s_in->subnet_addr,
			s_out->subnet_source_mask))
	{
		/* we can not accept, restart query without option */
		verbose(VERB_QUERY, "subnetcache: forged data");
		s_out->subnet_validdata = 0;
		(void)edns_opt_list_remove(&qstate->edns_opts_back_out,
			qstate->env->cfg->client_subnet_opcode);
		sq->subnet_sent = 0;
		return module_restart_next;
	}

	/* the counter is updated under the same lock as the cache */
	lock_rw_wrlock(&sne->biglock);
	if(!sq->started_no_cache_store) {
		update_cache(qstate, id);
	}
	sne->num_msg_nocache++;
	lock_rw_unlock(&sne->biglock);

	if (sq->subnet_downstream) {
		/* Client wants to see the answer, echo option back
		 * and adjust the scope. */
		c_out->subnet_addr_fam = c_in->subnet_addr_fam;
		c_out->subnet_source_mask = c_in->subnet_source_mask;
		memcpy(&c_out->subnet_addr, &c_in->subnet_addr, INET6_SIZE);
		c_out->subnet_scope_mask = sq->max_scope;
		/* Limit scope returned to client to scope used for caching. */
		if(c_out->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
			if(c_out->subnet_scope_mask >
				qstate->env->cfg->max_client_subnet_ipv4) {
				c_out->subnet_scope_mask =
					qstate->env->cfg->max_client_subnet_ipv4;
			}
		}
		else if(c_out->subnet_scope_mask >
				qstate->env->cfg->max_client_subnet_ipv6) {
				c_out->subnet_scope_mask =
					qstate->env->cfg->max_client_subnet_ipv6;
		}
		c_out->subnet_validdata = 1;
	}
	return module_finished;
}
573 
574 /** Parse EDNS opt data containing ECS */
575 static int
576 parse_subnet_option(struct edns_option* ecs_option, struct ecs_data* ecs)
577 {
578 	memset(ecs, 0, sizeof(*ecs));
579 	if (ecs_option->opt_len < 4)
580 		return 0;
581 
582 	ecs->subnet_addr_fam = sldns_read_uint16(ecs_option->opt_data);
583 	ecs->subnet_source_mask = ecs_option->opt_data[2];
584 	ecs->subnet_scope_mask = ecs_option->opt_data[3];
585 	/* remaining bytes indicate address */
586 
587 	/* validate input*/
588 	/* option length matches calculated length? */
589 	if (ecs_option->opt_len != (size_t)((ecs->subnet_source_mask+7)/8 + 4))
590 		return 0;
591 	if (ecs_option->opt_len - 4 > INET6_SIZE || ecs_option->opt_len == 0)
592 		return 0;
593 	if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4) {
594 		if (ecs->subnet_source_mask > 32 || ecs->subnet_scope_mask > 32)
595 			return 0;
596 	} else if (ecs->subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6) {
597 		if (ecs->subnet_source_mask > 128 ||
598 			ecs->subnet_scope_mask > 128)
599 			return 0;
600 	} else
601 		return 0;
602 
603 	/* valid ECS data, write to ecs_data */
604 	if (copy_clear(ecs->subnet_addr, INET6_SIZE, ecs_option->opt_data + 4,
605 		ecs_option->opt_len - 4, ecs->subnet_source_mask))
606 		return 0;
607 	ecs->subnet_validdata = 1;
608 	return 1;
609 }
610 
611 void
612 subnet_option_from_ss(struct sockaddr_storage *ss, struct ecs_data* ecs,
613 	struct config_file* cfg)
614 {
615 	void* sinaddr;
616 
617 	/* Construct subnet option from original query */
618 	if(((struct sockaddr_in*)ss)->sin_family == AF_INET) {
619 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv4;
620 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP4;
621 		sinaddr = &((struct sockaddr_in*)ss)->sin_addr;
622 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
623 			(uint8_t *)sinaddr, INET_SIZE,
624 			ecs->subnet_source_mask)) {
625 			ecs->subnet_validdata = 1;
626 		}
627 	}
628 #ifdef INET6
629 	else {
630 		ecs->subnet_source_mask = cfg->max_client_subnet_ipv6;
631 		ecs->subnet_addr_fam = EDNSSUBNET_ADDRFAM_IP6;
632 		sinaddr = &((struct sockaddr_in6*)ss)->sin6_addr;
633 		if (!copy_clear( ecs->subnet_addr, INET6_SIZE,
634 			(uint8_t *)sinaddr, INET6_SIZE,
635 			ecs->subnet_source_mask)) {
636 			ecs->subnet_validdata = 1;
637 		}
638 	}
639 #else
640 			/* We don't know how to handle ip6, just pass */
641 #endif /* INET6 */
642 }
643 
644 int
645 ecs_query_response(struct module_qstate* qstate, struct dns_msg* response,
646 	int id, void* ATTR_UNUSED(cbargs))
647 {
648 	struct subnet_qstate *sq;
649 
650 	if(!response || !(sq=(struct subnet_qstate*)qstate->minfo[id]))
651 		return 1;
652 
653 	if(sq->subnet_sent &&
654 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_REFUSED) {
655 		/* REFUSED response to ECS query, remove ECS option. */
656 		edns_opt_list_remove(&qstate->edns_opts_back_out,
657 			qstate->env->cfg->client_subnet_opcode);
658 		sq->subnet_sent = 0;
659 		memset(&sq->ecs_server_out, 0, sizeof(sq->ecs_server_out));
660 	} else if (!sq->track_max_scope &&
661 		FLAGS_GET_RCODE(response->rep->flags) == LDNS_RCODE_NOERROR &&
662 		response->rep->an_numrrsets > 0
663 		) {
664 		struct ub_packed_rrset_key* s = response->rep->rrsets[0];
665 		if(ntohs(s->rk.type) == LDNS_RR_TYPE_CNAME &&
666 			query_dname_compare(qstate->qinfo.qname,
667 			s->rk.dname) == 0) {
668 			/* CNAME response for QNAME. From now on keep track of
669 			 * longest received ECS prefix for all queries on this
670 			 * qstate. */
671 			sq->track_max_scope = 1;
672 		}
673 	}
674 	return 1;
675 }
676 
677 int
678 ecs_edns_back_parsed(struct module_qstate* qstate, int id,
679 	void* ATTR_UNUSED(cbargs))
680 {
681 	struct subnet_qstate *sq;
682 	struct edns_option* ecs_opt;
683 
684 	if(!(sq=(struct subnet_qstate*)qstate->minfo[id]))
685 		return 1;
686 	if((ecs_opt = edns_opt_list_find(
687 		qstate->edns_opts_back_in,
688 		qstate->env->cfg->client_subnet_opcode)) &&
689 		parse_subnet_option(ecs_opt, &sq->ecs_server_in) &&
690 		sq->subnet_sent && sq->ecs_server_in.subnet_validdata) {
691 			/* Only skip global cache store if we sent an ECS option
692 			 * and received one back. Answers from non-whitelisted
693 			 * servers will end up in global cache. Answers for
694 			 * queries with 0 source will not (unless nameserver
695 			 * does not support ECS). */
696 			qstate->no_cache_store = 1;
697 			if(!sq->track_max_scope || (sq->track_max_scope &&
698 				sq->ecs_server_in.subnet_scope_mask >
699 				sq->max_scope))
700 				sq->max_scope = sq->ecs_server_in.subnet_scope_mask;
701 	}
702 
703 	return 1;
704 }
705 
/**
 * Module operate function of the subnet cache.
 * For a new query (no subnet state yet) it creates the state, takes the
 * ECS data from the client option or from the client address, tries to
 * answer from the ECS cache and otherwise sets up the option to send
 * upstream and passes the query to the next module.
 * When the next module is done, the response is evaluated and the option
 * for the client is appended.
 * @param qstate: query state.
 * @param event: event that triggered this call.
 * @param id: module id.
 * @param outbound: outbound entry, if any.
 */
void
subnetmod_operate(struct module_qstate *qstate, enum module_ev event,
	int id, struct outbound_entry* outbound)
{
	struct subnet_env *sne = qstate->env->modinfo[id];
	struct subnet_qstate *sq = (struct subnet_qstate*)qstate->minfo[id];

	verbose(VERB_QUERY, "subnetcache[module %d] operate: extstate:%s "
		"event:%s", id, strextstate(qstate->ext_state[id]),
		strmodulevent(event));
	log_query_info(VERB_QUERY, "subnetcache operate: query", &qstate->qinfo);

	/* First visit for this query: no subnet state exists yet */
	if((event == module_event_new || event == module_event_pass) &&
		sq == NULL) {
		struct edns_option* ecs_opt;
		if(!subnet_new_qstate(qstate, id)) {
			qstate->return_msg = NULL;
			qstate->ext_state[id] = module_finished;
			return;
		}

		sq = (struct subnet_qstate*)qstate->minfo[id];

		/* ECS data comes from the client option if there is one,
		 * otherwise from the address of the first client in the
		 * reply list */
		if((ecs_opt = edns_opt_list_find(
			qstate->edns_opts_front_in,
			qstate->env->cfg->client_subnet_opcode))) {
			if(!parse_subnet_option(ecs_opt, &sq->ecs_client_in)) {
				/* Wrongly formatted ECS option. RFC mandates to
				 * return FORMERROR. */
				qstate->return_rcode = LDNS_RCODE_FORMERR;
				qstate->ext_state[id] = module_finished;
				return;
			}
			sq->subnet_downstream = 1;
		}
		else if(qstate->mesh_info->reply_list) {
			subnet_option_from_ss(
				&qstate->mesh_info->reply_list->query_reply.addr,
				&sq->ecs_client_in, qstate->env->cfg);
		}

		if(sq->ecs_client_in.subnet_validdata == 0) {
			/* No clients are interested in result or we could not
			 * parse it, we don't do client subnet */
			sq->ecs_server_out.subnet_validdata = 0;
			verbose(VERB_ALGO, "subnetcache: pass to next module");
			qstate->ext_state[id] = module_wait_module;
			return;
		}

		/* Limit to minimum allowed source mask */
		/* a source mask of 0 is exempt from the minimum */
		if(sq->ecs_client_in.subnet_source_mask != 0 && (
			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4 &&
			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv4) ||
			(sq->ecs_client_in.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6 &&
			 sq->ecs_client_in.subnet_source_mask < qstate->env->cfg->min_client_subnet_ipv6))) {
				qstate->return_rcode = LDNS_RCODE_REFUSED;
				qstate->ext_state[id] = module_finished;
				return;
		}

		/* Try the ECS cache, unless cache lookups were disabled when
		 * the query started or the query has a blacklist */
		if(!sq->started_no_cache_lookup && !qstate->blacklist) {
			lock_rw_wrlock(&sne->biglock);
			if(lookup_and_reply(qstate, id, sq)) {
				sne->num_msg_cache++;
				lock_rw_unlock(&sne->biglock);
				verbose(VERB_QUERY, "subnetcache: answered from cache");
				qstate->ext_state[id] = module_finished;

				subnet_ecs_opt_list_append(&sq->ecs_client_out,
					&qstate->edns_opts_front_out, qstate,
					qstate->region);
				return;
			}
			lock_rw_unlock(&sne->biglock);
		}

		/* Cache miss: prepare the option that may be sent upstream */
		sq->ecs_server_out.subnet_addr_fam =
			sq->ecs_client_in.subnet_addr_fam;
		sq->ecs_server_out.subnet_source_mask =
			sq->ecs_client_in.subnet_source_mask;
		/* Limit source prefix to configured maximum */
		if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP4
			&& sq->ecs_server_out.subnet_source_mask >
			qstate->env->cfg->max_client_subnet_ipv4)
			sq->ecs_server_out.subnet_source_mask =
				qstate->env->cfg->max_client_subnet_ipv4;
		else if(sq->ecs_server_out.subnet_addr_fam == EDNSSUBNET_ADDRFAM_IP6
			&& sq->ecs_server_out.subnet_source_mask >
			qstate->env->cfg->max_client_subnet_ipv6)
			sq->ecs_server_out.subnet_source_mask =
				qstate->env->cfg->max_client_subnet_ipv6;
		/* Safe to copy completely, even if the source is limited by the
		 * configuration. subnet_ecs_opt_list_append() will limit the address.
		 * */
		memcpy(&sq->ecs_server_out.subnet_addr,
			sq->ecs_client_in.subnet_addr, INET6_SIZE);
		sq->ecs_server_out.subnet_scope_mask = 0;
		sq->ecs_server_out.subnet_validdata = 1;
		if(sq->ecs_server_out.subnet_source_mask != 0 &&
			qstate->env->cfg->client_subnet_always_forward &&
			sq->subnet_downstream)
			/* ECS specific data required, do not look at the global
			 * cache in other modules. */
			qstate->no_cache_lookup = 1;

		/* pass request to next module */
		verbose(VERB_ALGO,
			"subnetcache: not found in cache. pass to next module");
		qstate->ext_state[id] = module_wait_module;
		return;
	}
	/* Query handed back by next module, we have a 'final' answer */
	if(sq && event == module_event_moddone) {
		qstate->ext_state[id] = eval_response(qstate, id, sq);
		if(qstate->ext_state[id] == module_finished &&
			qstate->return_msg) {
			subnet_ecs_opt_list_append(&sq->ecs_client_out,
				&qstate->edns_opts_front_out, qstate,
				qstate->region);
		}
		/* restore the cache flags the query started with */
		qstate->no_cache_store = sq->started_no_cache_store;
		qstate->no_cache_lookup = sq->started_no_cache_lookup;
		return;
	}
	/* an outbound entry on a query we already handle: nothing to do */
	if(sq && outbound) {
		return;
	}
	/* We are being revisited */
	if(event == module_event_pass || event == module_event_new) {
		/* Just pass it on, we already did the work */
		verbose(VERB_ALGO, "subnetcache: pass to next module");
		qstate->ext_state[id] = module_wait_module;
		return;
	}
	if(!sq && (event == module_event_moddone)) {
		/* during priming, module done but we never started */
		qstate->ext_state[id] = module_finished;
		return;
	}
	log_err("subnetcache: bad event %s", strmodulevent(event));
	qstate->ext_state[id] = module_error;
	return;
}
850 
851 void
852 subnetmod_clear(struct module_qstate *ATTR_UNUSED(qstate),
853 	int ATTR_UNUSED(id))
854 {
855 	/* qstate has no data outside region */
856 }
857 
858 void
859 subnetmod_inform_super(struct module_qstate *ATTR_UNUSED(qstate),
860 	int ATTR_UNUSED(id), struct module_qstate *ATTR_UNUSED(super))
861 {
862 	/* Not used */
863 }
864 
865 size_t
866 subnetmod_get_mem(struct module_env *env, int id)
867 {
868 	struct subnet_env *sn_env = env->modinfo[id];
869 	if (!sn_env) return 0;
870 	return sizeof(*sn_env) +
871 		slabhash_get_mem(sn_env->subnet_msg_cache) +
872 		ecs_whitelist_get_mem(sn_env->whitelist);
873 }
874 
/**
 * The module function block.
 * Holds the module name "subnetcache" followed by the init, deinit,
 * operate, inform_super, clear and get_mem functions of this module.
 */
static struct module_func_block subnetmod_block = {
	"subnetcache", &subnetmod_init, &subnetmod_deinit, &subnetmod_operate,
	&subnetmod_inform_super, &subnetmod_clear, &subnetmod_get_mem
};
882 
883 struct module_func_block*
884 subnetmod_get_funcblock(void)
885 {
886 	return &subnetmod_block;
887 }
888 
/** Wrapper that exposes the static sizefunc to the unit tests. */
size_t
unittest_wrapper_subnetmod_sizefunc(void *elemptr)
{
	size_t elem_size = sizefunc(elemptr);
	return elem_size;
}
895 
896 #endif  /* CLIENT_SUBNET */
897