1 /*  Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
2  *  SPDX-License-Identifier: GPL-3.0-or-later
3  */
4 
5 #include "lib/cache/impl.h"
6 
7 #include "lib/dnssec/ta.h"
8 #include "lib/layer/iterate.h"
9 
10 /* The whole file only exports peek_nosync().
11  * Forwards for larger chunks of code: */
12 
13 static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val,
14 			   uint8_t lowest_rank);
15 static int closest_NS(struct kr_cache *cache, struct key *k, entry_list_t el,
16 			struct kr_query *qry, bool only_NS, bool is_DS);
17 static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
18 		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);
19 static int answer_dname_hit(kr_layer_t *ctx, knot_pkt_t *pkt, const knot_dname_t *dname_owner,
20 		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);
21 static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
22 		    uint16_t type, uint8_t lowest_rank,
23 		    const struct kr_query *qry, struct kr_cache *cache);
24 
25 static int peek_encloser(
26 	struct key *k, struct answer *ans, int sname_labels,
27 	uint8_t lowest_rank, const struct kr_query *qry, struct kr_cache *cache);
28 
29 
/** Initialize *nsec_p from a cached nsec_p entry.
 * The entry starts with a 32-bit timestamp; any payload beyond it holds NSEC3
 * parameters in raw wire form.  With `with_knot` also fill the libdnssec
 * representation (nsec_p->libknot); release that via nsec_p_cleanup().
 * \return kr_ok() or kr_error(*) from the libdnssec conversion. */
static int nsec_p_init(struct nsec_p *nsec_p, knot_db_val_t nsec_p_entry, bool with_knot)
{
	const size_t ts_size = sizeof(uint32_t);
	/* Nothing beyond the timestamp means plain NSEC. */
	if (nsec_p_entry.len <= ts_size) {
		nsec_p->raw = NULL;
		nsec_p->hash = 0;
		return kr_ok();
	}
	nsec_p->raw = (uint8_t *)nsec_p_entry.data + ts_size;
	nsec_p->hash = nsec_p_mkHash(nsec_p->raw);
	if (!with_knot)
		return kr_ok();
	/* Convert the raw NSEC3 parameters into the libdnssec structure. */
	const dnssec_binary_t params_bin = {
		.size = nsec_p_rdlen(nsec_p->raw),
		.data = (uint8_t *)/*const-cast*/nsec_p->raw,
	};
	const int err = dnssec_nsec3_params_from_rdata(&nsec_p->libknot, &params_bin);
	if (err != DNSSEC_EOK)
		return kr_error(err);
	return kr_ok();
}
49 
/** Free resources held by *nsec_p (the libdnssec salt) and zero the struct,
 * returning it to the "plain NSEC" state that nsec_p_init() can produce. */
static void nsec_p_cleanup(struct nsec_p *nsec_p)
{
	dnssec_binary_free(&nsec_p->libknot.salt);
	/* We don't really need to clear it, but it's not large. (`salt` zeroed above) */
	memset(nsec_p, 0, sizeof(*nsec_p));
}
56 
57 /** Compute new TTL for nsec_p entry, using SOA serial arith.
58  * \param new_ttl (optionally) write the new TTL (even if negative)
59  * \return error code, e.g. kr_error(ESTALE) */
nsec_p_ttl(knot_db_val_t entry,const uint32_t timestamp,int32_t * new_ttl)60 static int nsec_p_ttl(knot_db_val_t entry, const uint32_t timestamp, int32_t *new_ttl)
61 {
62 	if (kr_fails_assert(entry.data))
63 		return kr_error(EINVAL);
64 	uint32_t stamp;
65 	if (!entry.len)
66 		return kr_error(ENOENT);
67 	if (kr_fails_assert(entry.len >= sizeof(stamp)))
68 		return kr_error(EILSEQ);
69 	memcpy(&stamp, entry.data, sizeof(stamp));
70 	int32_t newttl = stamp - timestamp;
71 	if (new_ttl) *new_ttl = newttl;
72 	return newttl < 0 ? kr_error(ESTALE) : kr_ok();
73 }
74 
/** Compute the lowest acceptable rank for a cache hit on (name, type).
 * Depends on the request's CD bit, the STUB/NONAUTH query flags,
 * and whether any trust anchor covers the name. */
static uint8_t get_lowest_rank(const struct kr_query *qry, const knot_dname_t *name, const uint16_t type)
{
	/* Shut up linters. */
	if (unlikely(!qry || !qry->request)) abort();
	/* TODO: move rank handling into the iterator (DNSSEC_* flags)? */
	const bool allow_unverified =
		knot_wire_get_cd(qry->request->qsource.packet->wire) || qry->flags.STUB;
		/* in stub mode we don't trust RRs anyway ^^ */
	if (qry->flags.NONAUTH) {
		return KR_RANK_INITIAL;
		/* Note: there's little sense in validation status for non-auth records.
		 * In case of using NONAUTH to get NS IPs, knowing that you ask correct
		 * IP doesn't matter much for security; it matters whether you can
		 * validate the answers from the NS.
		 */
	} else if (!allow_unverified) {
		/* Records not present under any TA don't have their security
		 * verified at all, so we also accept low ranks in that case. */
		const bool ta_covers = kr_ta_closest(qry->request->ctx, name, type);
		/* ^ TODO: performance?  TODO: stype - call sites */
		if (ta_covers) {
			return KR_RANK_INSECURE | KR_RANK_AUTH;
		} /* else fallthrough */
	}
	return KR_RANK_INITIAL | KR_RANK_AUTH;
}
101 
102 
/** Almost whole .produce phase for the cache module.
 * Tries, in order: exact hit, xNAME (CNAME/DNAME) at the closest zone cut,
 * then aggressive NSEC/NSEC3 proofs, finally attaching SOA for negatives.
 * \note we don't transition to KR_STATE_FAIL even in case of "unexpected errors".
 */
int peek_nosync(kr_layer_t *ctx, knot_pkt_t *pkt)
{
	struct kr_request *req = ctx->req;
	struct kr_query *qry = req->current_query;
	struct kr_cache *cache = &req->ctx->cache;

	/* Serialize qry->sname into lookup format inside k->buf. */
	struct key k_storage, *k = &k_storage;
	int ret = kr_dname_lf(k->buf, qry->sname, false);
	if (kr_fails_assert(ret == 0))
		return ctx->state;

	const uint8_t lowest_rank = get_lowest_rank(qry, qry->sname, qry->stype);

	/**** 1. find the name or the closest (available) zone, not considering wildcards
	 **** 1a. exact name+type match (can be negative, mainly in insecure zones) */
	{
		knot_db_val_t key = key_exact_type_maypkt(k, qry->stype);
		knot_db_val_t val = { NULL, 0 };
		ret = cache_op(cache, read, &key, &val, 1);
		if (!ret) {
			/* found an entry: test conditions, materialize into pkt, etc. */
			ret = found_exact_hit(ctx, pkt, val, lowest_rank);
		}
	}
	if (!ret) {
		return KR_STATE_DONE;
	} else if (kr_fails_assert(ret == kr_error(ENOENT))) {
		/* Unexpected error; don't fail the request, just stop using cache. */
		VERBOSE_MSG(qry, "=> exact hit error: %d %s\n", ret, kr_strerror(ret));
		return ctx->state;
	}

	/**** 1b. otherwise, find the longest prefix zone/xNAME (with OK time+rank). [...] */
	k->zname = qry->sname;
	ret = kr_dname_lf(k->buf, k->zname, false); /* LATER(optim.): probably remove */
	if (kr_fails_assert(ret == 0))
		return ctx->state;
	entry_list_t el;
	ret = closest_NS(cache, k, el, qry, false, qry->stype == KNOT_RRTYPE_DS);
	if (ret) {
		/* Even with ENOENT the nsec_p list in el[0] may still be usable below. */
		if (kr_fails_assert(ret == kr_error(ENOENT)) || !el[0].len) {
			return ctx->state;
		}
	}
	/* closest_NS may have found an xNAME instead of a plain NS; k->type tells. */
	switch (k->type) {
	case KNOT_RRTYPE_CNAME: {
		const knot_db_val_t v = el[EL_CNAME];
		if (kr_fails_assert(v.data && v.len))
			return ctx->state;
		const int32_t new_ttl = get_new_ttl(v.data, qry, qry->sname,
						KNOT_RRTYPE_CNAME, qry->timestamp.tv_sec);
		ret = answer_simple_hit(ctx, pkt, KNOT_RRTYPE_CNAME, v.data,
					knot_db_val_bound(v), new_ttl);
		return ret == kr_ok() ? KR_STATE_DONE : ctx->state;
		}
	case KNOT_RRTYPE_DNAME: {
		const knot_db_val_t v = el[EL_DNAME];
		if (kr_fails_assert(v.data && v.len))
			return ctx->state;
		/* TTL: for simplicity, we just ask for TTL of the generated CNAME. */
		const int32_t new_ttl = get_new_ttl(v.data, qry, qry->sname,
						KNOT_RRTYPE_CNAME, qry->timestamp.tv_sec);
		ret = answer_dname_hit(ctx, pkt, k->zname, v.data,
					knot_db_val_bound(v), new_ttl);
		return ret == kr_ok() ? KR_STATE_DONE : ctx->state;
		}
	}

	/* We have to try proving from NSEC*. */
	auto_free char *log_zname = NULL;
	WITH_VERBOSE(qry) {
		log_zname = kr_dname_text(k->zname);
		if (!el[0].len) {
			VERBOSE_MSG(qry, "=> no NSEC* cached for zone: %s\n", log_zname);
		}
	}

#if 0
	if (!eh) { /* fall back to root hints? */
		ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
		if (ret) return ctx->state;
		kr_assert(!qry->zone_cut.parent);

		//VERBOSE_MSG(qry, "=> using root hints\n");
		//qry->flags.AWAIT_CUT = false;
		return ctx->state;
	}

	/* Now `eh` points to the closest NS record that we've found,
	 * and that's the only place to start - we may either find
	 * a negative proof or we may query upstream from that point. */
	kr_zonecut_set(&qry->zone_cut, k->zname);
	ret = kr_make_query(qry, pkt); // TODO: probably not yet - qname minimization
	if (ret) return ctx->state;
#endif

	/** Structure for collecting multiple NSEC* + RRSIG records,
	 * in preparation for the answer, and for tracking the progress. */
	struct answer ans;
	memset(&ans, 0, sizeof(ans));
	ans.mm = &pkt->mm;
	const int sname_labels = knot_dname_labels(qry->sname, NULL);

	/* Try the NSEC* parameters in order, until success.
	 * Let's not mix different parameters for NSEC* RRs in a single proof. */
	for (int i = 0; ;) {
		int32_t log_new_ttl = -123456789; /* visually recognizable value */
		ret = nsec_p_ttl(el[i], qry->timestamp.tv_sec, &log_new_ttl);
		/* Initialize nsec_p when usable; also when just logging the skip. */
		if (!ret || kr_log_is_debug_qry(CACHE, qry)) {
			nsec_p_init(&ans.nsec_p, el[i], !ret);
		}
		if (ret) {
			VERBOSE_MSG(qry, "=> skipping zone: %s, %s, hash %x;"
				"new TTL %d, ret %d\n",
				log_zname, (ans.nsec_p.raw ? "NSEC3" : "NSEC"),
				(unsigned)ans.nsec_p.hash, (int)log_new_ttl, ret);
			/* no need for nsec_p_cleanup() in this case */
			goto cont;
		}
		VERBOSE_MSG(qry, "=> trying zone: %s, %s, hash %x\n",
				log_zname, (ans.nsec_p.raw ? "NSEC3" : "NSEC"),
				(unsigned)ans.nsec_p.hash);
		/**** 2. and 3. inside */
		ret = peek_encloser(k, &ans, sname_labels,
					lowest_rank, qry, cache);
		nsec_p_cleanup(&ans.nsec_p);
		if (!ret) break;
		if (ret < 0) return ctx->state;
	cont:
		/* Otherwise we try another nsec_p, if available. */
		if (++i == ENTRY_APEX_NSECS_CNT) return ctx->state;
		/* clear possible partial answers in `ans` (no need to deallocate) */
		ans.rcode = 0;
		memset(&ans.rrsets, 0, sizeof(ans.rrsets));
	}

	/**** 4. add SOA iff needed */
	if (ans.rcode != PKT_NOERROR) {
		/* Assuming k->buf still starts with zone's prefix,
		 * look up the SOA in cache. */
		k->buf[0] = k->zlf_len;
		knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_SOA);
		knot_db_val_t val = { NULL, 0 };
		ret = cache_op(cache, read, &key, &val, 1);
		const struct entry_h *eh;
		if (ret || !(eh = entry_h_consistent_E(val, KNOT_RRTYPE_SOA))) {
			kr_assert(ret); /* only want to catch `eh` failures */
			VERBOSE_MSG(qry, "=> SOA missed\n");
			return ctx->state;
		}
		/* Check if the record is OK. */
		int32_t new_ttl = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_SOA,
						qry->timestamp.tv_sec);
		if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) {
			VERBOSE_MSG(qry, "=> SOA unfit %s: rank 0%.2o, new TTL %d\n",
					(eh->is_packet ? "packet" : "RR"),
					eh->rank, new_ttl);
			return ctx->state;
		}
		/* Add the SOA into the answer. */
		ret = entry2answer(&ans, AR_SOA, eh, knot_db_val_bound(val),
				   k->zname, KNOT_RRTYPE_SOA, new_ttl);
		if (ret) return ctx->state;
	}

	/* Find our target RCODE. */
	int real_rcode;
	switch (ans.rcode) {
	case PKT_NODATA:
	case PKT_NOERROR: /* positive wildcarded response */
		real_rcode = KNOT_RCODE_NOERROR;
		break;
	case PKT_NXDOMAIN:
		real_rcode = KNOT_RCODE_NXDOMAIN;
		break;
	default:
		kr_assert(false);
		/* fall through */
	case 0: /* i.e. nothing was found */
		/* LATER(optim.): zone cut? */
		VERBOSE_MSG(qry, "=> cache miss\n");
		return ctx->state;
	}

	/* Start a fresh (pseudo-)packet for the answer. */
	if (pkt_renew(pkt, qry->sname, qry->stype)
	    || knot_pkt_begin(pkt, KNOT_ANSWER)
	   ) {
		kr_assert(false);
		return ctx->state;
	}
	knot_wire_set_rcode(pkt->wire, real_rcode);

	/* Materialize the collected RRsets: slot 0 is ANSWER, the rest AUTHORITY. */
	bool expiring = false; // TODO
	for (int i = 0; i < sizeof(ans.rrsets) / sizeof(ans.rrsets[0]); ++i) {
		if (i == 1) knot_pkt_begin(pkt, KNOT_AUTHORITY);
		if (!ans.rrsets[i].set.rr) continue;
		expiring = expiring || ans.rrsets[i].set.expiring;
		ret = pkt_append(pkt, &ans.rrsets[i], ans.rrsets[i].set.rank);
		if (kr_fails_assert(ret == 0))
			return ctx->state;
	}

	/* Finishing touches. */
	struct kr_qflags * const qf = &qry->flags;
	qf->EXPIRING = expiring;
	qf->CACHED = true;
	qf->NO_MINIMIZE = true;

	return KR_STATE_DONE;
}
314 
315 /**
316  * This is where the high-level "business logic" of aggressive cache is.
317  * \return 0: success (may need SOA);  >0: try other nsec_p;  <0: exit cache immediately.
318  */
peek_encloser(struct key * k,struct answer * ans,const int sname_labels,uint8_t lowest_rank,const struct kr_query * qry,struct kr_cache * cache)319 static int peek_encloser(
320 	struct key *k, struct answer *ans, const int sname_labels,
321 	uint8_t lowest_rank, const struct kr_query *qry, struct kr_cache *cache)
322 {
323 	/** Start of NSEC* covering the sname;
324 	 * it's part of key - the one within zone (read only) */
325 	knot_db_val_t cover_low_kwz = { NULL, 0 };
326 	knot_dname_t cover_hi_storage[KNOT_DNAME_MAXLEN];
327 	/** End of NSEC* covering the sname. */
328 	knot_db_val_t cover_hi_kwz = {
329 		.data = cover_hi_storage,
330 		.len = sizeof(cover_hi_storage),
331 	};
332 
333 	/**** 2. Find a closest (provable) encloser (of sname). */
334 	int clencl_labels = -1;
335 	bool clencl_is_tentative = false;
336 	if (!ans->nsec_p.raw) { /* NSEC */
337 		int ret = nsec1_encloser(k, ans, sname_labels, &clencl_labels,
338 					 &cover_low_kwz, &cover_hi_kwz, qry, cache);
339 		if (ret) return ret;
340 	} else {
341 		int ret = nsec3_encloser(k, ans, sname_labels, &clencl_labels,
342 					 qry, cache);
343 		clencl_is_tentative = ret == ABS(ENOENT) && clencl_labels >= 0;
344 		/* ^^ Last chance: *positive* wildcard record under this clencl. */
345 		if (ret && !clencl_is_tentative) return ret;
346 	}
347 
348 	/* We should have either a match or a cover at this point. */
349 	if (kr_fails_assert(ans->rcode == PKT_NODATA || ans->rcode == PKT_NXDOMAIN))
350 		return kr_error(EINVAL);
351 	const bool ncloser_covered = ans->rcode == PKT_NXDOMAIN;
352 
353 	/** Name of the closest (provable) encloser. */
354 	const knot_dname_t *clencl_name = qry->sname;
355 	for (int l = sname_labels; l > clencl_labels; --l)
356 		clencl_name = knot_wire_next_label(clencl_name, NULL);
357 
358 	/**** 3. source of synthesis checks, in case the next closer name was covered.
359 	 **** 3a. We want to query for NSEC* of source of synthesis (SS) or its
360 	 * predecessor, providing us with a proof of its existence or non-existence. */
361 	if (ncloser_covered && !ans->nsec_p.raw) {
362 		int ret = nsec1_src_synth(k, ans, clencl_name,
363 					  cover_low_kwz, cover_hi_kwz, qry, cache);
364 		if (ret == AR_SOA) return 0;
365 		kr_assert(ret <= 0);
366 		if (ret) return ret;
367 
368 	} else if (ncloser_covered && ans->nsec_p.raw && !clencl_is_tentative) {
369 		int ret = nsec3_src_synth(k, ans, clencl_name, qry, cache);
370 		if (ret == AR_SOA) return 0;
371 		kr_assert(ret <= 0);
372 		if (ret) return ret;
373 
374 	} /* else (!ncloser_covered) so no wildcard checks needed,
375 	   * as we proved that sname exists. */
376 
377 	/**** 3b. find wildcarded answer, if next closer name was covered
378 	 * and we don't have a full proof yet.  (common for NSEC*) */
379 	if (!ncloser_covered)
380 		return kr_ok(); /* decrease indentation */
381 	/* Construct key for exact qry->stype + source of synthesis. */
382 	int ret = kr_dname_lf(k->buf, clencl_name, true);
383 	if (kr_fails_assert(ret == 0))
384 		return kr_error(ret);
385 	const uint16_t types[] = { qry->stype, KNOT_RRTYPE_CNAME };
386 	for (int i = 0; i < (2 - (qry->stype == KNOT_RRTYPE_CNAME)); ++i) {
387 		ret = try_wild(k, ans, clencl_name, types[i],
388 				lowest_rank, qry, cache);
389 		if (ret == kr_ok()) {
390 			return kr_ok();
391 		} else if (kr_fails_assert(ret == kr_error(ENOENT) || ret == kr_error(ESTALE))) {
392 			return kr_error(ret);
393 		}
394 		/* else continue */
395 	}
396 	/* Neither attempt succeeded, but the NSEC* proofs were found,
397 	 * so skip trying other parameters, as it seems very unlikely
398 	 * to turn out differently than by the same wildcard search. */
399 	return kr_error(ENOENT);
400 }
401 
answer_simple_qflags(struct kr_qflags * qf,const struct entry_h * eh,uint32_t new_ttl)402 static void answer_simple_qflags(struct kr_qflags *qf, const struct entry_h *eh,
403 				 uint32_t new_ttl)
404 {
405 	/* Finishing touches. */
406 	qf->EXPIRING = is_expiring(eh->ttl, new_ttl);
407 	qf->CACHED = true;
408 	qf->NO_MINIMIZE = true;
409 	qf->DNSSEC_INSECURE = kr_rank_test(eh->rank, KR_RANK_INSECURE);
410 	if (qf->DNSSEC_INSECURE) {
411 		qf->DNSSEC_WANT = false;
412 	}
413 }
414 
415 #define CHECK_RET(ret) do { \
416 	if (kr_fails_assert((ret) >= 0)) return kr_error((ret)); \
417 } while (false)
418 
/** Materialize a single exact cache entry (RRset + RRSIGs) into `pkt`
 * as the whole answer; `eh`/`eh_bound` delimit the entry's serialized data.
 * \return kr_ok() or a negative error (asserted via CHECK_RET). */
static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl)
{
	struct kr_request *req = ctx->req;
	struct kr_query *qry = req->current_query;

	/* All OK, so start constructing the (pseudo-)packet. */
	int ret = pkt_renew(pkt, qry->sname, qry->stype);
	CHECK_RET(ret);

	/* Materialize the sets for the answer in (pseudo-)packet. */
	struct answer ans;
	memset(&ans, 0, sizeof(ans));
	ans.mm = &pkt->mm;
	ret = entry2answer(&ans, AR_ANSWER, eh, eh_bound,
			   qry->sname, type, new_ttl);
	CHECK_RET(ret);
	/* Put links to the materialized data into the pkt. */
	ret = pkt_append(pkt, &ans.rrsets[AR_ANSWER], eh->rank);
	CHECK_RET(ret);

	answer_simple_qflags(&qry->flags, eh, new_ttl);

	VERBOSE_MSG(qry, "=> satisfied by exact %s: rank 0%.2o, new TTL %d\n",
			(type == KNOT_RRTYPE_CNAME ? "CNAME" : "RRset"),
			eh->rank, new_ttl);
	return kr_ok();
}
447 
/** Answer from a cached DNAME: materialize the DNAME RRset and synthesize
 * the corresponding CNAME for qry->sname.  If the expanded target name would
 * exceed the DNS limits, signal YXDOMAIN instead (DNAME expansion overflow).
 * \param dname_owner owner of the cached DNAME (the zone name found). */
static int answer_dname_hit(kr_layer_t *ctx, knot_pkt_t *pkt, const knot_dname_t *dname_owner,
		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl)
{
	struct kr_request *req = ctx->req;
	struct kr_query *qry = req->current_query;

	/* All OK, so start constructing the (pseudo-)packet. */
	int ret = pkt_renew(pkt, qry->sname, qry->stype);
	CHECK_RET(ret);

	/* Materialize the DNAME for the answer in (pseudo-)packet. */
	struct answer ans;
	memset(&ans, 0, sizeof(ans));
	ans.mm = &pkt->mm;
	ret = entry2answer(&ans, AR_ANSWER, eh, eh_bound,
			   dname_owner, KNOT_RRTYPE_DNAME, new_ttl);
	CHECK_RET(ret);
	/* Put link to the RRset into the pkt. */
	ret = pkt_append(pkt, &ans.rrsets[AR_ANSWER], eh->rank);
	CHECK_RET(ret);
	const knot_dname_t *dname_target =
		knot_dname_target(ans.rrsets[AR_ANSWER].set.rr->rrs.rdata);

	/* Generate CNAME RRset for the answer in (pseudo-)packet.
	 * Reuse the AR_SOA slot; SOA isn't needed for this positive answer. */
	const int AR_CNAME = AR_SOA;
	knot_rrset_t *rr = ans.rrsets[AR_CNAME].set.rr
		= knot_rrset_new(qry->sname, KNOT_RRTYPE_CNAME, KNOT_CLASS_IN,
				 new_ttl, ans.mm);
	CHECK_RET(rr ? kr_ok() : -ENOMEM);
	/* Expand: replace the DNAME-owner suffix of sname with the DNAME target. */
	const knot_dname_t *cname_target = knot_dname_replace_suffix(qry->sname,
			knot_dname_labels(dname_owner, NULL), dname_target, ans.mm);
	CHECK_RET(cname_target ? kr_ok() : -ENOMEM);
	const int rdata_len = knot_dname_size(cname_target);

	if (rdata_len <= KNOT_DNAME_MAXLEN
	    && knot_dname_labels(cname_target, NULL) <= KNOT_DNAME_MAXLABELS) {
		/* Normal case: the target name fits. */
		rr->rrs.count = 1;
		rr->rrs.size = knot_rdata_size(rdata_len);
		rr->rrs.rdata = mm_alloc(ans.mm, rr->rrs.size);
		CHECK_RET(rr->rrs.rdata ? kr_ok() : -ENOMEM);
		knot_rdata_init(rr->rrs.rdata, rdata_len, cname_target);
		/* Put link to the RRset into the pkt. */
		ret = pkt_append(pkt, &ans.rrsets[AR_CNAME], eh->rank);
		CHECK_RET(ret);
	} else {
		/* Note that it's basically a successful answer; name just doesn't fit. */
		knot_wire_set_rcode(pkt->wire, KNOT_RCODE_YXDOMAIN);
	}

	answer_simple_qflags(&qry->flags, eh, new_ttl);
	VERBOSE_MSG(qry, "=> satisfied by DNAME+CNAME: rank 0%.2o, new TTL %d\n",
			eh->rank, new_ttl);
	return kr_ok();
}
503 
504 #undef CHECK_RET
505 
/** Process an exact name+type cache hit: check entry consistency,
 * TTL and rank, then answer either from a stored packet or a plain RRset.
 * \param val the raw cache value for the exact key (entry list).
 * \return kr_ok() on answered; kr_error(ENOENT) to fall through to
 *         the zone-cut / NSEC* search; other negatives on hard errors. */
static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val,
			   uint8_t lowest_rank)
{
	struct kr_request *req = ctx->req;
	struct kr_query *qry = req->current_query;

	/* Seek to the sub-entry for the queried type within the entry list. */
	int ret = entry_h_seek(&val, qry->stype);
	if (ret) return ret;
	const struct entry_h *eh = entry_h_consistent_E(val, qry->stype);
	if (kr_fails_assert(eh))
		return kr_error(ENOENT);
		// LATER: recovery in case of error, perhaps via removing the entry?
		// LATER(optim): perhaps optimize the zone cut search

	int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, qry->stype,
					qry->timestamp.tv_sec);
	if (new_ttl < 0 || eh->rank < lowest_rank) {
		/* Positive record with stale TTL or bad rank.
		 * LATER(optim.): It's unlikely that we find a negative one,
		 * so we might theoretically skip all the cache code. */

		VERBOSE_MSG(qry, "=> skipping exact %s: rank 0%.2o (min. 0%.2o), new TTL %d\n",
				eh->is_packet ? "packet" : "RR", eh->rank, lowest_rank, new_ttl);
		return kr_error(ENOENT);
	}

	const uint8_t *eh_bound = knot_db_val_bound(val);
	if (eh->is_packet) {
		/* Note: we answer here immediately, even if it's (theoretically)
		 * possible that we could generate a higher-security negative proof.
		 * Rank is high-enough so we take it to save time searching;
		 * in practice this also helps in some incorrect zones (live-signed). */
		return answer_from_pkt  (ctx, pkt, qry->stype, eh, eh_bound, new_ttl);
	} else {
		return answer_simple_hit(ctx, pkt, qry->stype, eh, eh_bound, new_ttl);
	}
}
544 
545 
546 /** Try to satisfy via wildcard (positively).  See the single call site. */
try_wild(struct key * k,struct answer * ans,const knot_dname_t * clencl_name,const uint16_t type,const uint8_t lowest_rank,const struct kr_query * qry,struct kr_cache * cache)547 static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
548 		    const uint16_t type, const uint8_t lowest_rank,
549 		    const struct kr_query *qry, struct kr_cache *cache)
550 {
551 	knot_db_val_t key = key_exact_type(k, type);
552 	/* Find the record. */
553 	knot_db_val_t val = { NULL, 0 };
554 	int ret = cache_op(cache, read, &key, &val, 1);
555 	if (!ret) {
556 		ret = entry_h_seek(&val, type);
557 	}
558 	if (ret) {
559 		if (kr_fails_assert(ret == kr_error(ENOENT)))
560 			VERBOSE_MSG(qry, "=> wildcard: hit error %d %s\n",
561 					ret, strerror(abs(ret)));
562 		WITH_VERBOSE(qry) {
563 			auto_free char *clencl_str = kr_dname_text(clencl_name),
564 				*type_str = kr_rrtype_text(type);
565 			VERBOSE_MSG(qry, "=> wildcard: not found: *.%s %s\n",
566 					clencl_str, type_str);
567 		}
568 		return ret;
569 	}
570 	/* Check if the record is OK. */
571 	const struct entry_h *eh = entry_h_consistent_E(val, type);
572 	if (kr_fails_assert(eh))
573 		return kr_error(ret);
574 		// LATER: recovery in case of error, perhaps via removing the entry?
575 	int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, type, qry->timestamp.tv_sec);
576 		/* ^^ here we use the *expanded* wildcard name */
577 	if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) {
578 		/* Wildcard record with stale TTL, bad rank or packet.  */
579 		VERBOSE_MSG(qry, "=> wildcard: skipping %s, rank 0%.2o, new TTL %d\n",
580 				eh->is_packet ? "packet" : "RR", eh->rank, new_ttl);
581 		return kr_error(ESTALE);
582 	}
583 	/* Add the RR into the answer. */
584 	ret = entry2answer(ans, AR_ANSWER, eh, knot_db_val_bound(val),
585 			   qry->sname, type, new_ttl);
586 	VERBOSE_MSG(qry, "=> wildcard: answer expanded, ret = %d, new TTL %d\n",
587 			ret, (int)new_ttl);
588 	if (ret) return kr_error(ret);
589 	ans->rcode = PKT_NOERROR;
590 	return kr_ok();
591 }
592 
/** Find the closest cached zone apex for `name` (parent side for DS)
 * and return a freshly allocated copy of it via *apex (caller frees).
 * Falls back to the root when nothing closer is cached. */
int kr_cache_closest_apex(struct kr_cache *cache, const knot_dname_t *name, bool is_DS,
			  knot_dname_t ** apex)
{
	if (kr_fails_assert(cache && cache->db && name && apex && *apex == NULL))
		return kr_error(EINVAL);
	struct key key_mem;
	struct key *key = &key_mem;
	int err = kr_dname_lf(key->buf, name, false);
	if (err)
		return kr_error(err);
	key->zname = name;
	entry_list_t entries;
	err = closest_NS(cache, key, entries, NULL, true, is_DS);
	if (err && err != -abs(ENOENT))
		return err;
	*apex = knot_dname_copy(key->zname, NULL);
	return *apex ? kr_ok() : kr_error(ENOMEM);
}
612 
613 /** \internal for closest_NS.  Check suitability of a single entry, setting k->type if OK.
614  * \return error code, negative iff whole list should be skipped.
615  */
616 static int check_NS_entry(struct key *k, knot_db_val_t entry, int i,
617 			  bool exact_match, bool is_DS,
618 			  const struct kr_query *qry, uint32_t timestamp);
619 
620 /**
621  * Find the longest prefix zone/xNAME (with OK time+rank), starting at k->*.
622  *
623  * The found type is returned via k->type; the values are returned in el.
624  * \note we use k->type = KNOT_RRTYPE_NS also for the nsec_p result.
625  * \param qry can be NULL (-> gettimeofday(), but you lose the stale-serve hook)
626  * \param only_NS don't consider xNAMEs
627  * \return error code
628  */
closest_NS(struct kr_cache * cache,struct key * k,entry_list_t el,struct kr_query * qry,const bool only_NS,const bool is_DS)629 static int closest_NS(struct kr_cache *cache, struct key *k, entry_list_t el,
630 			struct kr_query *qry, const bool only_NS, const bool is_DS)
631 {
632 	/* get the current timestamp */
633 	uint32_t timestamp;
634 	if (qry) {
635 		timestamp = qry->timestamp.tv_sec;
636 	} else {
637 		struct timeval tv;
638 		if (gettimeofday(&tv, NULL)) return kr_error(errno);
639 		timestamp = tv.tv_sec;
640 	}
641 
642 	int zlf_len = k->buf[0];
643 
644 	// LATER(optim): if stype is NS, we check the same value again
645 	bool exact_match = true;
646 	bool need_zero = true;
647 	/* Inspect the NS/xNAME entries, shortening by a label on each iteration. */
648 	do {
649 		k->buf[0] = zlf_len;
650 		knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
651 		knot_db_val_t val;
652 		int ret = cache_op(cache, read, &key, &val, 1);
653 		if (ret == kr_error(ENOENT)) goto next_label;
654 		if (kr_fails_assert(ret == 0)) {
655 			if (need_zero) memset(el, 0, sizeof(entry_list_t));
656 			return kr_error(ret);
657 		}
658 
659 		/* Check consistency, find any type;
660 		 * using `goto` for shortening by another label. */
661 		ret = entry_list_parse(val, el);
662 		if (kr_fails_assert(ret == 0)) // do something about it?
663 			goto next_label;
664 		need_zero = false;
665 		/* More types are possible; try in order.
666 		 * For non-fatal failures just "continue;" to try the next type. */
667 		/* Now a complication - we need to try EL_DNAME before NSEC*
668 		 * (Unfortunately that's not easy to write very nicely.) */
669 		if (!only_NS) {
670 			const int i = EL_DNAME;
671 			ret = check_NS_entry(k, el[i], i, exact_match, is_DS,
672 						qry, timestamp);
673 			if (ret < 0) goto next_label; else
674 			if (!ret) {
675 				/* We found our match. */
676 				k->zlf_len = zlf_len;
677 				return kr_ok();
678 			}
679 		}
680 		const int el_count = only_NS ? EL_NS + 1 : EL_LENGTH;
681 		for (int i = 0; i < el_count; ++i) {
682 			if (i == EL_DNAME) continue;
683 			ret = check_NS_entry(k, el[i], i, exact_match, is_DS,
684 						qry, timestamp);
685 			if (ret < 0) goto next_label; else
686 			if (!ret) {
687 				/* We found our match. */
688 				k->zlf_len = zlf_len;
689 				return kr_ok();
690 			}
691 		}
692 
693 	next_label:
694 		/* remove one more label */
695 		exact_match = false;
696 		if (k->zname[0] == 0) {
697 			/* We miss root NS in cache, but let's at least assume it exists. */
698 			k->type = KNOT_RRTYPE_NS;
699 			k->zlf_len = zlf_len;
700 			kr_assert(zlf_len == 0);
701 			if (need_zero) memset(el, 0, sizeof(entry_list_t));
702 			return kr_error(ENOENT);
703 		}
704 		zlf_len -= (k->zname[0] + 1);
705 		k->zname += (k->zname[0] + 1);
706 		k->buf[zlf_len + 1] = 0;
707 	} while (true);
708 }
709 
/** Check suitability of a single entry-list slot `i` for closest_NS,
 * setting k->type on success.  Entries are skipped (ESKIP) when empty,
 * when the type doesn't make sense at this match position, or when
 * TTL/rank checks fail; see the per-condition comments below.
 * \return 0 OK, ESKIP to try another slot, negative to skip the whole list. */
static int check_NS_entry(struct key *k, const knot_db_val_t entry, const int i,
			  const bool exact_match, const bool is_DS,
			  const struct kr_query *qry, uint32_t timestamp)
{
	const int ESKIP = ABS(ENOENT);
	if (!entry.len
		/* On a zone cut we want DS from the parent zone. */
		|| (exact_match && is_DS)
		/* CNAME is interesting only if we
		 * directly hit the name that was asked.
		 * Note that we want it even in the DS case. */
		|| (i == EL_CNAME && !exact_match)
		/* DNAME is interesting only if we did NOT
		 * directly hit the name that was asked. */
		|| (i == EL_DNAME && exact_match)
	   ) {
		return ESKIP;
	}

	uint16_t type;
	if (i < ENTRY_APEX_NSECS_CNT) {
		/* An nsec_p slot: only its timestamp-based TTL is checked. */
		type = KNOT_RRTYPE_NS;
		int32_t log_new_ttl = -123456789; /* visually recognizable value */
		const int err = nsec_p_ttl(entry, timestamp, &log_new_ttl);
		if (err) {
			VERBOSE_MSG(qry,
				"=> skipping unfit nsec_p: new TTL %d, error %d\n",
				(int)log_new_ttl, err);
			return ESKIP;
		}
	} else {
		type = EL2RRTYPE(i);
		/* Find the entry for the type, check positivity, TTL */
		const struct entry_h *eh = entry_h_consistent_E(entry, type);
		if (kr_fails_assert(eh)) {
			VERBOSE_MSG(qry, "=> EH not consistent\n");
			return kr_error(EILSEQ);
		}
		const int32_t log_new_ttl = get_new_ttl(eh, qry, k->zname, type, timestamp);

		const bool ok = /* Not interested in negative bogus or outdated RRs. */
			!eh->is_packet && log_new_ttl >= 0
			/* For NS any kr_rank is accepted, as insecure or even nonauth is OK */
			&& (type == KNOT_RRTYPE_NS
			    || eh->rank >= get_lowest_rank(qry, k->zname, type));

		WITH_VERBOSE(qry) { if (!ok) {
			auto_free char *type_str = kr_rrtype_text(type);
			const char *packet_str = eh->is_packet ? "packet" : "RR";
			VERBOSE_MSG(qry,
				"=> skipping unfit %s %s: rank 0%.2o, new TTL %d\n",
				type_str, packet_str, eh->rank, (int)log_new_ttl);
		} }
		if (!ok) return ESKIP;
	}
	k->type = type;
	return kr_ok();
}
768 
769