/*  Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
 *  SPDX-License-Identifier: GPL-3.0-or-later
 */

/** @file
 * Internal header for cache implementation(s).
 * Only the LMDB backend works for now.
 */
#pragma once

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <libdnssec/error.h>
#include <libdnssec/nsec.h>
#include <libknot/consts.h>
#include <libknot/db/db.h>
#include <libknot/dname.h>

#include "contrib/cleanup.h"
#include "contrib/murmurhash3/murmurhash3.h" /* hash() for nsec_p_hash() */
#include "lib/cache/cdb_api.h"
#include "lib/resolve.h"

/* Cache entry values - binary layout.
 *
 * The layout depends on the entry type, which is recognizable from the key.
 * Code depending on the contents of the key is marked by CACHE_KEY_DEF.
 *
 * 'E' entry (exact hit):
 *	- ktype == NS: struct entry_apex - multiple types inside (NS and xNAME);
 *	- ktype != NS: struct entry_h
 *	    * is_packet: uint16_t length, the rest is opaque and handled by ./entry_pkt.c
 *	    * otherwise RRset + its RRSIG set (possibly empty).
 * '1' or '3' entry (NSEC or NSEC3):
 *	- struct entry_h, contents are the same as for an exact hit
 *	- flags don't make sense there
 */

struct entry_h {
	uint32_t time;	/**< The time of inception. */
	uint32_t ttl;	/**< TTL at inception moment.  Assuming it fits into int32_t ATM. */
	uint8_t  rank : 6;	/**< See enum kr_rank */
	bool is_packet : 1;	/**< Negative-answer packet for insecure/bogus name. */
	bool has_optout : 1;	/**< Only for packets; persisted DNSSEC_OPTOUT. */
	uint8_t _pad;		/**< We need even alignment for data now. */
	uint8_t data[];
/* Well, we don't really need packing or alignment changes,
 * but due to LMDB the whole structure may not be stored at an aligned address,
 * and we need compilers (for non-x86) to know it to avoid SIGBUS (test: UBSAN). */
} __attribute__ ((packed,aligned(1)));
struct entry_apex;

/** Check basic consistency of entry_h for 'E' entries, not looking into ->data.
 * (for is_packet the length of data is checked)
 */
KR_EXPORT
struct entry_h * entry_h_consistent_E(knot_db_val_t data, uint16_t type);

struct entry_apex * entry_apex_consistent(knot_db_val_t val);

/** Consistency check, ATM common for NSEC and NSEC3. */
static inline struct entry_h * entry_h_consistent_NSEC(knot_db_val_t data)
{
	/* ATM it's enough to just extend the checks for exact entries. */
	const struct entry_h *eh = entry_h_consistent_E(data, KNOT_RRTYPE_NSEC);
	bool ok = eh != NULL;
	ok = ok && !eh->is_packet && !eh->has_optout;
	return ok ? /*const-cast*/(struct entry_h *)eh : NULL;
}

static inline struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type)
{
	switch (type) {
	case KNOT_RRTYPE_NSEC:
	case KNOT_RRTYPE_NSEC3:
		return entry_h_consistent_NSEC(data);
	default:
		return entry_h_consistent_E(data, type);
	}
}

/* nsec_p* - NSEC* chain parameters */

static inline int nsec_p_rdlen(const uint8_t *rdata)
{
	// TODO: do we really need the zero case?
	return rdata ? 5 + rdata[4] : 0; /* rfc5155 4.2 and 3.2. */
}
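/* Why 5 + rdata[4]: NSEC3PARAM rdata (RFC 5155, 4.2) is hash algorithm (1 B)
 * + flags (1 B) + iterations (2 B) + salt length (1 B) + salt, so the fixed
 * part is 5 bytes and rdata[4] holds the salt length. */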
static const int NSEC_P_MAXLEN = sizeof(uint32_t) + 5 + 255; // TODO: remove??

/** Hash of NSEC3 parameters, used as a tag to separate different chains for same zone. */
typedef uint32_t nsec_p_hash_t;
static inline nsec_p_hash_t nsec_p_mkHash(const uint8_t *nsec_p)
{
	kr_require(nsec_p && !(KNOT_NSEC3_FLAG_OPT_OUT & nsec_p[1]));
	return hash((const char *)nsec_p, nsec_p_rdlen(nsec_p));
}

/** NSEC* parameters for the chain. */
struct nsec_p {
	const uint8_t *raw; /**< Pointer to raw NSEC3 parameters; NULL for NSEC. */
	nsec_p_hash_t hash; /**< Hash of `raw`, used for cache keys. */
	dnssec_nsec3_params_t libknot; /**< Format for libknot; owns malloced memory! */
};



/** LATER(optim.): this is overshot, but struct key usage should be cheap ATM. */
#define KR_CACHE_KEY_MAXLEN (KNOT_DNAME_MAXLEN + 100) /* CACHE_KEY_DEF */

struct key {
	const knot_dname_t *zname; /**< current zone name (points within qry->sname) */
	uint8_t zlf_len; /**< length of current zone's lookup format */

	/** Corresponding key type; e.g. NS for CNAME.
	 * Note: NSEC type is ambiguous (exact and range key). */
	uint16_t type;
	/** The key data starts at buf+1, and buf[0] contains some length.
	 * For details see key_exact* and key_NSEC* functions. */
	uint8_t buf[KR_CACHE_KEY_MAXLEN];
	/* LATER(opt.): ^^ probably change the anchoring, so that kr_dname_lf()
	 * doesn't need to move data after knot_dname_lf(). */
};
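
/* A rough sketch of the key layouts (key data starting at buf+1), pieced
 * together from key_nwz_off()/key_nsec3_hash_off() below; the CACHE_KEY_DEF
 * sites are authoritative:
 *	exact: owner name LF | '\0' | 'E' | RRTYPE
 *	NSEC:  zone name LF | '\0' | '1' | name-within-zone LF
 *	NSEC3: zone name LF | '\0' | '3' | nsec_p_hash_t | NSEC3 name hash
 */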

static inline size_t key_nwz_off(const struct key *k)
{
	/* CACHE_KEY_DEF: zone name LF + '\0' + tag ('1' or '3').
	 * The NSEC '1' case continues just with the name within zone. */
	return k->zlf_len + 2;
}
static inline size_t key_nsec3_hash_off(const struct key *k)
{
	/* CACHE_KEY_DEF NSEC3: tag (nsec_p_hash_t) + 20-byte NSEC3 name hash */
	return key_nwz_off(k) + sizeof(nsec_p_hash_t);
}
/** Hash is always SHA1; I see no plans to standardize anything else.
 * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml#dnssec-nsec3-parameters-3
 */
static const int NSEC3_HASH_LEN = 20,
		 NSEC3_HASH_TXT_LEN = 32;

/** Finish constructing a string key for exact search.
 * It's assumed that kr_dname_lf(k->buf, owner, *) has been run.
 */
knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type);

/** Like key_exact_type_maypkt but with extra checks if used for RRs only. */
static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type)
{
	switch (type) {
	/* Sanity check: forbidden types represented in other way(s). */
	case KNOT_RRTYPE_NSEC:
	case KNOT_RRTYPE_NSEC3:
		kr_assert(false);
		return (knot_db_val_t){ NULL, 0 };
	}
	return key_exact_type_maypkt(k, type);
}
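
/* Illustrative use (a sketch, not part of the API): build an exact-match key
 * for an A RRset and look it up with the backend; `owner` and `cache` are
 * hypothetical locals here.
 *
 *	struct key k = { 0 };
 *	kr_dname_lf(k.buf, owner, false);
 *	knot_db_val_t key = key_exact_type(&k, KNOT_RRTYPE_A);
 *	knot_db_val_t val;
 *	int ret = cache_op(cache, read, &key, &val, 1);
 */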


/* entry_h chaining; implementation in ./entry_list.c */

enum { ENTRY_APEX_NSECS_CNT = 2 };

/** Header of 'E' entry with ktype == NS.  Inside is private to ./entry_list.c
 *
 * We store xNAME at NS type to lower the number of searches in closest_NS().
 * CNAME is only considered for equal name, of course.
 * We also store NSEC* parameters at NS type.
 */
struct entry_apex {
	/* ENTRY_H_FLAGS */
	bool has_ns : 1;
	bool has_cname : 1;
	bool has_dname : 1;

	uint8_t pad_; /**< 1 byte + 2 bytes + x bytes would be weird; let's do 2+2+x. */

	/** We have two slots for NSEC* parameters.
	 *
	 * This array describes how they're filled;
	 * values:  0: none, 1: NSEC, 3: NSEC3.
	 *
	 * Two slots are a compromise to smoothly handle normal rollovers
	 * (either changing NSEC3 parameters or between NSEC and NSEC3). */
	int8_t nsecs[ENTRY_APEX_NSECS_CNT];
	uint8_t data[];
	/* XXX: if not first, stamp of last being the first?
	 * Purpose: save cache operations if rolled the algo/params long ago. */
};
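/* For example, nsecs == {1, 0} would mean the zone is known to use plain NSEC
 * only, while {1, 3} could occur mid-rollover: one NSEC chain plus one NSEC3
 * chain.  This reading is sketched from the comment above; ./entry_list.c is
 * authoritative. */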

/** Indices for decompressed entry_list_t. */
enum EL {
	EL_NS = ENTRY_APEX_NSECS_CNT,
	EL_CNAME,
	EL_DNAME,
	EL_LENGTH
};
/** Decompressed entry_apex.  It's an array of unparsed entry_h references.
 * Note: arrays are passed "by reference" to functions (in C99). */
typedef knot_db_val_t entry_list_t[EL_LENGTH];

static inline uint16_t EL2RRTYPE(enum EL i)
{
	switch (i) {
	case EL_NS:	return KNOT_RRTYPE_NS;
	case EL_CNAME:	return KNOT_RRTYPE_CNAME;
	case EL_DNAME:	return KNOT_RRTYPE_DNAME;
	default:	kr_assert(false);  return 0;
	}
}

/** There may be multiple entries within, so rewind `val` to the one we want.
 *
 * ATM there are multiple types only for the NS ktype - it also accommodates xNAMEs.
 * \note `val->len` represents the bound of the whole list, not of a single entry.
 * \note in case of ENOENT, `val` is still rewound to the beginning of the next entry.
 * \return error code
 * TODO: maybe get rid of this API?
 */
int entry_h_seek(knot_db_val_t *val, uint16_t type);

/** Prepare space to insert an entry.
 *
 * Some checks are performed (rank, TTL), the current entry in cache is copied
 * with a hole ready for the new entry (old one of the same type is cut out).
 *
 * \param val_new_entry The only changing parameter; ->len is read, ->data written.
 * \return error code
 */
int entry_h_splice(
	knot_db_val_t *val_new_entry, uint8_t rank,
	const knot_db_val_t key, const uint16_t ktype, const uint16_t type,
	const knot_dname_t *owner/*log only*/,
	const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp);

/** Parse an entry_apex into individual items.  @return error code. */
KR_EXPORT int entry_list_parse(const knot_db_val_t val, entry_list_t list);

static inline size_t to_even(size_t n)
{
	return n + (n & 1);
}
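/* e.g. to_even(5) == 6 and to_even(8) == 8; items get padded to even sizes
 * so each entry_h inside stays at an even offset (cf. entry_h::_pad). */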

static inline int entry_list_serial_size(const entry_list_t list)
{
	int size = offsetof(struct entry_apex, data);
	for (int i = 0; i < EL_LENGTH; ++i) {
		size += to_even(list[i].len);
	}
	return size;
}

/** Fill contents of an entry_apex.
 *
 * @note NULL pointers are overwritten - caller may like to fill the space later.
 */
void entry_list_memcpy(struct entry_apex *ea, entry_list_t list);



/* Packet caching; implementation in ./entry_pkt.c */

/** Stash the packet into cache (if suitable, etc.)
 * \param needs_pkt we need the packet due to not stashing some RRs;
 * 		see stash_rrset() for details
 * It assumes check_dname_for_lf(). */
void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry,
		const struct kr_request *req, bool needs_pkt);

/** Try answering from packet cache, given an entry_h.
 *
 * This assumes the TTL is OK and entry_h_consistent, but it may still return an error.
 * On success it handles all the rest, incl. qry->flags.
 */
int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);


/** Record is expiring if less than 5 s plus 1% of the original TTL remains. */
static inline bool is_expiring(uint32_t orig_ttl, uint32_t new_ttl)
{
	int64_t nttl = new_ttl; /* avoid potential over/under-flow */
	return 100 * (nttl - 5) < orig_ttl;
}
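/* Worked example: with orig_ttl == 3600 the record counts as expiring once
 * new_ttl < 3600/100 + 5 == 41 seconds: 100 * (41 - 5) == 3600 is not < 3600,
 * while 100 * (40 - 5) == 3500 is. */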

/** Returns a signed result so you can inspect how stale the RR is.
 *
 * @param owner name for stale-serving decisions.  You may pass NULL to disable stale.
 * @note: NSEC* uses zone name ATM; for NSEC3 the owner may not even be knowable.
 * @param type for stale-serving.
 */
int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry,
                    const knot_dname_t *owner, uint16_t type, uint32_t now);


/* RRset (de)materialization; implementation in ./entry_rr.c */

/** Size of the RR count field */
#define KR_CACHE_RR_COUNT_SIZE sizeof(uint16_t)

/** Compute size of serialized rdataset.  NULL is accepted as an empty set. */
static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds)
{
	return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 : rds->size);
}

/** Analyze the length of a dematerialized rdataset.
 * Note that in the data it's KR_CACHE_RR_COUNT_SIZE and then this returned size. */
static inline int rdataset_dematerialized_size(const uint8_t *data, uint16_t *rdataset_count)
{
	uint16_t count;
	static_assert(sizeof(count) == KR_CACHE_RR_COUNT_SIZE,
			"Unexpected KR_CACHE_RR_COUNT_SIZE.");
	memcpy(&count, data, sizeof(count));
	const uint8_t *rdata = data + sizeof(count);
	if (rdataset_count) // memcpy is safe for unaligned case (on non-x86)
		memcpy(rdataset_count, &count, sizeof(count));
	for (int i = 0; i < count; ++i) {
		__typeof__(((knot_rdata_t *)NULL)->len) len; // memcpy as above
		memcpy(&len, rdata + offsetof(knot_rdata_t, len), sizeof(len));
		rdata += knot_rdata_size(len);
	}
	return rdata - (data + sizeof(count));
}
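/* Serialized layout, as read back above (a sketch; ./entry_rr.c is authoritative):
 *
 *	uint16_t count;             // RR count, possibly unaligned
 *	knot_rdata_t rdata[count];  // each holds its len field + data,
 *	                            // consecutive entries spaced by knot_rdata_size(len)
 */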

/** Serialize an rdataset.  It may be NULL as short-hand for empty. */
void rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data);


/** Partially constructed answer when gathering RRsets from cache. */
struct answer {
	int rcode;		/**< PKT_NODATA, etc. */
	struct nsec_p nsec_p;	/**< Don't mix different NSEC* parameters in one answer. */
	knot_mm_t *mm;		/**< Allocator for rrsets */
	struct answer_rrset {
		ranked_rr_array_entry_t set;	/**< set+rank for the main data */
		knot_rdataset_t sig_rds;	/**< RRSIG data, if any */
	} rrsets[1+1+3]; /**< see AR_ANSWER and friends; only required records are filled */
};
enum {
	AR_ANSWER = 0, /**< Positive answer record.  It might be wildcard-expanded. */
	AR_SOA,  /**< SOA record. */
	AR_NSEC, /**< NSEC* covering or matching the SNAME (next closer name in NSEC3 case). */
	AR_WILD, /**< NSEC* covering or matching the source of synthesis. */
	AR_CPE,  /**< NSEC3 matching the closest provable encloser. */
};

/** Materialize RRset + RRSIGs into ans->rrsets[id].
 * LATER(optim.): it's slightly wasteful that we allocate knot_rrset_t for the packet
 *
 * \return error code.  They are all bad conditions and "guarded" by kresd's assertions.
 */
int entry2answer(struct answer *ans, int id,
		const struct entry_h *eh, const uint8_t *eh_bound,
		const knot_dname_t *owner, uint16_t type, uint32_t new_ttl);


/* Preparing knot_pkt_t for cache answer from RRs; implementation in ./knot_pkt.c */

/** Prepare answer packet to be filled by RRs (without RR data in wire). */
int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type);

/** Append RRset + its RRSIGs into the current section (*shallow* copy), with given rank.
 *
 * \note it works with an empty set as well (skipped)
 * \note pkt->wire is not updated in any way
 * \note KNOT_CLASS_IN is assumed
 * \note Whole RRsets are put into the pseudo-packet;
 *       normal parsed packets would only contain single-RR sets.
 */
int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank);



/* NSEC (1) stuff.  Implementation in ./nsec1.c */

/** Construct a string key for NSEC (1) predecessor-search.
 * \param add_wildcard Act as if the name was extended by "*."
 * \note k->zlf_len is assumed to have been correctly set */
knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard);

/** Closest encloser check for NSEC (1).
 * To understand the interface, see the call point.
 * \param k	space to store key + input: zname and zlf_len
 * \return 0: success;  >0: try other (NSEC3);  <0: exit cache immediately. */
int nsec1_encloser(struct key *k, struct answer *ans,
		   const int sname_labels, int *clencl_labels,
		   knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz,
		   const struct kr_query *qry, struct kr_cache *cache);

/** Source of synthesis (SS) check for NSEC (1).
 * To understand the interface, see the call point.
 * \return 0: continue; <0: exit cache immediately;
 * 	AR_SOA: skip to adding SOA (SS was covered or matched for NODATA). */
int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
		    knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz,
		    const struct kr_query *qry, struct kr_cache *cache);


/* NSEC3 stuff.  Implementation in ./nsec3.c */

/** Construct a string key for NSEC3 predecessor-search, from an NSEC3 name.
 * \note k->zlf_len is assumed to have been correctly set */
knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name,
			const nsec_p_hash_t nsec_p_hash);

/** TODO.  See nsec1_encloser(...) */
int nsec3_encloser(struct key *k, struct answer *ans,
		   const int sname_labels, int *clencl_labels,
		   const struct kr_query *qry, struct kr_cache *cache);

/** TODO.  See nsec1_src_synth(...) */
int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
		    const struct kr_query *qry, struct kr_cache *cache);



#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), CACHE,  ## __VA_ARGS__)
#define WITH_VERBOSE(qry) if (kr_log_is_debug_qry(CACHE, (qry)))

/** Shorthand for operations on cache backend */
#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, &(cache)->stats, ## __VA_ARGS__)
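/* For instance, `cache_op(cache, read, &key, &val, 1)` expands to
 * `(cache)->api->read((cache)->db, &(cache)->stats, &key, &val, 1)`;
 * the available ops are those of the backend API (see cdb_api.h). */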


static inline uint16_t get_uint16(const void *address)
{
	uint16_t tmp;
	memcpy(&tmp, address, sizeof(tmp));
	return tmp;
}

/** Useful pattern, especially as void-pointer arithmetic isn't standard-compliant. */
static inline uint8_t * knot_db_val_bound(knot_db_val_t val)
{
	return (uint8_t *)val.data + val.len;
}