1 /*
2  * iterator/iterator.h - iterative resolver DNS query response module
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains a module that performs recursive iterative DNS query
40  * processing.
41  */
42 
43 #ifndef ITERATOR_ITERATOR_H
44 #define ITERATOR_ITERATOR_H
45 #include "services/outbound_list.h"
46 #include "util/data/msgreply.h"
47 #include "util/module.h"
48 struct delegpt;
49 struct iter_hints;
50 struct iter_forwards;
51 struct iter_donotq;
52 struct iter_prep_list;
53 struct iter_priv;
54 struct rbtree_type;
55 
56 /** max number of targets spawned for a query and its subqueries */
57 #define MAX_TARGET_COUNT	64
58 /** max number of upstream queries for a query and its subqueries, it is
59  * never reset. */
60 #define MAX_GLOBAL_QUOTA	128
61 /** max number of target lookups per qstate, per delegation point */
62 #define MAX_DP_TARGET_COUNT	16
63 /** max number of nxdomains allowed for target lookups for a query and
64  * its subqueries */
65 #define MAX_TARGET_NX		5
66 /** max number of nxdomains allowed for target lookups for a query and
67  * its subqueries when fallback has kicked in */
68 #define MAX_TARGET_NX_FALLBACK	(MAX_TARGET_NX*2)
69 /** max number of referrals. Makes sure resolver does not run away */
70 #define MAX_REFERRAL_COUNT	130
71 /** max number of queries for which to perform dnssec lameness detection
72  * (rrsigs missing detection); after that, just pick up that response */
73 #define DNSSEC_LAME_DETECT_COUNT 4
74 /**
75  * max number of QNAME minimisation iterations. Limits number of queries for
76  * QNAMEs with a lot of labels.
77  */
78 #define MAX_MINIMISE_COUNT	10
79 /** max number of time-outs for minimised query. Prevents resolving failures
80  * when the QNAME minimisation QTYPE is blocked. */
81 #define MAX_MINIMISE_TIMEOUT_COUNT 3
82 /**
83  * number of labels from QNAME that are always sent individually when using
84  * QNAME minimisation, even when the number of labels of the QNAME is bigger
85  * than MAX_MINIMISE_COUNT. MINIMISE_MULTIPLE_LABS is the rest of that count. */
86 #define MINIMISE_ONE_LAB	4
87 #define MINIMISE_MULTIPLE_LABS	(MAX_MINIMISE_COUNT - MINIMISE_ONE_LAB)
88 /** at what query-sent-count to stop target fetch policy */
89 #define TARGET_FETCH_STOP	3
90 /** how nice is a server without further information, in msec.
91  * Equals rtt initial timeout value.
92  */
93 extern int UNKNOWN_SERVER_NICENESS;
94 /** maximum timeout before a host is deemed unsuitable, in msec.
95  * After host_ttl this will be timed out and the host will be tried again.
96  * Equals RTT_MAX_TIMEOUT, and thus when RTT_MAX_TIMEOUT is overwritten by
97  * config infra_cache_max_rtt, it will be overwritten as well. */
98 extern int USEFUL_SERVER_TOP_TIMEOUT;
99 /** penalty to validation failed blacklisted IPs.
100  * Equals USEFUL_SERVER_TOP_TIMEOUT*4, and thus when RTT_MAX_TIMEOUT is
101  * overwritten by config infra_cache_max_rtt, it will be overwritten as well. */
102 extern int BLACKLIST_PENALTY;
103 /** RTT band, within this amount from the best, servers are chosen randomly.
104  * Chosen so that the UNKNOWN_SERVER_NICENESS falls within the band of a
105  * fast server, this causes server exploration as a side benefit. msec. */
106 #define RTT_BAND 400
107 /** Number of retries for empty nodata packets before it is accepted. */
108 #define EMPTY_NODATA_RETRY_COUNT 2
109 
110 /**
111  * Global state for the iterator.
112  */
113 struct iter_env {
114 	/** A flag to indicate whether or not we have an IPv6 route */
115 	int supports_ipv6;
116 
117 	/** A flag to indicate whether or not we have an IPv4 route */
118 	int supports_ipv4;
119 
120 	/** A flag to locally apply NAT64 to make IPv4 addrs into IPv6 */
121 	int use_nat64;
122 
123 	/** NAT64 prefix address, cf. dns64_env->prefix_addr */
124 	struct sockaddr_storage nat64_prefix_addr;
125 
126 	/** length of nat64_prefix_addr, sizeof(sockaddr_in6) */
127 	socklen_t nat64_prefix_addrlen;
128 
129 	/** CIDR mask length of NAT64 prefix */
130 	int nat64_prefix_net;
131 
132 	/** A set of inetaddrs that should never be queried. */
133 	struct iter_donotq* donotq;
134 
135 	/** private address space and private domains */
136 	struct iter_priv* priv;
137 
138 	/** whitelist for capsforid names */
139 	struct rbtree_type* caps_white;
140 
141 	/** The maximum dependency depth that this resolver will pursue. */
142 	int max_dependency_depth;
143 
144 	/**
145 	 * The target fetch policy for each dependency level. This is
146 	 * described as a simple number (per dependency level):
147 	 *	negative numbers (usually just -1) mean fetch-all,
148 	 *	0 means only fetch on demand, and
149 	 *	positive numbers mean to fetch at most that many targets.
150 	 * The array has max_dependency_depth+1 entries, one per level.
151 	 */
152 	int* target_fetch_policy;
153 
154 	/** lock on ratelimit counter */
155 	lock_basic_type queries_ratelimit_lock;
156 	/** number of queries that have been ratelimited */
157 	size_t num_queries_ratelimited;
158 
159 	/** number of retries on outgoing queries */
160 	int outbound_msg_retry;
161 
162 	/** max number of queries sent. NOTE(review): presumably the limit for iter_qstate sent_count, confirm */
163 	int max_sent_count;
164 
165 	/** max number of query restarts to limit length of CNAME chain */
166 	int max_query_restarts;
167 };
168 
169 /**
170  * QNAME minimisation state
171  */
172 enum minimisation_state {
173 	/**
174 	 * (Re)start minimisation. Outgoing QNAME should be set to dp->name.
175 	 * State entered on new query or after following referral or CNAME.
176 	 */
177 	INIT_MINIMISE_STATE = 0,
178 	/**
179 	 * QNAME minimisation ongoing. Increase QNAME on every iteration.
180 	 */
181 	MINIMISE_STATE,
182 	/**
183 	 * Do not increase the QNAME in this iteration.
184 	 */
185 	SKIP_MINIMISE_STATE,
186 	/**
187 	 * Minimisation is not applied: send out full QNAME + original QTYPE.
188 	 */
189 	DONOT_MINIMISE_STATE,
190 };
191 
192 /**
193  * State of the iterator for a query.
194  */
195 enum iter_state {
196 	/**
197 	 * Externally generated queries start at this state. Query restarts are
198 	 * reset to this state.
199 	 */
200 	INIT_REQUEST_STATE = 0,
201 
202 	/**
203 	 * Root priming events reactivate here, most other events pass
204 	 * through this naturally as the 2nd part of the INIT_REQUEST_STATE.
205 	 */
206 	INIT_REQUEST_2_STATE,
207 
208 	/**
209 	 * Stub priming events reactivate here, most other events pass
210 	 * through this naturally as the 3rd part of the INIT_REQUEST_STATE.
211 	 */
212 	INIT_REQUEST_3_STATE,
213 
214 	/**
215 	 * Each time a delegation point changes for a given query or a
216 	 * query times out and/or wakes up, this state is (re)visited.
217 	 * This state is responsible for iterating through a list of
218 	 * nameserver targets.
219 	 */
220 	QUERYTARGETS_STATE,
221 
222 	/**
223 	 * Responses to queries start at this state. This state handles
224 	 * the decision tree associated with handling responses.
225 	 */
226 	QUERY_RESP_STATE,
227 
228 	/** Responses to priming queries finish at this state. */
229 	PRIME_RESP_STATE,
230 
231 	/** Collecting query class information, for qclass=ANY. When
232 	 * it spawns off queries for every class, it returns here. */
233 	COLLECT_CLASS_STATE,
234 
235 	/** Find NS record to resolve DS record from, walking to the right
236 	 * NS spot until we find it */
237 	DSNS_FIND_STATE,
238 
239 	/** Responses that are to be returned upstream end at this state,
240 	 * as do responses to target queries. */
241 	FINISHED_STATE
242 };
243 
244 /**
245  * Shared counters for queries.
246  */
247 enum target_count_variables {
248 	/** Reference count for the shared iter_qstate->target_count. */
249 	TARGET_COUNT_REF = 0,
250 	/** Number of target queries spawned for the query and subqueries. */
251 	TARGET_COUNT_QUERIES,
252 	/** Number of nxdomain responses encountered. */
253 	TARGET_COUNT_NX,
254 	/** Global quota on number of queries to upstream servers per
255 	 * client request, that is never reset. */
256 	TARGET_COUNT_GLOBAL_QUOTA,
257 
258 	/** Must stay last: equals the number of counters above, used for the allocation */
259 	TARGET_COUNT_MAX,
260 };
261 
262 /**
263  * Per query state for the iterator module.
264  */
265 struct iter_qstate {
266 	/**
267 	 * State of the iterator module.
268 	 * This is the state that the event is in or should be sent to -- all
269 	 * requests should start with the INIT_REQUEST_STATE. All
270 	 * responses should start with QUERY_RESP_STATE. Subsequent
271 	 * processing of the event will change this state.
272 	 */
273 	enum iter_state state;
274 
275 	/**
276 	 * Final state for the iterator module.
277 	 * This is the state that responses should be routed to once the
278 	 * response is final. For externally initiated queries, this
279 	 * will be FINISHED_STATE, locally initiated queries will have
280 	 * different final states.
281 	 */
282 	enum iter_state final_state;
283 
284 	/**
285 	 * The depth of this query, this means the depth of recursion.
286 	 * This address is needed for another query, which is an address
287 	 * needed for another query, etc. Original client query has depth 0.
288 	 */
289 	int depth;
290 
291 	/**
292 	 * The response
293 	 */
294 	struct dns_msg* response;
295 
296 	/**
297 	 * This is a list of RRsets that must be prepended to the
298 	 * ANSWER section of a response before being sent upstream.
299 	 */
300 	struct iter_prep_list* an_prepend_list;
301 	/** Last element of the answer prepend list */
302 	struct iter_prep_list* an_prepend_last;
303 
304 	/**
305 	 * This is the list of RRsets that must be prepended to the
306 	 * AUTHORITY section of the response before being sent upstream.
307 	 */
308 	struct iter_prep_list* ns_prepend_list;
309 	/** Last element of the authority prepend list */
310 	struct iter_prep_list* ns_prepend_last;
311 
312 	/** query name used for chasing the results. Initially the same as
313 	 * the state qinfo, but after CNAMEs this will be different.
314 	 * The query info used to elicit the results needed. */
315 	struct query_info qchase;
316 	/** query flags to use when chasing the answer (i.e. RD flag) */
317 	uint16_t chase_flags;
318 	/** true if we set RD bit because of last resort recursion lame query */
319 	int chase_to_rd;
320 
321 	/**
322 	 * This is the current delegation point for an in-progress query. This
323 	 * object retains state as to which delegation targets need to be
324 	 * (sub)queried for vs which ones have already been visited.
325 	 */
326 	struct delegpt* dp;
327 
328 	/** state for 0x20 fallback when capsfail happens, 0 not a fallback */
329 	int caps_fallback;
330 	/** state for capsfail: current server number to try */
331 	size_t caps_server;
332 	/** state for capsfail: stored reply for comparisons. Can be NULL if
333 	 * no response had been seen prior to starting the fallback. */
334 	struct reply_info* caps_reply;
335 	struct dns_msg* caps_response;
336 
337 	/** Current delegation message - returned for non-RD queries */
338 	struct dns_msg* deleg_msg;
339 
340 	/** number of outstanding target sub queries */
341 	int num_target_queries;
342 
343 	/** outstanding direct queries */
344 	int num_current_queries;
345 
346 	/** the number of times this query has been restarted. */
347 	int query_restart_count;
348 
349 	/** the number of times this query has followed a referral. */
350 	int referral_count;
351 
352 	/** number of queries fired off */
353 	int sent_count;
354 
355 	/** malloced-array shared with this query and its subqueries. It keeps
356 	 * track of the counters defined in enum target_count_variables. */
357 	int* target_count;
358 
359 	/** number of target lookups per delegation point. Reset to 0 after
360 	 * receiving referral answer. Not shared with subqueries. */
361 	int dp_target_count;
362 
363 	/** Delegation point that triggered the NXNS fallback; shared with
364 	 * this query and its subqueries, count-referenced by the reference
365 	 * counter in target_count.
366 	 * This also marks the fallback activation. */
367 	uint8_t** nxns_dp;
368 
369 	/** if true, already tested for ratelimiting and passed the test */
370 	int ratelimit_ok;
371 
372 	/**
373 	 * The query must store NS records from referrals as parentside RRs.
374 	 * Enabled once it hits resolution problems, to throttle retries.
375 	 * If enabled it is the pointer to the old delegation point with
376 	 * the old retry counts for bad-nameserver-addresses.
377 	 */
378 	struct delegpt* store_parent_NS;
379 
380 	/**
381 	 * The query is for parent-side glue (A or AAAA) for a nameserver.
382 	 * If the item is seen as glue in a referral, and pside_glue is NULL,
383 	 * then it is stored in pside_glue for later.
384 	 * If it was never seen, at the end, then a negative caching element
385 	 * must be created.
386 	 * The (data or negative) RR cache element then throttles retries.
387 	 */
388 	int query_for_pside_glue;
389 	/** the parent-side-glue element (NULL if none, it is the first match) */
390 	struct ub_packed_rrset_key* pside_glue;
391 
392 	/** If nonNULL we are walking upwards from DS query to find NS */
393 	uint8_t* dsns_point;
394 	/** length of the dname in dsns_point */
395 	size_t dsns_point_len;
396 
397 	/**
398 	 * expected dnssec information for this iteration step.
399 	 * If dnssec rrsigs are expected and not given, the server is marked
400 	 * lame (dnssec-lame).
401 	 */
402 	int dnssec_expected;
403 
404 	/**
405 	 * We are expecting dnssec information, but we also know the server
406 	 * is DNSSEC lame.  The response need not be marked dnssec-lame again.
407 	 */
408 	int dnssec_lame_query;
409 
410 	/**
411 	 * This is a flag that, if true, means that this event is
412 	 * waiting for a stub priming query.
413 	 */
414 	int wait_priming_stub;
415 
416 	/**
417 	 * This is a flag that, if true, means that this query is
418 	 * for (re)fetching glue from a zone. Since the address should
419 	 * have been glue, query again to the servers that should have
420 	 * been returning it as glue.
421 	 * The delegation point must be set to the one that should *not*
422 	 * be used when creating the state. A higher one will be attempted.
423 	 */
424 	int refetch_glue;
425 
426 	/**
427 	 * This flag detects that a completely empty nodata was already
428 	 * received, so that it is accepted later. */
429 	int empty_nodata_found;
430 
431 	/** list of pending queries to authoritative servers. */
432 	struct outbound_list outlist;
433 
434 	/** QNAME minimisation state, RFC9156 */
435 	enum minimisation_state minimisation_state;
436 
437 	/** State for capsfail: QNAME minimisation state for comparisons. */
438 	enum minimisation_state caps_minimisation_state;
439 
440 	/**
441 	 * The query info that is sent upstream. Will be a subset of qchase
442 	 * when qname minimisation is enabled.
443 	 */
444 	struct query_info qinfo_out;
445 
446 	/**
447 	 * Count number of QNAME minimisation iterations. Used to limit number of
448 	 * outgoing queries when QNAME minimisation is enabled.
449 	 */
450 	int minimise_count;
451 
452 	/**
453 	 * Count number of time-outs. Used to prevent resolving failures when
454 	 * the QNAME minimisation QTYPE is blocked. Used to determine if
455 	 * capsforid fallback should be started. */
456 	int timeout_count;
457 
458 	/** True if the current response is from auth_zone */
459 	int auth_zone_response;
460 	/** True if the auth_zones should not be consulted for the query */
461 	int auth_zone_avoid;
462 	/** true if there have been scrubbing failures of reply packets */
463 	int scrub_failures;
464 	/** true if there have been parse failures of reply packets */
465 	int parse_failures;
466 	/** a failure printout address for last received answer */
467 	union {
468 		struct in_addr in;
469 #ifdef AF_INET6
470 		struct in6_addr in6;
471 #endif
472 	} fail_addr;
473 	/** which fail_addr is set: 0 is nothing, otherwise 4 (IPv4) or 6 (IPv6) */
474 	int fail_addr_type;
475 };
476 
477 /**
478  * List of prepend items
479  */
480 struct iter_prep_list {
481 	/** next item in the list */
482 	struct iter_prep_list* next;
483 	/** the rrset held by this list item */
484 	struct ub_packed_rrset_key* rrset;
485 };
486 
487 /**
488  * Get the iterator function block.
489  * @return function block with function pointers to iterator methods.
490  */
491 struct module_func_block* iter_get_funcblock(void);
492 
493 /**
494  * Get iterator state as a string
495  * @param state: to convert
496  * @return constant string that is printable.
497  */
498 const char* iter_state_to_string(enum iter_state state);
499 
500 /**
501  * See if iterator state is a response state
502  * @param s: to inspect
503  * @return true if response state.
504  */
505 int iter_state_is_responsestate(enum iter_state s);
506 
507 /** iterator init, takes the module environment and the module id */
508 int iter_init(struct module_env* env, int id);
509 
510 /** iterator deinit, takes the module environment and the module id */
511 void iter_deinit(struct module_env* env, int id);
512 
513 /** iterator operate on a query, for the given module event and module id */
514 void iter_operate(struct module_qstate* qstate, enum module_ev event, int id,
515 	struct outbound_entry* outbound);
516 
517 /**
518  * Return priming query results to interested super querystates.
519  *
520  * Sets the delegation point and delegation message (not nonRD queries).
521  * This is a callback from walk_supers.
522  *
523  * @param qstate: query state that finished.
524  * @param id: module id.
525  * @param super: the qstate to inform.
526  */
527 void iter_inform_super(struct module_qstate* qstate, int id,
528 	struct module_qstate* super);
529 
530 /** iterator cleanup of the query state, for the given module id */
531 void iter_clear(struct module_qstate* qstate, int id);
532 
533 /** iterator alloc size routine, for the given module environment and id */
534 size_t iter_get_mem(struct module_env* env, int id);
535 
536 #endif /* ITERATOR_ITERATOR_H */
537