xref: /openbsd/usr.sbin/nsd/query.c (revision a6445c1d)
1 /*
2  * query.c -- nsd(8) the resolver.
3  *
4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
5  *
6  * See LICENSE for the license.
7  *
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/socket.h>
14 #include <netinet/in.h>
15 #include <arpa/inet.h>
16 #include <assert.h>
17 #include <ctype.h>
18 #include <errno.h>
19 #include <limits.h>
20 #include <stddef.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <time.h>
25 #include <unistd.h>
26 #include <netdb.h>
27 
28 #include "answer.h"
29 #include "axfr.h"
30 #include "dns.h"
31 #include "dname.h"
32 #include "nsd.h"
33 #include "namedb.h"
34 #include "query.h"
35 #include "util.h"
36 #include "options.h"
37 #include "nsec3.h"
38 #include "tsig.h"
39 
40 /* [Bug #253] Adding unnecessary NS RRset may lead to undesired truncation.
41  * This function determines if the final response packet needs the NS RRset
42  * included. Currently, it will only return negative if QTYPE == DNSKEY|DS.
43  * This way, resolvers won't fallback to TCP unnecessarily when priming
44  * trust anchors.
45  */
46 static int answer_needs_ns(struct query  *query);
47 
48 static int add_rrset(struct query  *query,
49 		     answer_type    *answer,
50 		     rr_section_type section,
51 		     domain_type    *owner,
52 		     rrset_type     *rrset);
53 
54 static void answer_authoritative(struct nsd	  *nsd,
55 				 struct query     *q,
56 				 answer_type      *answer,
57 				 size_t            domain_number,
58 				 int               exact,
59 				 domain_type      *closest_match,
60 				 domain_type      *closest_encloser,
61 				 const dname_type *qname);
62 
63 static void answer_lookup_zone(struct nsd *nsd, struct query *q,
64 			       answer_type *answer, size_t domain_number,
65 			       int exact, domain_type *closest_match,
66 			       domain_type *closest_encloser,
67 			       const dname_type *qname);
68 
69 void
70 query_put_dname_offset(struct query *q, domain_type *domain, uint16_t offset)
71 {
72 	assert(q);
73 	assert(domain);
74 	assert(domain->number > 0);
75 
76 	if (offset > MAX_COMPRESSION_OFFSET)
77 		return;
78 	if (q->compressed_dname_count >= MAX_COMPRESSED_DNAMES)
79 		return;
80 
81 	q->compressed_dname_offsets[domain->number] = offset;
82 	q->compressed_dnames[q->compressed_dname_count] = domain;
83 	++q->compressed_dname_count;
84 }
85 
86 void
87 query_clear_dname_offsets(struct query *q, size_t max_offset)
88 {
89 	while (q->compressed_dname_count > 0
90 	       && (q->compressed_dname_offsets[q->compressed_dnames[q->compressed_dname_count - 1]->number]
91 		   >= max_offset))
92 	{
93 		q->compressed_dname_offsets[q->compressed_dnames[q->compressed_dname_count - 1]->number] = 0;
94 		--q->compressed_dname_count;
95 	}
96 }
97 
98 void
99 query_clear_compression_tables(struct query *q)
100 {
101 	uint16_t i;
102 
103 	for (i = 0; i < q->compressed_dname_count; ++i) {
104 		assert(q->compressed_dnames);
105 		q->compressed_dname_offsets[q->compressed_dnames[i]->number] = 0;
106 	}
107 	q->compressed_dname_count = 0;
108 }
109 
110 void
111 query_add_compression_domain(struct query *q, domain_type *domain, uint16_t offset)
112 {
113 	while (domain->parent) {
114 		DEBUG(DEBUG_NAME_COMPRESSION, 2,
115 		      (LOG_INFO, "query dname: %s, number: %lu, offset: %u\n",
116 		       domain_to_string(domain),
117 		       (unsigned long) domain->number,
118 		       offset));
119 		query_put_dname_offset(q, domain, offset);
120 		offset += label_length(dname_name(domain_dname(domain))) + 1;
121 		domain = domain->parent;
122 	}
123 }
124 
125 /*
126  * Generate an error response with the specified RCODE.
127  */
128 query_state_type
129 query_error (struct query *q, nsd_rc_type rcode)
130 {
131 	if (rcode == NSD_RC_DISCARD) {
132 		return QUERY_DISCARDED;
133 	}
134 
135 	buffer_clear(q->packet);
136 
137 	QR_SET(q->packet);	   /* This is an answer.  */
138 	RCODE_SET(q->packet, (int) rcode); /* Error code.  */
139 
140 	/* Truncate the question as well... */
141 	QDCOUNT_SET(q->packet, 0);
142 	ANCOUNT_SET(q->packet, 0);
143 	NSCOUNT_SET(q->packet, 0);
144 	ARCOUNT_SET(q->packet, 0);
145 	buffer_set_position(q->packet, QHEADERSZ);
146 	return QUERY_PROCESSED;
147 }
148 
149 static query_state_type
150 query_formerr (struct query *query)
151 {
152 	int opcode = OPCODE(query->packet);
153 	FLAGS_SET(query->packet, FLAGS(query->packet) & 0x0100U);
154 			/* Preserve the RD flag. Clear the rest. */
155 	OPCODE_SET(query->packet, opcode);
156 	return query_error(query, NSD_RC_FORMAT);
157 }
158 
159 static void
160 query_cleanup(void *data)
161 {
162 	query_type *query = (query_type *) data;
163 	region_destroy(query->region);
164 }
165 
166 query_type *
167 query_create(region_type *region, uint16_t *compressed_dname_offsets,
168 	size_t compressed_dname_size)
169 {
170 	query_type *query
171 		= (query_type *) region_alloc_zero(region, sizeof(query_type));
172 	/* create region with large block size, because the initial chunk
173 	   saves many mallocs in the server */
174 	query->region = region_create_custom(xalloc, free, 16384, 16384/8, 32, 0);
175 	query->compressed_dname_offsets = compressed_dname_offsets;
176 	query->packet = buffer_create(region, QIOBUFSZ);
177 	region_add_cleanup(region, query_cleanup, query);
178 	query->compressed_dname_offsets_size = compressed_dname_size;
179 	tsig_create_record(&query->tsig, region);
180 	query->tsig_prepare_it = 1;
181 	query->tsig_update_it = 1;
182 	query->tsig_sign_it = 1;
183 	return query;
184 }
185 
186 void
187 query_reset(query_type *q, size_t maxlen, int is_tcp)
188 {
189 	/*
190 	 * As long as less than 4Kb (region block size) has been used,
191 	 * this call to free_all is free, the block is saved for re-use,
192 	 * so no malloc() or free() calls are done.
193 	 * at present use of the region is for:
194 	 *   o query qname dname_type (255 max).
195 	 *   o wildcard expansion domain_type (7*ptr+u32+2bytes)+(5*ptr nsec3)
196 	 *   o wildcard expansion for additional section domain_type.
197 	 *   o nsec3 hashed name(s) (3 dnames for a nonexist_proof,
198 	 *     one proof per wildcard and for nx domain).
199 	 */
200 	region_free_all(q->region);
201 	q->addrlen = sizeof(q->addr);
202 	q->maxlen = maxlen;
203 	q->reserved_space = 0;
204 	buffer_clear(q->packet);
205 	edns_init_record(&q->edns);
206 	tsig_init_record(&q->tsig, NULL, NULL);
207 	q->tsig_prepare_it = 1;
208 	q->tsig_update_it = 1;
209 	q->tsig_sign_it = 1;
210 	q->tcp = is_tcp;
211 	q->qname = NULL;
212 	q->qtype = 0;
213 	q->qclass = 0;
214 	q->zone = NULL;
215 	q->opcode = 0;
216 	q->cname_count = 0;
217 	q->delegation_domain = NULL;
218 	q->delegation_rrset = NULL;
219 	q->compressed_dname_count = 0;
220 	q->number_temporary_domains = 0;
221 
222 	q->axfr_is_done = 0;
223 	q->axfr_zone = NULL;
224 	q->axfr_current_domain = NULL;
225 	q->axfr_current_rrset = NULL;
226 	q->axfr_current_rr = 0;
227 
228 #ifdef RATELIMIT
229 	q->wildcard_domain = NULL;
230 #endif
231 }
232 
233 /* get a temporary domain number (or 0=failure) */
234 static domain_type*
235 query_get_tempdomain(struct query *q)
236 {
237 	static domain_type d[EXTRA_DOMAIN_NUMBERS];
238 	if(q->number_temporary_domains >= EXTRA_DOMAIN_NUMBERS)
239 		return 0;
240 	q->number_temporary_domains ++;
241 	memset(&d[q->number_temporary_domains-1], 0, sizeof(domain_type));
242 	d[q->number_temporary_domains-1].number = q->compressed_dname_offsets_size +
243 		q->number_temporary_domains - 1;
244 	return &d[q->number_temporary_domains-1];
245 }
246 
247 static void
248 query_addtxt(struct query  *q,
249 	     const uint8_t *dname,
250 	     uint16_t       klass,
251 	     uint32_t       ttl,
252 	     const char    *txt)
253 {
254 	size_t txt_length = strlen(txt);
255 	uint8_t len = (uint8_t) txt_length;
256 
257 	assert(txt_length <= UCHAR_MAX);
258 
259 	/* Add the dname */
260 	if (dname >= buffer_begin(q->packet)
261 	    && dname <= buffer_current(q->packet))
262 	{
263 		buffer_write_u16(q->packet,
264 				 0xc000 | (dname - buffer_begin(q->packet)));
265 	} else {
266 		buffer_write(q->packet, dname + 1, *dname);
267 	}
268 
269 	buffer_write_u16(q->packet, TYPE_TXT);
270 	buffer_write_u16(q->packet, klass);
271 	buffer_write_u32(q->packet, ttl);
272 	buffer_write_u16(q->packet, len + 1);
273 	buffer_write_u8(q->packet, len);
274 	buffer_write(q->packet, txt, len);
275 }
276 
277 /*
278  * Parse the question section of a query.  The normalized query name
279  * is stored in QUERY->name, the class in QUERY->klass, and the type
280  * in QUERY->type.
281  */
282 static int
283 process_query_section(query_type *query)
284 {
285 	uint8_t qnamebuf[MAXDOMAINLEN];
286 
287 	buffer_set_position(query->packet, QHEADERSZ);
288 	/* Lets parse the query name and convert it to lower case.  */
289 	if(!packet_read_query_section(query->packet, qnamebuf,
290 		&query->qtype, &query->qclass))
291 		return 0;
292 	query->qname = dname_make(query->region, qnamebuf, 1);
293 	query->opcode = OPCODE(query->packet);
294 	return 1;
295 }
296 
297 
298 /*
299  * Process an optional EDNS OPT record.  Sets QUERY->EDNS to 0 if
300  * there was no EDNS record, to -1 if there was an invalid or
301  * unsupported EDNS record, and to 1 otherwise.  Updates QUERY->MAXLEN
302  * if the EDNS record specifies a maximum supported response length.
303  *
304  * Return NSD_RC_FORMAT on failure, NSD_RC_OK on success.
305  */
306 static nsd_rc_type
307 process_edns(nsd_type* nsd, struct query *q)
308 {
309 	if (q->edns.status == EDNS_ERROR) {
310 		/* The only error is VERSION not implemented */
311 		return NSD_RC_FORMAT;
312 	}
313 
314 	if (q->edns.status == EDNS_OK) {
315 		/* Only care about UDP size larger than normal... */
316 		if (!q->tcp && q->edns.maxlen > UDP_MAX_MESSAGE_LEN) {
317 			size_t edns_size;
318 #if defined(INET6)
319 			if (q->addr.ss_family == AF_INET6) {
320 				edns_size = nsd->ipv6_edns_size;
321 			} else
322 #endif
323 			edns_size = nsd->ipv4_edns_size;
324 
325 			if (q->edns.maxlen < edns_size) {
326 				q->maxlen = q->edns.maxlen;
327 			} else {
328 				q->maxlen = edns_size;
329 			}
330 
331 #if defined(INET6) && !defined(IPV6_USE_MIN_MTU) && !defined(IPV6_MTU)
332 			/*
333 			 * Use IPv6 minimum MTU to avoid sending
334 			 * packets that are too large for some links.
335 			 * IPv6 will not automatically fragment in
336 			 * this case (unlike IPv4).
337 			 */
338 			if (q->addr.ss_family == AF_INET6
339 			    && q->maxlen > IPV6_MIN_MTU)
340 			{
341 				q->maxlen = IPV6_MIN_MTU;
342 			}
343 #endif
344 		}
345 
346 		/* Strip the OPT resource record off... */
347 		buffer_set_position(q->packet, q->edns.position);
348 		buffer_set_limit(q->packet, q->edns.position);
349 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) - 1);
350 	}
351 	return NSD_RC_OK;
352 }
353 
354 /*
355  * Processes TSIG.
356  * Sets error when tsig does not verify on the query.
357  */
358 static nsd_rc_type
359 process_tsig(struct query* q)
360 {
361 	if(q->tsig.status == TSIG_ERROR)
362 		return NSD_RC_FORMAT;
363 	if(q->tsig.status == TSIG_OK) {
364 		if(!tsig_from_query(&q->tsig)) {
365 			char a[128];
366 			addr2str(&q->addr, a, sizeof(a));
367 			log_msg(LOG_ERR, "query: bad tsig (%s) for key %s from %s",
368 				tsig_error(q->tsig.error_code),
369 				dname_to_string(q->tsig.key_name, NULL), a);
370 			return NSD_RC_NOTAUTH;
371 		}
372 		buffer_set_limit(q->packet, q->tsig.position);
373 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) - 1);
374 		tsig_prepare(&q->tsig);
375 		tsig_update(&q->tsig, q->packet, buffer_limit(q->packet));
376 		if(!tsig_verify(&q->tsig)) {
377 			char a[128];
378 			addr2str(&q->addr, a, sizeof(a));
379 			log_msg(LOG_ERR, "query: bad tsig signature for key %s from %s",
380 				dname_to_string(q->tsig.key->name, NULL), a);
381 			return NSD_RC_NOTAUTH;
382 		}
383 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "query good tsig signature for %s",
384 			dname_to_string(q->tsig.key->name, NULL)));
385 	}
386 	return NSD_RC_OK;
387 }
388 
389 /*
390  * Check notify acl and forward to xfrd (or return an error).
391  */
392 static query_state_type
393 answer_notify(struct nsd* nsd, struct query *query)
394 {
395 	int acl_num, acl_num_xfr;
396 	acl_options_t *why;
397 	nsd_rc_type rc;
398 
399 	zone_options_t* zone_opt;
400 	DEBUG(DEBUG_XFRD,1, (LOG_INFO, "got notify %s processing acl",
401 		dname_to_string(query->qname, NULL)));
402 
403 	zone_opt = zone_options_find(nsd->options, query->qname);
404 	if(!zone_opt)
405 		return query_error(query, NSD_RC_NXDOMAIN);
406 
407 	if(!nsd->this_child) /* we are in debug mode or something */
408 		return query_error(query, NSD_RC_SERVFAIL);
409 
410 	if(!tsig_find_rr(&query->tsig, query->packet)) {
411 		DEBUG(DEBUG_XFRD,2, (LOG_ERR, "bad tsig RR format"));
412 		return query_error(query, NSD_RC_FORMAT);
413 	}
414 	rc = process_tsig(query);
415 	if(rc != NSD_RC_OK)
416 		return query_error(query, rc);
417 
418 	/* check if it passes acl */
419 	if((acl_num = acl_check_incoming(zone_opt->pattern->allow_notify, query,
420 		&why)) != -1)
421 	{
422 		sig_atomic_t mode = NSD_PASS_TO_XFRD;
423 		int s = nsd->this_child->parent_fd;
424 		uint16_t sz;
425 		uint32_t acl_send = htonl(acl_num);
426 		uint32_t acl_xfr;
427 		size_t pos;
428 
429 		/* Find priority candidate for request XFR. -1 if no match */
430 		acl_num_xfr = acl_check_incoming(
431 			zone_opt->pattern->request_xfr, query, NULL);
432 
433 		acl_xfr = htonl(acl_num_xfr);
434 
435 		assert(why);
436 		DEBUG(DEBUG_XFRD,1, (LOG_INFO, "got notify %s passed acl %s %s",
437 			dname_to_string(query->qname, NULL),
438 			why->ip_address_spec,
439 			why->nokey?"NOKEY":
440 			(why->blocked?"BLOCKED":why->key_name)));
441 		sz = buffer_limit(query->packet);
442 		if(buffer_limit(query->packet) > MAX_PACKET_SIZE)
443 			return query_error(query, NSD_RC_SERVFAIL);
444 		/* forward to xfrd for processing
445 		   Note. Blocking IPC I/O, but acl is OK. */
446 		sz = htons(sz);
447 		if(!write_socket(s, &mode, sizeof(mode)) ||
448 			!write_socket(s, &sz, sizeof(sz)) ||
449 			!write_socket(s, buffer_begin(query->packet),
450 				buffer_limit(query->packet)) ||
451 			!write_socket(s, &acl_send, sizeof(acl_send)) ||
452 			!write_socket(s, &acl_xfr, sizeof(acl_xfr))) {
453 			log_msg(LOG_ERR, "error in IPC notify server2main, %s",
454 				strerror(errno));
455 			return query_error(query, NSD_RC_SERVFAIL);
456 		}
457 
458 		/* create notify reply - keep same query contents */
459 		QR_SET(query->packet);         /* This is an answer.  */
460 		AA_SET(query->packet);	   /* we are authoritative. */
461 		ANCOUNT_SET(query->packet, 0);
462 		NSCOUNT_SET(query->packet, 0);
463 		ARCOUNT_SET(query->packet, 0);
464 		RCODE_SET(query->packet, RCODE_OK); /* Error code.  */
465 		/* position is right after the query */
466 		pos = buffer_position(query->packet);
467 		buffer_clear(query->packet);
468 		buffer_set_position(query->packet, pos);
469 		if(verbosity >= 1) {
470 			char address[128];
471 			addr2str(&query->addr, address, sizeof(address));
472 			VERBOSITY(2, (LOG_INFO, "notify for %s from %s",
473 				dname_to_string(query->qname, NULL), address));
474 		}
475 		/* tsig is added in add_additional later (if needed) */
476 		return QUERY_PROCESSED;
477 	}
478 
479 	if (verbosity >= 1) {
480 		char address[128];
481 		addr2str(&query->addr, address, sizeof(address));
482 		VERBOSITY(1, (LOG_INFO, "notify for zone %s from client %s refused, %s%s",
483 			dname_to_string(query->qname, NULL),
484 			address,
485 			why?why->key_name:"no acl matches",
486 			why?why->ip_address_spec:"."));
487 	}
488 
489 	return query_error(query, NSD_RC_REFUSE);
490 }
491 
492 
493 /*
494  * Answer a query in the CHAOS class.
495  */
496 static query_state_type
497 answer_chaos(struct nsd *nsd, query_type *q)
498 {
499 	AA_CLR(q->packet);
500 	switch (q->qtype) {
501 	case TYPE_ANY:
502 	case TYPE_TXT:
503 		if ((q->qname->name_size == 11
504 		     && memcmp(dname_name(q->qname), "\002id\006server", 11) == 0) ||
505 		    (q->qname->name_size ==  15
506 		     && memcmp(dname_name(q->qname), "\010hostname\004bind", 15) == 0))
507 		{
508 			/* Add ID */
509 			query_addtxt(q,
510 				     buffer_begin(q->packet) + QHEADERSZ,
511 				     CLASS_CH,
512 				     0,
513 				     nsd->identity);
514 			ANCOUNT_SET(q->packet, ANCOUNT(q->packet) + 1);
515 		} else if ((q->qname->name_size == 16
516 			    && memcmp(dname_name(q->qname), "\007version\006server", 16) == 0) ||
517 			   (q->qname->name_size == 14
518 			    && memcmp(dname_name(q->qname), "\007version\004bind", 14) == 0))
519 		{
520 			if(!nsd->options->hide_version) {
521 				/* Add version */
522 				query_addtxt(q,
523 				     buffer_begin(q->packet) + QHEADERSZ,
524 				     CLASS_CH,
525 				     0,
526 				     nsd->version);
527 				ANCOUNT_SET(q->packet, ANCOUNT(q->packet) + 1);
528 			} else {
529 				RCODE_SET(q->packet, RCODE_REFUSE);
530 			}
531 		}
532 		break;
533 	default:
534 		RCODE_SET(q->packet, RCODE_REFUSE);
535 		break;
536 	}
537 
538 	return QUERY_PROCESSED;
539 }
540 
541 
542 /*
543  * Find the covering NSEC for a non-existent domain name.  Normally
544  * the NSEC will be located at CLOSEST_MATCH, except when it is an
545  * empty non-terminal.  In this case the NSEC may be located at the
546  * previous domain name (in canonical ordering).
547  */
548 static domain_type *
549 find_covering_nsec(domain_type *closest_match,
550 		   zone_type   *zone,
551 		   rrset_type **nsec_rrset)
552 {
553 	assert(closest_match);
554 	assert(nsec_rrset);
555 
556 	/* loop away temporary created domains. For real ones it is &RBTREE_NULL */
557 	while (closest_match->rnode == NULL)
558 		closest_match = closest_match->parent;
559 	while (closest_match) {
560 		*nsec_rrset = domain_find_rrset(closest_match, zone, TYPE_NSEC);
561 		if (*nsec_rrset) {
562 			return closest_match;
563 		}
564 		if (closest_match == zone->apex) {
565 			/* Don't look outside the current zone.  */
566 			return NULL;
567 		}
568 		closest_match = domain_previous(closest_match);
569 	}
570 	return NULL;
571 }
572 
573 
574 struct additional_rr_types
575 {
576 	uint16_t        rr_type;
577 	rr_section_type rr_section;
578 };
579 
580 struct additional_rr_types default_additional_rr_types[] = {
581 	{ TYPE_A, ADDITIONAL_A_SECTION },
582 	{ TYPE_AAAA, ADDITIONAL_AAAA_SECTION },
583 	{ 0, (rr_section_type) 0 }
584 };
585 
586 struct additional_rr_types rt_additional_rr_types[] = {
587 	{ TYPE_A, ADDITIONAL_A_SECTION },
588 	{ TYPE_AAAA, ADDITIONAL_AAAA_SECTION },
589 	{ TYPE_X25, ADDITIONAL_OTHER_SECTION },
590 	{ TYPE_ISDN, ADDITIONAL_OTHER_SECTION },
591 	{ 0, (rr_section_type) 0 }
592 };
593 
594 static void
595 add_additional_rrsets(struct query *query, answer_type *answer,
596 		      rrset_type *master_rrset, size_t rdata_index,
597 		      int allow_glue, struct additional_rr_types types[])
598 {
599 	size_t i;
600 
601 	assert(query);
602 	assert(answer);
603 	assert(master_rrset);
604 	assert(rdata_atom_is_domain(rrset_rrtype(master_rrset), rdata_index));
605 
606 	for (i = 0; i < master_rrset->rr_count; ++i) {
607 		int j;
608 		domain_type *additional = rdata_atom_domain(master_rrset->rrs[i].rdatas[rdata_index]);
609 		domain_type *match = additional;
610 
611 		assert(additional);
612 
613 		if (!allow_glue && domain_is_glue(match, query->zone))
614 			continue;
615 
616 		/*
617 		 * Check to see if we need to generate the dependent
618 		 * based on a wildcard domain.
619 		 */
620 		while (!match->is_existing) {
621 			match = match->parent;
622 		}
623 		if (additional != match && domain_wildcard_child(match)) {
624 			domain_type *wildcard_child = domain_wildcard_child(match);
625 			domain_type *temp = (domain_type *) region_alloc(
626 				query->region, sizeof(domain_type));
627 			temp->rnode = NULL;
628 			temp->dname = additional->dname;
629 			temp->number = additional->number;
630 			temp->parent = match;
631 			temp->wildcard_child_closest_match = temp;
632 			temp->rrsets = wildcard_child->rrsets;
633 			temp->is_existing = wildcard_child->is_existing;
634 			additional = temp;
635 		}
636 
637 		for (j = 0; types[j].rr_type != 0; ++j) {
638 			rrset_type *rrset = domain_find_rrset(
639 				additional, query->zone, types[j].rr_type);
640 			if (rrset) {
641 				answer_add_rrset(answer, types[j].rr_section,
642 						 additional, rrset);
643 			}
644 		}
645 	}
646 }
647 
648 static int
649 answer_needs_ns(struct query* query)
650 {
651 	assert(query);
652 	/* Currently, only troublesome for DNSKEY and DS,
653          * cuz their RRSETs are quite large. */
654 	return (query->qtype != TYPE_DNSKEY && query->qtype != TYPE_DS);
655 }
656 
657 static int
658 add_rrset(struct query   *query,
659 	  answer_type    *answer,
660 	  rr_section_type section,
661 	  domain_type    *owner,
662 	  rrset_type     *rrset)
663 {
664 	int result;
665 
666 	assert(query);
667 	assert(answer);
668 	assert(owner);
669 	assert(rrset);
670 	assert(rrset_rrclass(rrset) == CLASS_IN);
671 
672 	result = answer_add_rrset(answer, section, owner, rrset);
673 	switch (rrset_rrtype(rrset)) {
674 	case TYPE_NS:
675 		add_additional_rrsets(query, answer, rrset, 0, 1,
676 				      default_additional_rr_types);
677 		break;
678 	case TYPE_MB:
679 		add_additional_rrsets(query, answer, rrset, 0, 0,
680 				      default_additional_rr_types);
681 		break;
682 	case TYPE_MX:
683 	case TYPE_KX:
684 		add_additional_rrsets(query, answer, rrset, 1, 0,
685 				      default_additional_rr_types);
686 		break;
687 	case TYPE_RT:
688 		add_additional_rrsets(query, answer, rrset, 1, 0,
689 				      rt_additional_rr_types);
690 		break;
691 	default:
692 		break;
693 	}
694 
695 	return result;
696 }
697 
698 
699 /* returns 0 on error, or the domain number for to_name.
700    from_name is changes to to_name by the DNAME rr.
701    DNAME rr is from src to dest.
702    closest encloser encloses the to_name. */
703 static size_t
704 query_synthesize_cname(struct query* q, struct answer* answer, const dname_type* from_name,
705 	const dname_type* to_name, domain_type* src, domain_type* to_closest_encloser,
706 	domain_type** to_closest_match)
707 {
708 	/* add temporary domains for from_name and to_name and all
709 	   their (not allocated yet) parents */
710 	/* any domains below src are not_existing (because of DNAME at src) */
711 	int i;
712 	domain_type* cname_domain;
713 	domain_type* cname_dest;
714 	rrset_type* rrset;
715 
716 	/* allocate source part */
717 	domain_type* lastparent = src;
718 	assert(q && answer && from_name && to_name && src && to_closest_encloser);
719 	assert(to_closest_match);
720 	for(i=0; i < from_name->label_count - domain_dname(src)->label_count; i++)
721 	{
722 		domain_type* newdom = query_get_tempdomain(q);
723 		if(!newdom)
724 			return 0;
725 		newdom->is_existing = 1;
726 		newdom->parent = lastparent;
727 		newdom->dname
728 			= dname_partial_copy(q->region,
729 			from_name, domain_dname(src)->label_count + i + 1);
730 		if(dname_compare(domain_dname(newdom), q->qname) == 0) {
731 			/* 0 good for query name, otherwise new number */
732 			newdom->number = 0;
733 		}
734 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "created temp domain src %d. %s nr %d", i,
735 			domain_to_string(newdom), (int)newdom->number));
736 		lastparent = newdom;
737 	}
738 	cname_domain = lastparent;
739 
740 	/* allocate dest part */
741 	lastparent = to_closest_encloser;
742 	for(i=0; i < to_name->label_count - domain_dname(to_closest_encloser)->label_count;
743 		i++)
744 	{
745 		domain_type* newdom = query_get_tempdomain(q);
746 		if(!newdom)
747 			return 0;
748 		newdom->is_existing = 0;
749 		newdom->parent = lastparent;
750 		newdom->dname
751 			= dname_partial_copy(q->region,
752 			to_name, domain_dname(to_closest_encloser)->label_count + i + 1);
753 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "created temp domain dest %d. %s nr %d", i,
754 			domain_to_string(newdom), (int)newdom->number));
755 		lastparent = newdom;
756 	}
757 	cname_dest = lastparent;
758 	*to_closest_match = cname_dest;
759 
760 	/* allocate the CNAME RR */
761 	rrset = (rrset_type*) region_alloc(q->region, sizeof(rrset_type));
762 	memset(rrset, 0, sizeof(rrset_type));
763 	rrset->zone = q->zone;
764 	rrset->rr_count = 1;
765 	rrset->rrs = (rr_type*) region_alloc(q->region, sizeof(rr_type));
766 	memset(rrset->rrs, 0, sizeof(rr_type));
767 	rrset->rrs->owner = cname_domain;
768 	rrset->rrs->ttl = 0;
769 	rrset->rrs->type = TYPE_CNAME;
770 	rrset->rrs->klass = CLASS_IN;
771 	rrset->rrs->rdata_count = 1;
772 	rrset->rrs->rdatas = (rdata_atom_type*)region_alloc(q->region,
773 		sizeof(rdata_atom_type));
774 	rrset->rrs->rdatas->domain = cname_dest;
775 
776 	if(!add_rrset(q, answer, ANSWER_SECTION, cname_domain, rrset)) {
777 		log_msg(LOG_ERR, "could not add synthesized CNAME rrset to packet");
778 	}
779 
780 	return cname_dest->number;
781 }
782 
783 /*
784  * Answer delegation information.
785  *
786  * DNSSEC: Include the DS RRset if present.  Otherwise include an NSEC
787  * record proving the DS RRset does not exist.
788  */
789 static void
790 answer_delegation(query_type *query, answer_type *answer)
791 {
792 	assert(answer);
793 	assert(query->delegation_domain);
794 	assert(query->delegation_rrset);
795 
796 	if (query->cname_count == 0) {
797 		AA_CLR(query->packet);
798 	} else {
799 		AA_SET(query->packet);
800 	}
801 
802 	add_rrset(query,
803 		  answer,
804 		  AUTHORITY_SECTION,
805 		  query->delegation_domain,
806 		  query->delegation_rrset);
807 	if (query->edns.dnssec_ok && zone_is_secure(query->zone)) {
808 		rrset_type *rrset;
809 		if ((rrset = domain_find_rrset(query->delegation_domain, query->zone, TYPE_DS))) {
810 			add_rrset(query, answer, AUTHORITY_SECTION,
811 				  query->delegation_domain, rrset);
812 #ifdef NSEC3
813 		} else if (query->zone->nsec3_param) {
814 			nsec3_answer_delegation(query, answer);
815 #endif
816 		} else if ((rrset = domain_find_rrset(query->delegation_domain, query->zone, TYPE_NSEC))) {
817 			add_rrset(query, answer, AUTHORITY_SECTION,
818 				  query->delegation_domain, rrset);
819 		}
820 	}
821 }
822 
823 
824 /*
825  * Answer SOA information.
826  */
827 static void
828 answer_soa(struct query *query, answer_type *answer)
829 {
830 	if (query->qclass != CLASS_ANY) {
831 		add_rrset(query, answer,
832 			  AUTHORITY_SECTION,
833 			  query->zone->apex,
834 			  query->zone->soa_nx_rrset);
835 	}
836 }
837 
838 
839 /*
840  * Answer that the domain name exists but there is no RRset with the
841  * requested type.
842  *
843  * DNSSEC: Include the correct NSEC record proving that the type does
844  * not exist.  In the wildcard no data (3.1.3.4) case the wildcard IS
845  * NOT expanded, so the ORIGINAL parameter must point to the original
846  * wildcard entry, not to the generated entry.
847  */
848 static void
849 answer_nodata(struct query *query, answer_type *answer, domain_type *original)
850 {
851 	if (query->cname_count == 0) {
852 		answer_soa(query, answer);
853 	}
854 
855 #ifdef NSEC3
856 	if (query->edns.dnssec_ok && query->zone->nsec3_param) {
857 		nsec3_answer_nodata(query, answer, original);
858 	} else
859 #endif
860 	if (query->edns.dnssec_ok && zone_is_secure(query->zone)) {
861 		domain_type *nsec_domain;
862 		rrset_type *nsec_rrset;
863 
864 		nsec_domain = find_covering_nsec(original, query->zone, &nsec_rrset);
865 		if (nsec_domain) {
866 			add_rrset(query, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
867 		}
868 	}
869 }
870 
871 static void
872 answer_nxdomain(query_type *query, answer_type *answer)
873 {
874 	RCODE_SET(query->packet, RCODE_NXDOMAIN);
875 	answer_soa(query, answer);
876 }
877 
878 
879 /*
880  * Answer domain information (or SOA if we do not have an RRset for
881  * the type specified by the query).
882  */
883 static void
884 answer_domain(struct nsd* nsd, struct query *q, answer_type *answer,
885 	      domain_type *domain, domain_type *original)
886 {
887 	rrset_type *rrset;
888 
889 	if (q->qtype == TYPE_ANY) {
890 		int added = 0;
891 		for (rrset = domain_find_any_rrset(domain, q->zone); rrset; rrset = rrset->next) {
892 			if (rrset->zone == q->zone
893 #ifdef NSEC3
894 				&& rrset_rrtype(rrset) != TYPE_NSEC3
895 #endif
896 			    /*
897 			     * Don't include the RRSIG RRset when
898 			     * DNSSEC is used, because it is added
899 			     * automatically on an per-RRset basis.
900 			     */
901 			    && !(q->edns.dnssec_ok
902 				 && zone_is_secure(q->zone)
903 				 && rrset_rrtype(rrset) == TYPE_RRSIG))
904 			{
905 				add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
906 				++added;
907 			}
908 		}
909 		if (added == 0) {
910 			answer_nodata(q, answer, original);
911 			return;
912 		}
913 #ifdef NSEC3
914 	} else if (q->qtype == TYPE_NSEC3) {
915 		answer_nodata(q, answer, original);
916 		return;
917 #endif
918 	} else if ((rrset = domain_find_rrset(domain, q->zone, q->qtype))) {
919 		add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
920 	} else if ((rrset = domain_find_rrset(domain, q->zone, TYPE_CNAME))) {
921 		int added;
922 
923 		/*
924 		 * If the CNAME is not added it is already in the
925 		 * answer, so we have a CNAME loop.  Don't follow the
926 		 * CNAME target in this case.
927 		 */
928 		added = add_rrset(q, answer, ANSWER_SECTION, domain, rrset);
929 		assert(rrset->rr_count > 0);
930 		if (added) {
931 			/* only process first CNAME record */
932 			domain_type *closest_match = rdata_atom_domain(rrset->rrs[0].rdatas[0]);
933 			domain_type *closest_encloser = closest_match;
934 			zone_type* origzone = q->zone;
935 			++q->cname_count;
936 
937 			while (!closest_encloser->is_existing)
938 				closest_encloser = closest_encloser->parent;
939 
940 			answer_lookup_zone(nsd, q, answer, closest_match->number,
941 					     closest_match == closest_encloser,
942 					     closest_match, closest_encloser,
943 					     domain_dname(closest_match));
944 			q->zone = origzone;
945 		}
946 		return;
947 	} else {
948 		answer_nodata(q, answer, original);
949 		return;
950 	}
951 
952 	if (q->qclass != CLASS_ANY && q->zone->ns_rrset && answer_needs_ns(q)) {
953 		add_rrset(q, answer, OPTIONAL_AUTHORITY_SECTION, q->zone->apex,
954 			  q->zone->ns_rrset);
955 	}
956 }
957 
958 
959 /*
960  * Answer with authoritative data.  If a wildcard is matched the owner
961  * name will be expanded to the domain name specified by
962  * DOMAIN_NUMBER.  DOMAIN_NUMBER 0 (zero) is reserved for the original
963  * query name.
964  *
965  * DNSSEC: Include the necessary NSEC records in case the request
966  * domain name does not exist and/or a wildcard match does not exist.
967  */
968 static void
969 answer_authoritative(struct nsd   *nsd,
970 		     struct query *q,
971 		     answer_type  *answer,
972 		     size_t        domain_number,
973 		     int           exact,
974 		     domain_type  *closest_match,
975 		     domain_type  *closest_encloser,
976 		     const dname_type *qname)
977 {
978 	domain_type *match;
979 	domain_type *original = closest_match;
980 	rrset_type *rrset;
981 
982 #ifdef NSEC3
983 	if(exact && domain_has_only_NSEC3(closest_match, q->zone)) {
984 		exact = 0; /* pretend it does not exist */
985 		if(closest_encloser->parent)
986 			closest_encloser = closest_encloser->parent;
987 	}
988 #endif /* NSEC3 */
989 
990 	if (exact) {
991 		match = closest_match;
992 	} else if ((rrset=domain_find_rrset(closest_encloser, q->zone, TYPE_DNAME))) {
993 		/* process DNAME */
994 		const dname_type* name = qname;
995 		domain_type *dest = rdata_atom_domain(rrset->rrs[0].rdatas[0]);
996 		int added;
997 		assert(rrset->rr_count > 0);
998 		if(domain_number != 0) /* we followed CNAMEs or DNAMEs */
999 			name = domain_dname(closest_match);
1000 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "expanding DNAME for q=%s", dname_to_string(name, NULL)));
1001 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->src is %s",
1002 			domain_to_string(closest_encloser)));
1003 		DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->dest is %s",
1004 			domain_to_string(dest)));
1005 		/* if the DNAME set is not added we have a loop, do not follow */
1006 		added = add_rrset(q, answer, ANSWER_SECTION, closest_encloser, rrset);
1007 		if(added) {
1008 			domain_type* src = closest_encloser;
1009 			const dname_type* newname = dname_replace(q->region, name,
1010 				domain_dname(src), domain_dname(dest));
1011 			size_t newnum = 0;
1012 			zone_type* origzone = q->zone;
1013 			++q->cname_count;
1014 			if(!newname) { /* newname too long */
1015 				RCODE_SET(q->packet, RCODE_YXDOMAIN);
1016 				return;
1017 			}
1018 			DEBUG(DEBUG_QUERY,2, (LOG_INFO, "->result is %s", dname_to_string(newname, NULL)));
1019 			/* follow the DNAME */
1020 			exact = namedb_lookup(nsd->db, newname, &closest_match, &closest_encloser);
1021 			/* synthesize CNAME record */
1022 			newnum = query_synthesize_cname(q, answer, name, newname,
1023 				src, closest_encloser, &closest_match);
1024 			if(!newnum) {
1025 				/* could not synthesize the CNAME. */
1026 				/* return previous CNAMEs to make resolver recurse for us */
1027 				return;
1028 			}
1029 
1030 			while (closest_encloser && !closest_encloser->is_existing)
1031 				closest_encloser = closest_encloser->parent;
1032 			answer_lookup_zone(nsd, q, answer, newnum,
1033 				closest_match == closest_encloser,
1034 				closest_match, closest_encloser, newname);
1035 			q->zone = origzone;
1036 		}
1037 		if(!added)  /* log the error so operator can find looping recursors */
1038 			log_msg(LOG_INFO, "DNAME processing stopped due to loop, qname %s",
1039 				dname_to_string(q->qname, NULL));
1040 		return;
1041 	} else if (domain_wildcard_child(closest_encloser)) {
1042 		/* Generate the domain from the wildcard.  */
1043 		domain_type *wildcard_child = domain_wildcard_child(closest_encloser);
1044 #ifdef RATELIMIT
1045 		q->wildcard_domain = wildcard_child;
1046 #endif
1047 
1048 		match = (domain_type *) region_alloc(q->region,
1049 						     sizeof(domain_type));
1050 		match->rnode = NULL;
1051 		match->dname = wildcard_child->dname;
1052 		match->parent = closest_encloser;
1053 		match->wildcard_child_closest_match = match;
1054 		match->number = domain_number;
1055 		match->rrsets = wildcard_child->rrsets;
1056 		match->is_existing = wildcard_child->is_existing;
1057 #ifdef NSEC3
1058 		match->nsec3 = wildcard_child->nsec3;
1059 		/* copy over these entries:
1060 		match->nsec3_is_exact = wildcard_child->nsec3_is_exact;
1061 		match->nsec3_cover = wildcard_child->nsec3_cover;
1062 		match->nsec3_wcard_child_cover = wildcard_child->nsec3_wcard_child_cover;
1063 		match->nsec3_ds_parent_is_exact = wildcard_child->nsec3_ds_parent_is_exact;
1064 		match->nsec3_ds_parent_cover = wildcard_child->nsec3_ds_parent_cover;
1065 		*/
1066 
1067 		if (q->edns.dnssec_ok && q->zone->nsec3_param) {
1068 			/* Only add nsec3 wildcard data when do bit is set */
1069 			nsec3_answer_wildcard(q, answer, wildcard_child, qname);
1070 		}
1071 #endif
1072 
1073 		/*
1074 		 * Remember the original domain in case a Wildcard No
1075 		 * Data (3.1.3.4) response needs to be generated.  In
1076 		 * this particular case the wildcard IS NOT
1077 		 * expanded.
1078 		 */
1079 		original = wildcard_child;
1080 	} else {
1081 		match = NULL;
1082 	}
1083 
1084 	/* Authorative zone.  */
1085 #ifdef NSEC3
1086 	if (q->edns.dnssec_ok && q->zone->nsec3_param) {
1087 		nsec3_answer_authoritative(&match, q, answer,
1088 			closest_encloser, qname);
1089 	} else
1090 #endif
1091 	if (q->edns.dnssec_ok && zone_is_secure(q->zone)) {
1092 		if (match != closest_encloser) {
1093 			domain_type *nsec_domain;
1094 			rrset_type *nsec_rrset;
1095 
1096 			/*
1097 			 * No match found or generated from wildcard,
1098 			 * include NSEC record.
1099 			 */
1100 			nsec_domain = find_covering_nsec(closest_match, q->zone, &nsec_rrset);
1101 			if (nsec_domain) {
1102 				add_rrset(q, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
1103 			}
1104 		}
1105 		if (!match) {
1106 			domain_type *nsec_domain;
1107 			rrset_type *nsec_rrset;
1108 
1109 			/*
1110 			 * No match and no wildcard.  Include NSEC
1111 			 * proving there is no wildcard.
1112 			 */
1113 			nsec_domain = find_covering_nsec(closest_encloser->wildcard_child_closest_match, q->zone, &nsec_rrset);
1114 			if (nsec_domain) {
1115 				add_rrset(q, answer, AUTHORITY_SECTION, nsec_domain, nsec_rrset);
1116 			}
1117 		}
1118 	}
1119 
1120 #ifdef NSEC3
1121 	if (RCODE(q->packet)!=RCODE_OK) {
1122 		return; /* nsec3 collision failure */
1123 	}
1124 #endif
1125 	if (match) {
1126 		answer_domain(nsd, q, answer, match, original);
1127 	} else {
1128 		answer_nxdomain(q, answer);
1129 	}
1130 }
1131 
1132 /*
1133  * qname may be different after CNAMEs have been followed from query->qname.
1134  */
1135 static void
1136 answer_lookup_zone(struct nsd *nsd, struct query *q, answer_type *answer,
1137 	size_t domain_number, int exact, domain_type *closest_match,
1138 	domain_type *closest_encloser, const dname_type *qname)
1139 {
1140 	q->zone = domain_find_zone(nsd->db, closest_encloser);
1141 	if (!q->zone) {
1142 		/* no zone for this */
1143 		if(q->cname_count == 0)
1144 			RCODE_SET(q->packet, RCODE_REFUSE);
1145 		return;
1146 	}
1147 	if(!q->zone->apex || !q->zone->soa_rrset) {
1148 		/* zone is configured but not loaded */
1149 		if(q->cname_count == 0)
1150 			RCODE_SET(q->packet, RCODE_SERVFAIL);
1151 		return;
1152 	}
1153 
1154 	/*
1155 	 * See RFC 4035 (DNSSEC protocol) section 3.1.4.1 Responding
1156 	 * to Queries for DS RRs.
1157 	 */
1158 	if (exact && q->qtype == TYPE_DS && closest_encloser == q->zone->apex) {
1159 		/*
1160 		 * Type DS query at a zone cut, use the responsible
1161 		 * parent zone to generate the answer if we are
1162 		 * authoritative for the parent zone.
1163 		 */
1164 		zone_type *zone = domain_find_parent_zone(q->zone);
1165 		if (zone)
1166 			q->zone = zone;
1167 	}
1168 
1169 	/* see if the zone has expired (for secondary zones) */
1170 	if(q->zone && q->zone->opts && q->zone->opts->pattern &&
1171 		q->zone->opts->pattern->request_xfr != 0 && !q->zone->is_ok) {
1172 		if(q->cname_count == 0)
1173 			RCODE_SET(q->packet, RCODE_SERVFAIL);
1174 		return;
1175 	}
1176 
1177 	if (exact && q->qtype == TYPE_DS && closest_encloser == q->zone->apex) {
1178 		/*
1179 		 * Type DS query at the zone apex (and the server is
1180 		 * not authoratitive for the parent zone).
1181 		 */
1182 		if (q->qclass == CLASS_ANY) {
1183 			AA_CLR(q->packet);
1184 		} else {
1185 			AA_SET(q->packet);
1186 		}
1187 		answer_nodata(q, answer, closest_encloser);
1188 	} else {
1189 		q->delegation_domain = domain_find_ns_rrsets(
1190 			closest_encloser, q->zone, &q->delegation_rrset);
1191 
1192 		if (!q->delegation_domain
1193 		    || (exact && q->qtype == TYPE_DS && closest_encloser == q->delegation_domain))
1194 		{
1195 			if (q->qclass == CLASS_ANY) {
1196 				AA_CLR(q->packet);
1197 			} else {
1198 				AA_SET(q->packet);
1199 			}
1200 			answer_authoritative(nsd, q, answer, domain_number, exact,
1201 					     closest_match, closest_encloser, qname);
1202 		}
1203 		else {
1204 			answer_delegation(q, answer);
1205 		}
1206 	}
1207 }
1208 
1209 static void
1210 answer_query(struct nsd *nsd, struct query *q)
1211 {
1212 	domain_type *closest_match;
1213 	domain_type *closest_encloser;
1214 	int exact;
1215 	uint16_t offset;
1216 	answer_type answer;
1217 
1218 	answer_init(&answer);
1219 
1220 	exact = namedb_lookup(nsd->db, q->qname, &closest_match, &closest_encloser);
1221 	if (!closest_encloser->is_existing) {
1222 		exact = 0;
1223 		while (closest_encloser != NULL && !closest_encloser->is_existing)
1224 			closest_encloser = closest_encloser->parent;
1225 	}
1226 	if(!closest_encloser) {
1227 		RCODE_SET(q->packet, RCODE_SERVFAIL);
1228 		return;
1229 	}
1230 
1231 	answer_lookup_zone(nsd, q, &answer, 0, exact, closest_match,
1232 		closest_encloser, q->qname);
1233 
1234 	offset = dname_label_offsets(q->qname)[domain_dname(closest_encloser)->label_count - 1] + QHEADERSZ;
1235 	query_add_compression_domain(q, closest_encloser, offset);
1236 	encode_answer(q, &answer);
1237 	query_clear_compression_tables(q);
1238 }
1239 
1240 void
1241 query_prepare_response(query_type *q)
1242 {
1243 	uint16_t flags;
1244 
1245 	/*
1246 	 * Preserve the data up-to the current packet's limit.
1247 	 */
1248 	buffer_set_position(q->packet, buffer_limit(q->packet));
1249 	buffer_set_limit(q->packet, buffer_capacity(q->packet));
1250 
1251 	/*
1252 	 * Reserve space for the EDNS records if required.
1253 	 */
1254 	q->reserved_space = edns_reserved_space(&q->edns);
1255 	q->reserved_space += tsig_reserved_space(&q->tsig);
1256 
1257 	/* Update the flags.  */
1258 	flags = FLAGS(q->packet);
1259 	flags &= 0x0100U;	/* Preserve the RD flag.  */
1260 				/* CD flag must be cleared for auth answers */
1261 	flags |= 0x8000U;	/* Set the QR flag.  */
1262 	FLAGS_SET(q->packet, flags);
1263 }
1264 
1265 /*
1266  * Processes the query.
1267  *
1268  */
1269 query_state_type
1270 query_process(query_type *q, nsd_type *nsd)
1271 {
1272 	/* The query... */
1273 	nsd_rc_type rc;
1274 	query_state_type query_state;
1275 	uint16_t arcount;
1276 
1277 	/* Sanity checks */
1278 	if (buffer_limit(q->packet) < QHEADERSZ) {
1279 		/* packet too small to contain DNS header.
1280 		Now packet investigation macros will work without problems. */
1281 		return QUERY_DISCARDED;
1282 	}
1283 	if (QR(q->packet)) {
1284 		/* Not a query? Drop it on the floor. */
1285 		return QUERY_DISCARDED;
1286 	}
1287 
1288 	if (RCODE(q->packet) != RCODE_OK || !process_query_section(q)) {
1289 		return query_formerr(q);
1290 	}
1291 
1292 	/* Update statistics.  */
1293 	STATUP2(nsd, opcode, q->opcode);
1294 	STATUP2(nsd, qtype, q->qtype);
1295 	STATUP2(nsd, qclass, q->qclass);
1296 
1297 	if (q->opcode != OPCODE_QUERY) {
1298 		if (q->opcode == OPCODE_NOTIFY) {
1299 			return answer_notify(nsd, q);
1300 		} else {
1301 			return query_error(q, NSD_RC_IMPL);
1302 		}
1303 	}
1304 
1305 	/* Dont bother to answer more than one question at once... */
1306 	if (QDCOUNT(q->packet) != 1) {
1307 		FLAGS_SET(q->packet, 0);
1308 		return query_formerr(q);
1309 	}
1310 	/* Ignore settings of flags */
1311 
1312 	/* Dont allow any records in the answer or authority section...
1313 	   except for IXFR queries. */
1314 	if (ANCOUNT(q->packet) != 0 ||
1315 		(q->qtype!=TYPE_IXFR && NSCOUNT(q->packet) != 0)) {
1316 		return query_formerr(q);
1317 	}
1318 	if(q->qtype==TYPE_IXFR && NSCOUNT(q->packet) > 0) {
1319 		int i; /* skip ixfr soa information data here */
1320 		for(i=0; i< NSCOUNT(q->packet); i++)
1321 			if(!packet_skip_rr(q->packet, 0))
1322 				return query_formerr(q);
1323 	}
1324 
1325 	arcount = ARCOUNT(q->packet);
1326 	if (arcount > 0) {
1327 		/* According to draft-ietf-dnsext-rfc2671bis-edns0-10:
1328 		 * "The placement flexibility for the OPT RR does not
1329 		 * override the need for the TSIG or SIG(0) RRs to be
1330 		 * the last in the additional section whenever they are
1331 		 * present."
1332 		 * So we should not have to check for TSIG RR before
1333 		 * OPT RR. Keep the code for backwards compatibility.
1334 		 */
1335 
1336 		/* see if tsig is before edns record */
1337 		if (!tsig_parse_rr(&q->tsig, q->packet))
1338 			return query_formerr(q);
1339 		if(q->tsig.status != TSIG_NOT_PRESENT)
1340 			--arcount;
1341 	}
1342 	/* See if there is an OPT RR. */
1343 	if (arcount > 0) {
1344 		if (edns_parse_record(&q->edns, q->packet))
1345 			--arcount;
1346 	}
1347 	/* See if there is a TSIG RR. */
1348 	if (arcount > 0 && q->tsig.status == TSIG_NOT_PRESENT) {
1349 		/* see if tsig is after the edns record */
1350 		if (!tsig_parse_rr(&q->tsig, q->packet))
1351 			return query_formerr(q);
1352 		if(q->tsig.status != TSIG_NOT_PRESENT)
1353 			--arcount;
1354 	}
1355 	/* If more RRs left in Add. Section, FORMERR. */
1356 	if (arcount > 0) {
1357 		return query_formerr(q);
1358 	}
1359 
1360 	/* Do we have any trailing garbage? */
1361 #ifdef	STRICT_MESSAGE_PARSE
1362 	if (buffer_remaining(q->packet) > 0) {
1363 		/* If we're strict.... */
1364 		return query_formerr(q);
1365 	}
1366 #endif
1367 	/* Remove trailing garbage.  */
1368 	buffer_set_limit(q->packet, buffer_position(q->packet));
1369 
1370 	rc = process_tsig(q);
1371 	if (rc != NSD_RC_OK) {
1372 		return query_error(q, rc);
1373 	}
1374 	rc = process_edns(nsd, q);
1375 	if (rc != NSD_RC_OK) {
1376 		/* We should not return FORMERR, but BADVERS (=16).
1377 		 * BADVERS is created with Ext. RCODE, followed by RCODE.
1378 		 * Ext. RCODE is set to 1, RCODE must be 0 (getting 0x10 = 16).
1379 		 * Thus RCODE = NOERROR = NSD_RC_OK. */
1380 		return query_error(q, NSD_RC_OK);
1381 	}
1382 
1383 	query_prepare_response(q);
1384 
1385 	if (q->qclass != CLASS_IN && q->qclass != CLASS_ANY) {
1386 		if (q->qclass == CLASS_CH) {
1387 			return answer_chaos(nsd, q);
1388 		} else {
1389 			return query_error(q, NSD_RC_REFUSE);
1390 		}
1391 	}
1392 
1393 	query_state = answer_axfr_ixfr(nsd, q);
1394 	if (query_state == QUERY_PROCESSED || query_state == QUERY_IN_AXFR) {
1395 		return query_state;
1396 	}
1397 
1398 	answer_query(nsd, q);
1399 
1400 	return QUERY_PROCESSED;
1401 }
1402 
1403 void
1404 query_add_optional(query_type *q, nsd_type *nsd)
1405 {
1406 	struct edns_data *edns = &nsd->edns_ipv4;
1407 #if defined(INET6)
1408 	if (q->addr.ss_family == AF_INET6) {
1409 		edns = &nsd->edns_ipv6;
1410 	}
1411 #endif
1412 	if (RCODE(q->packet) == RCODE_FORMAT) {
1413 		return;
1414 	}
1415 	switch (q->edns.status) {
1416 	case EDNS_NOT_PRESENT:
1417 		break;
1418 	case EDNS_OK:
1419 		if (q->edns.dnssec_ok)	edns->ok[7] = 0x80;
1420 		else			edns->ok[7] = 0x00;
1421 		buffer_write(q->packet, edns->ok, OPT_LEN);
1422 		if (nsd->nsid_len > 0 && q->edns.nsid == 1 &&
1423 				!query_overflow_nsid(q, nsd->nsid_len)) {
1424 			/* rdata length */
1425 			buffer_write(q->packet, edns->rdata_nsid, OPT_RDATA);
1426 			/* nsid opt header */
1427 			buffer_write(q->packet, edns->nsid, OPT_HDR);
1428 			/* nsid payload */
1429 			buffer_write(q->packet, nsd->nsid, nsd->nsid_len);
1430 		}  else {
1431 			/* fill with NULLs */
1432 			buffer_write(q->packet, edns->rdata_none, OPT_RDATA);
1433 		}
1434 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1435 		STATUP(nsd, edns);
1436 		break;
1437 	case EDNS_ERROR:
1438 		if (q->edns.dnssec_ok)	edns->error[7] = 0x80;
1439 		else			edns->error[7] = 0x00;
1440 		buffer_write(q->packet, edns->error, OPT_LEN);
1441 		buffer_write(q->packet, edns->rdata_none, OPT_RDATA);
1442 		ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1443 		STATUP(nsd, ednserr);
1444 		break;
1445 	}
1446 
1447 	if (q->tsig.status != TSIG_NOT_PRESENT) {
1448 		if (q->tsig.status == TSIG_ERROR ||
1449 			q->tsig.error_code != TSIG_ERROR_NOERROR) {
1450 			tsig_error_reply(&q->tsig);
1451 			tsig_append_rr(&q->tsig, q->packet);
1452 			ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1453 		} else if(q->tsig.status == TSIG_OK &&
1454 			q->tsig.error_code == TSIG_ERROR_NOERROR)
1455 		{
1456 			if(q->tsig_prepare_it)
1457 				tsig_prepare(&q->tsig);
1458 			if(q->tsig_update_it)
1459 				tsig_update(&q->tsig, q->packet, buffer_position(q->packet));
1460 			if(q->tsig_sign_it) {
1461 				tsig_sign(&q->tsig);
1462 				tsig_append_rr(&q->tsig, q->packet);
1463 				ARCOUNT_SET(q->packet, ARCOUNT(q->packet) + 1);
1464 			}
1465 		}
1466 	}
1467 }
1468