1 /*
2  * Copyright (c) 2012-2015, 2017-2021 by Farsight Security, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *    http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <sys/socket.h>
18 #include <arpa/inet.h>
19 
20 #include "libmy/my_byteorder.h"
21 
22 #include "dnstable-private.h"
23 
24 #include "libmy/ip_arith.h"
25 #include "libmy/hex_decode.h"
26 
27 struct dnstable_query {
28 	dnstable_query_type	q_type;
29 	bool			do_rrtype, do_timeout;
30 	bool			do_time_first_before, do_time_first_after;
31 	bool			do_time_last_before, do_time_last_after;
32 	char			*err;
33 	wdns_name_t		name, bailiwick;
34 	uint32_t		rrtype;
35 	bool			aggregated;
36 	size_t			len_rdata, len_rdata2;
37 	uint8_t			*rdata, *rdata2;
38 	struct timespec		timeout;
39 	uint64_t		time_first_before, time_first_after;
40 	uint64_t		time_last_before, time_last_after;
41 	uint64_t		offset;
42 	bool			has_v_type;
43 	uint8_t			v_type;
44 };
45 
46 struct query_iter {
47 	struct dnstable_query	*query;
48 	const struct mtbl_source *source;
49 	struct mtbl_iter	*m_iter, *m_iter2;
50 	ubuf			*key, *key2;
51 };
52 
53 static void
query_set_err(struct dnstable_query * q,const char * err)54 query_set_err(struct dnstable_query *q, const char *err)
55 {
56 	my_free(q->err);
57 	q->err = my_strdup(err);
58 }
59 
60 static dnstable_res
query_load_name(struct dnstable_query * q,wdns_name_t * name,const char * s_name)61 query_load_name(struct dnstable_query *q, wdns_name_t *name, const char *s_name)
62 {
63 	my_free(name->data);
64 	name->len = 0;
65 	if (s_name == NULL)
66 		return (dnstable_res_success);
67 	if (wdns_str_to_name(s_name, name) != wdns_res_success) {
68 		query_set_err(q, "wdns_str_to_name() failed");
69 		return (dnstable_res_failure);
70 	}
71 	wdns_downcase_name(name);
72 	return (dnstable_res_success);
73 }
74 
75 static dnstable_res
query_load_address(struct dnstable_query * q,const char * data,uint8_t ** addr,size_t * len_addr)76 query_load_address(struct dnstable_query *q, const char *data, uint8_t **addr, size_t *len_addr)
77 {
78 	uint8_t buf[16];
79 	my_free(*addr);
80 	if (inet_pton(AF_INET, data, buf)) {
81 		*len_addr = 4;
82 		*addr = my_malloc(4);
83 		memcpy(*addr, buf, 4);
84 		return (dnstable_res_success);
85 	} else if (inet_pton(AF_INET6, data, buf)) {
86 		*len_addr = 16;
87 		*addr = my_malloc(16);
88 		memcpy(*addr, buf, 16);
89 		return (dnstable_res_success);
90 	}
91 	query_set_err(q, "inet_pton() failed");
92 	return (dnstable_res_failure);
93 }
94 
95 struct dnstable_query *
dnstable_query_init(dnstable_query_type q_type)96 dnstable_query_init(dnstable_query_type q_type)
97 {
98 	assert(q_type == DNSTABLE_QUERY_TYPE_RRSET ||
99 	       q_type == DNSTABLE_QUERY_TYPE_RDATA_NAME ||
100 	       q_type == DNSTABLE_QUERY_TYPE_RDATA_IP ||
101 	       q_type == DNSTABLE_QUERY_TYPE_RDATA_RAW ||
102 	       q_type == DNSTABLE_QUERY_TYPE_TIME_RANGE ||
103 	       q_type == DNSTABLE_QUERY_TYPE_VERSION);
104 	struct dnstable_query *q = my_calloc(1, sizeof(*q));
105 	q->q_type = q_type;
106 	q->aggregated = true;
107 	return (q);
108 }
109 
110 void
dnstable_query_destroy(struct dnstable_query ** q)111 dnstable_query_destroy(struct dnstable_query **q)
112 {
113 	if (*q) {
114 		my_free((*q)->rdata);
115 		my_free((*q)->rdata2);
116 		my_free((*q)->name.data);
117 		my_free((*q)->bailiwick.data);
118 		my_free((*q)->err);
119 		my_free(*q);
120 	}
121 }
122 
123 const char *
dnstable_query_get_error(struct dnstable_query * q)124 dnstable_query_get_error(struct dnstable_query *q) {
125 	if (q->err == NULL)
126 		q->err = my_strdup("unknown error");
127 	assert(q->err != NULL);
128 	return (q->err);
129 }
130 
131 dnstable_res
dnstable_query_set_bailiwick(struct dnstable_query * q,const char * s_name)132 dnstable_query_set_bailiwick(struct dnstable_query *q, const char *s_name)
133 {
134 	if (q->q_type != DNSTABLE_QUERY_TYPE_RRSET) {
135 		query_set_err(q, "bailiwick filtering not supported");
136 		return (dnstable_res_failure);
137 	}
138 	return query_load_name(q, &q->bailiwick, s_name);
139 }
140 
141 static dnstable_res
query_set_data_rrset_owner(struct dnstable_query * q,const char * s_name)142 query_set_data_rrset_owner(struct dnstable_query *q, const char *s_name)
143 {
144 	return query_load_name(q, &q->name, s_name);
145 }
146 
147 static dnstable_res
query_set_data_rdata_name(struct dnstable_query * q,const char * s_name)148 query_set_data_rdata_name(struct dnstable_query *q, const char *s_name)
149 {
150 	return query_load_name(q, &q->name, s_name);
151 }
152 
153 static dnstable_res
query_set_data_rdata_raw(struct dnstable_query * q,const char * data)154 query_set_data_rdata_raw(struct dnstable_query *q, const char *data)
155 {
156 	my_free(q->rdata);
157 	if (data == NULL)
158 		return (dnstable_res_success);
159 	return hex_decode(data, &q->rdata, &q->len_rdata);
160 }
161 
162 static dnstable_res
query_set_data_rdata_ip_range(struct dnstable_query * q,const char * data)163 query_set_data_rdata_ip_range(struct dnstable_query *q, const char *data)
164 {
165 	dnstable_res res = dnstable_res_failure;
166 	char *s = my_strdup(data);
167 	char *addr1, *addr2;
168 	char *saveptr = NULL;
169 
170 	if ((addr1 = strtok_r(s, "-", &saveptr)) == NULL) goto out;
171 	if ((addr2 = strtok_r(NULL, "-", &saveptr)) == NULL) goto out;
172 	if (strtok_r(NULL, "-", &saveptr) != NULL) goto out;
173 
174 	if (!query_load_address(q, addr1, &q->rdata, &q->len_rdata)) goto out;
175 	if (!query_load_address(q, addr2, &q->rdata2, &q->len_rdata2)) goto out;
176 	if (q->len_rdata != q->len_rdata2) {
177 		query_set_err(q, "address family mismatch in IP range");
178 		goto out;
179 	}
180 	q->do_rrtype = true;
181 	if (q->len_rdata == 4) {
182 		q->rrtype = WDNS_TYPE_A;
183 	} else if (q->len_rdata == 16) {
184 		q->rrtype = WDNS_TYPE_AAAA;
185 	}
186 	res = dnstable_res_success;
187 out:
188 	my_free(s);
189 	return (res);
190 }
191 
192 static dnstable_res
query_set_data_rdata_ip_prefix(struct dnstable_query * q,const char * data)193 query_set_data_rdata_ip_prefix(struct dnstable_query *q, const char *data)
194 {
195 	dnstable_res res = dnstable_res_failure;
196 	char *s = NULL;
197 	uint8_t *ip = NULL;
198 	size_t len_ip;
199 	char *address, *prefix_length;
200 	char *saveptr, *endptr;
201 	long plen;
202 
203 	s = my_strdup(data);
204 	assert(s != NULL);
205 	if ((address = strtok_r(s, "/", &saveptr)) == NULL) goto out;
206 	if ((prefix_length = strtok_r(NULL, "/", &saveptr)) == NULL) goto out;
207 	if (strtok_r(NULL, "/", &saveptr) != NULL) goto out;
208 
209 	if (!query_load_address(q, address, &ip, &len_ip)) goto out;
210 
211 	errno = 0;
212 	plen = strtol(prefix_length, &endptr, 10);
213 	if (errno != 0 || *endptr != '\0') goto out;
214 
215 	if ((len_ip == 4 && plen > 32) ||
216 	    (len_ip == 16 && plen > 128))
217 	{
218 		res = dnstable_res_failure;
219 		goto out;
220 	}
221 
222 	if (len_ip == 4) {
223 		q->do_rrtype = true;
224 		q->rrtype = WDNS_TYPE_A;
225 
226 		q->len_rdata = len_ip;
227 		q->len_rdata2 = len_ip;
228 		my_free(q->rdata);
229 		my_free(q->rdata2);
230 		q->rdata = my_malloc(len_ip);
231 		q->rdata2 = my_malloc(len_ip);
232 		ip4_lower(ip, plen, q->rdata);
233 		ip4_upper(ip, plen, q->rdata2);
234 		res = dnstable_res_success;
235 	} else if (len_ip == 16) {
236 		q->do_rrtype = true;
237 		q->rrtype = WDNS_TYPE_AAAA;
238 
239 		q->len_rdata = len_ip;
240 		q->len_rdata2 = len_ip;
241 		my_free(q->rdata);
242 		my_free(q->rdata2);
243 		q->rdata = my_malloc(len_ip);
244 		q->rdata2 = my_malloc(len_ip);
245 		ip6_lower(ip, plen, q->rdata);
246 		ip6_upper(ip, plen, q->rdata2);
247 		res = dnstable_res_success;
248 	}
249 
250 out:
251 	if (res != dnstable_res_success)
252 		query_set_err(q, "unable to parse IP prefix");
253 	my_free(ip);
254 	my_free(s);
255 	return (res);
256 }
257 
258 static dnstable_res
query_set_data_rdata_ip_address(struct dnstable_query * q,const char * data)259 query_set_data_rdata_ip_address(struct dnstable_query *q, const char *data)
260 {
261 	my_free(q->rdata2);
262 	if (!query_load_address(q, data, &q->rdata, &q->len_rdata))
263 		return (dnstable_res_failure);
264 	q->do_rrtype = true;
265 	if (q->len_rdata == 4)
266 		q->rrtype = WDNS_TYPE_A;
267 	else if (q->len_rdata == 16)
268 		q->rrtype = WDNS_TYPE_AAAA;
269 	return (dnstable_res_success);
270 }
271 
272 static dnstable_res
query_set_data_rdata_ip(struct dnstable_query * q,const char * data)273 query_set_data_rdata_ip(struct dnstable_query *q, const char *data)
274 {
275 	if (data == NULL) {
276 		my_free(q->rdata);
277 		my_free(q->rdata2);
278 		return (dnstable_res_success);
279 	}
280 
281 	if (strchr(data, '-')) {
282 		return query_set_data_rdata_ip_range(q, data);
283 	} else if (strchr(data, '/')) {
284 		return query_set_data_rdata_ip_prefix(q, data);
285 	} else {
286 		return query_set_data_rdata_ip_address(q, data);
287 	}
288 }
289 
290 static dnstable_res
query_set_version_type(struct dnstable_query * q,const char * data)291 query_set_version_type(struct dnstable_query *q, const char *data)
292 {
293 	dnstable_res res;
294 	dnstable_entry_type type;
295 
296 	if (data == NULL)
297 		return (dnstable_res_success);
298 
299 	res = dnstable_entry_type_from_string(&type, data);
300 	if (res != dnstable_res_success)
301 		return res;
302 
303 	switch(type) {
304 	case ENTRY_TYPE_RRSET:
305 	case ENTRY_TYPE_RRSET_NAME_FWD:
306 	case ENTRY_TYPE_RDATA:
307 	case ENTRY_TYPE_RDATA_NAME_REV:
308 		q->has_v_type = true;
309 		q->v_type = (uint8_t)type;
310 		return (dnstable_res_success);
311 	default:
312 		return (dnstable_res_failure);
313 	}
314 }
315 
316 dnstable_res
dnstable_query_set_data(struct dnstable_query * q,const char * data)317 dnstable_query_set_data(struct dnstable_query *q, const char *data)
318 {
319 	if (q->q_type == DNSTABLE_QUERY_TYPE_RRSET) {
320 		return query_set_data_rrset_owner(q, data);
321 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_NAME) {
322 		return query_set_data_rdata_name(q, data);
323 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_IP) {
324 		return query_set_data_rdata_ip(q, data);
325 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_RAW) {
326 		return query_set_data_rdata_raw(q, data);
327 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_VERSION) {
328 		return query_set_version_type(q, data);
329 	} else {
330 		return (dnstable_res_failure);
331 	}
332 }
333 
334 dnstable_res
dnstable_query_set_rrtype(struct dnstable_query * q,const char * s_rrtype)335 dnstable_query_set_rrtype(struct dnstable_query *q, const char *s_rrtype)
336 {
337 	uint16_t rrtype;
338 
339 	if (s_rrtype == NULL) {
340 		q->do_rrtype = false;
341 		return (dnstable_res_success);
342 	}
343 
344 	if (strcasecmp(s_rrtype, "ANY") == 0 ||
345 	    strcasecmp(s_rrtype, "TYPE255") == 0 || /* ANY == TYPE255 */
346 	    strcasecmp(s_rrtype, "ANY-DNSSEC") == 0)
347 	{
348 		q->do_rrtype = false;
349 		return (dnstable_res_success);
350 	}
351 
352 	rrtype = wdns_str_to_rrtype(s_rrtype);
353 	if (rrtype == 0) {
354 		query_set_err(q, "unknown rrtype mnemonic");
355 		return (dnstable_res_failure);
356 	}
357 	q->rrtype = rrtype;
358 	q->do_rrtype = true;
359 	return (dnstable_res_success);
360 }
361 
362 dnstable_res
dnstable_query_set_offset(struct dnstable_query * q,uint64_t offset)363 dnstable_query_set_offset(struct dnstable_query *q, uint64_t offset)
364 {
365 	q->offset = offset;
366 	return (dnstable_res_success);
367 }
368 
369 dnstable_res
dnstable_query_set_aggregated(struct dnstable_query * q,bool aggregated)370 dnstable_query_set_aggregated(struct dnstable_query *q, bool aggregated)
371 {
372 	q->aggregated = aggregated;
373 	return (dnstable_res_success);
374 }
375 
dnstable_query_is_aggregated(const struct dnstable_query * q)376 bool dnstable_query_is_aggregated(const struct dnstable_query *q)
377 {
378 	return q->aggregated;
379 }
380 
381 dnstable_res
dnstable_query_set_timeout(struct dnstable_query * q,const struct timespec * timeout)382 dnstable_query_set_timeout(struct dnstable_query *q, const struct timespec *timeout)
383 {
384 	if (timeout == NULL) {
385 		q->do_timeout = false;
386 		return (dnstable_res_success);
387 	}
388 
389 	q->do_timeout = true;
390 	q->timeout = *timeout;
391 
392 	return (dnstable_res_success);
393 }
394 
/*
 * Set or clear one filter parameter on query `q`.
 *
 * `p_name` is the bare field name: the macro writes the field `p_name` and
 * its enable flag `do_<p_name>`. A non-NULL `param` enables the filter and
 * copies `len_param` bytes from it into the field; a NULL `param` disables
 * the filter and leaves the stored value alone.
 */
#define set_filter_parameter(q, p_name, param, len_param) \
do { \
	(q)->do_##p_name = ((param) != NULL); \
	if ((q)->do_##p_name) \
		memcpy(&(q)->p_name, (param), (len_param)); \
} while (0)
404 
405 dnstable_res
dnstable_query_set_filter_parameter(struct dnstable_query * q,dnstable_filter_parameter_type p_type,const void * param,const size_t len_param)406 dnstable_query_set_filter_parameter(struct dnstable_query *q,
407 				    dnstable_filter_parameter_type p_type,
408 				    const void *param,
409 				    const size_t len_param)
410 {
411 	if (len_param != sizeof(uint64_t))
412 		return (dnstable_res_failure);
413 
414 	switch (p_type) {
415 	case DNSTABLE_FILTER_PARAMETER_TIME_FIRST_BEFORE:
416 		set_filter_parameter(q, time_first_before, param, len_param);
417 		return (dnstable_res_success);
418 	case DNSTABLE_FILTER_PARAMETER_TIME_FIRST_AFTER:
419 		set_filter_parameter(q, time_first_after, param, len_param);
420 		return (dnstable_res_success);
421 	case DNSTABLE_FILTER_PARAMETER_TIME_LAST_BEFORE:
422 		set_filter_parameter(q, time_last_before, param, len_param);
423 		return (dnstable_res_success);
424 	case DNSTABLE_FILTER_PARAMETER_TIME_LAST_AFTER:
425 		set_filter_parameter(q, time_last_after, param, len_param);
426 		return (dnstable_res_success);
427 	default:
428 		return (dnstable_res_failure);
429 	}
430 }
431 
432 dnstable_res
dnstable_query_filter(struct dnstable_query * q,struct dnstable_entry * e,bool * pass)433 dnstable_query_filter(struct dnstable_query *q, struct dnstable_entry *e, bool *pass)
434 {
435 	dnstable_res res;
436 
437 	if (q->do_rrtype) {
438 		uint16_t rrtype;
439 		res = dnstable_entry_get_rrtype(e, &rrtype);
440 		if (res != dnstable_res_success)
441 			return (res);
442 		if (rrtype != q->rrtype)
443 			goto fail;
444 	}
445 
446 	if (q->do_time_first_before || q->do_time_first_after) {
447 		uint64_t time_first;
448 		res = dnstable_entry_get_time_first(e, &time_first);
449 		if (res != dnstable_res_success)
450 			return (res);
451 
452 		if (q->do_time_first_before && q->time_first_before < time_first)
453 			goto fail;
454 		if (q->do_time_first_after && q->time_first_after > time_first)
455 			goto fail;
456 	}
457 
458 	if (q->do_time_last_before || q->do_time_last_after) {
459 		uint64_t time_last;
460 		res = dnstable_entry_get_time_last(e, &time_last);
461 		if (res != dnstable_res_success)
462 			return (res);
463 
464 		if (q->do_time_last_before && q->time_last_before < time_last)
465 			goto fail;
466 		if (q->do_time_last_after && q->time_last_after > time_last)
467 			goto fail;
468 	}
469 
470 	if (q->q_type == DNSTABLE_QUERY_TYPE_RRSET && q->bailiwick.data != NULL) {
471 		const uint8_t *bailiwick;
472 		size_t len_bailiwick;
473 		res = dnstable_entry_get_bailiwick(e, &bailiwick, &len_bailiwick);
474 		if (res != dnstable_res_success)
475 			return (res);
476 		if (q->bailiwick.len != len_bailiwick)
477 			goto fail;
478 		if (memcmp(q->bailiwick.data, bailiwick, len_bailiwick) != 0)
479 			goto fail;
480 	}
481 
482 	*pass = true;
483 	return (dnstable_res_success);
484 fail:
485 	*pass = false;
486 	return (dnstable_res_success);
487 }
488 
489 static void
query_iter_free(void * clos)490 query_iter_free(void *clos)
491 {
492 	struct query_iter *it = (struct query_iter *) clos;
493 	mtbl_iter_destroy(&it->m_iter);
494 	mtbl_iter_destroy(&it->m_iter2);
495 	ubuf_destroy(&it->key);
496 	ubuf_destroy(&it->key2);
497 	my_free(it);
498 }
499 
500 static void
add_rrtype_to_key(ubuf * key,uint32_t rrtype)501 add_rrtype_to_key(ubuf *key, uint32_t rrtype)
502 {
503 	assert(rrtype != WDNS_TYPE_ANY);
504 	ubuf_reserve(key, ubuf_size(key) + mtbl_varint_length(rrtype));
505 	ubuf_advance(key, mtbl_varint_encode32(ubuf_ptr(key), rrtype));
506 }
507 
508 static dnstable_res
increment_key(ubuf * key,size_t pos)509 increment_key(ubuf *key, size_t pos)
510 {
511 	assert(pos < ubuf_size(key));
512 	for (uint8_t *ptr = ubuf_data(key) + pos; ptr >= ubuf_data(key); ptr--) {
513 		(*ptr)++;
514 		if (*ptr != 0) {
515 			return (dnstable_res_success);
516 		}
517 	}
518 	return (dnstable_res_failure);
519 }
520 
521 static dnstable_res
query_iter_next(void * clos,struct dnstable_entry ** ent)522 query_iter_next(void *clos, struct dnstable_entry **ent)
523 {
524 	struct query_iter *it = (struct query_iter *) clos;
525 	struct timespec expiry = {0};
526 
527 	if (it->query->do_timeout) {
528 		my_gettime(DNSTABLE__CLOCK_MONOTONIC, &expiry);
529 		my_timespec_add(&it->query->timeout, &expiry);
530 	}
531 
532 	for (;;) {
533 		bool pass = false;
534 		dnstable_res res;
535 		const uint8_t *key, *val;
536 		size_t len_key, len_val;
537 		struct timespec now = {0};
538 
539 		if (it->query->do_timeout) {
540 			my_gettime(DNSTABLE__CLOCK_MONOTONIC, &now);
541 			if (my_timespec_cmp(&now, &expiry) >= 0)
542 				return (dnstable_res_timeout);
543 		}
544 		if (mtbl_iter_next(it->m_iter, &key, &len_key, &val, &len_val) != mtbl_res_success)
545 			return (dnstable_res_failure);
546 
547 		*ent = dnstable_entry_decode(key, len_key, val, len_val);
548 		if (*ent == NULL)
549 			continue;
550 
551 		res = dnstable_query_filter(it->query, *ent, &pass);
552 		assert(res == dnstable_res_success);
553 		if (pass) {
554 			/* offset (e.g. skip) initial rows */
555 			if (it->query->offset > 0 && it->query->offset-- > 0)
556 			{
557 				dnstable_entry_destroy(ent);
558 				continue;
559 			}
560 
561 			return (dnstable_res_success);
562 		} else {
563 			dnstable_entry_destroy(ent);
564 			continue;
565 		}
566 	}
567 	return (dnstable_res_failure);
568 }
569 
/*
 * Iterator callback for IP rdata queries: return the next entry from
 * it->m_iter that decodes and passes the query's filters.
 *
 * For prefix/range searches (it->key2 != NULL) with an rrtype filter, an
 * entry of the wrong rrtype is not merely filtered out: a new start key is
 * built from the current entry's key and the iterator seeks forward to it,
 * skipping runs of irrelevant entries.
 *
 * Returns dnstable_res_success with *ent set, dnstable_res_timeout when the
 * query timeout (measured from the start of this call) has elapsed, or
 * dnstable_res_failure when the underlying iterator is exhausted, a seek
 * fails, or the skip-ahead key cannot be incremented any further.
 */
static dnstable_res
query_iter_next_ip(void *clos, struct dnstable_entry **ent)
{
	struct query_iter *it = (struct query_iter *) clos;
	struct timespec expiry = {0};

	/* The deadline is recomputed on every call. */
	if (it->query->do_timeout) {
		my_gettime(DNSTABLE__CLOCK_MONOTONIC, &expiry);
		my_timespec_add(&(it->query->timeout), &expiry);
	}

	for (;;) {
		bool pass = false;
		dnstable_res res;
		const uint8_t *key, *val;
		size_t len_key, len_val;

		if (it->query->do_timeout) {
			struct timespec now;
			my_gettime(DNSTABLE__CLOCK_MONOTONIC, &now);
			if (my_timespec_cmp(&now, &expiry) >= 0)
				return (dnstable_res_timeout);
		}

		if (mtbl_iter_next(it->m_iter, &key, &len_key, &val, &len_val) != mtbl_res_success)
			return (dnstable_res_failure);

		/* Entries that fail to decode are silently skipped. */
		*ent = dnstable_entry_decode(key, len_key, val, len_val);
		if (*ent ==  NULL)
			continue;

		/*
		 * it->key2 != NULL implies an IP prefix/range search, for which
		 * we can perform a special optimization to skip past irrelevant
		 * entries.
		 */
		if (it->query->do_rrtype && it->key2 != NULL) {
			/* Get the rrtype of the decoded entry. */
			uint16_t rrtype;
			res = dnstable_entry_get_rrtype(*ent, &rrtype);
			if (res != dnstable_res_success) {
				dnstable_entry_destroy(ent);
				return (res);
			}

			if (rrtype != it->query->rrtype) {
				/*
				 * Destroy the current entry. It will not be
				 * processed since it's the wrong rrtype.
				 */
				dnstable_entry_destroy(ent);

				/*
				 * Create a new start key with the prefix of the
				 * current entry's key, plus the target rrtype.
				 * This ends up being an IP address derived from
				 * the first 4 or 16 bytes of the current key's
				 * rdata, sandwiched between the entry type byte
				 * and the rrtype.
				 *
				 * This is helpful when the query rrtype is AAAA
				 * (28), which comes numerically after many
				 * common rrtypes.
				 */
				ubuf *new_key = ubuf_init(ubuf_size(it->key));
				size_t rrtype_len = mtbl_varint_length(it->query->rrtype);
				size_t key_prefix_len = ubuf_size(it->key) - rrtype_len;
				if (key_prefix_len <= len_key) {
					ubuf_append(new_key, key, key_prefix_len);
				} else {
					/* Zero fill short keys. */
					ubuf_reserve(new_key, key_prefix_len);
					ubuf_append(new_key, key, len_key);
					ubuf_advance(new_key, key_prefix_len - len_key);
					memset(ubuf_data(new_key) + len_key, 0,
					       key_prefix_len - len_key);
				}
				add_rrtype_to_key(new_key, it->query->rrtype);

				/*
				 * Check if the key that we just generated sorts
				 * prior to the current entry's key. If so, it's
				 * OK to skip ahead to the next IP address,
				 * because we must have *already* consumed any
				 * entries between the key we just generated up
				 * to the current entry's key.
				 *
				 * This check is very likely to succeed for IPv4
				 * addresses, since rrtype A (1) is the lowest
				 * rrtype value in use, but less likely to
				 * succeed for IPv6 addresses since rrtype AAAA
				 * (28) sorts after many common rrtypes.
				 */
				if (bytes_compare(ubuf_data(new_key), ubuf_size(new_key),
						  key, len_key) <= 0)
				{
					/*
					 * Increment the IP address in the
					 * middle of our key by one. This
					 * correctly handles octet overflow,
					 * e.g. 10.0.255.255 -> 10.1.0.0.
					 *
					 * This potentially eliminates a large
					 * number of irrelevant entries, which
					 * we would otherwise have to retrieve
					 * and filter out.
					 */
					res = increment_key(new_key, key_prefix_len - 1);

					/*
					 * If increment_key() failed, then we
					 * were already at the all-ones A/AAAA
					 * address. Entries up to and including
					 * that address have already been
					 * consumed, so stop iterating now.
					 */
					if (res != dnstable_res_success) {
						ubuf_destroy(&new_key);
						mtbl_iter_destroy(&it->m_iter);
						return (dnstable_res_failure);
					}
				}

				/*
				 * Safety check: we should have generated a key
				 * containing embedded data exactly as long as
				 * an IP address, and thus the key should be
				 * exactly as long as the original search key.
				 */
				assert(ubuf_size(new_key) == ubuf_size(it->key));

				/*
				 * Seek to the newly generated key.
				 */
				if (mtbl_iter_seek(it->m_iter, ubuf_data(new_key), ubuf_size(new_key)) != mtbl_res_success) {
					ubuf_destroy(&new_key);
					return (dnstable_res_failure);
				}
				ubuf_destroy(&new_key);

				/*
				 * Restart processing starting from the new key.
				 */
				continue;
			}
		}

		res = dnstable_query_filter(it->query, *ent, &pass);
		assert(res == dnstable_res_success);
		if (pass) {
			/* offset (e.g. skip) initial rows */
			if (it->query->offset > 0 && it->query->offset-- > 0)
			{
				dnstable_entry_destroy(ent);
				continue;
			}

			return (dnstable_res_success);
		} else {
			dnstable_entry_destroy(ent);
			continue;
		}
	}
	/* Not reached: the loop above only exits via return. */
	return (dnstable_res_failure);
}
735 
/*
 * Two-level iterator callback shared by the wildcard lookups that go through
 * a name index.
 *
 * it->m_iter2 walks the index entries. For each of them a lookup key is
 * built in it->key (type_byte, the label-reversed name taken from the index
 * key, and optionally the wanted rrtype) and it->m_iter is opened as a
 * prefix iterator over that key. Entries from it->m_iter are decoded,
 * filtered and returned one per call; once it->m_iter is exhausted it is
 * destroyed and the next index entry is consumed.
 *
 * `type_byte` is the entry type byte the generated lookup keys start with.
 * This assumes it is called on an entry type with possible new rrtype
 * indexes.
 *
 * Returns dnstable_res_success with *ent set, dnstable_res_timeout when the
 * query timeout (measured from the start of this call) has elapsed, or
 * dnstable_res_failure when the index iterator is exhausted or a name
 * cannot be reversed.
 */
static dnstable_res
query_iter_next_name_indirect(void *clos, struct dnstable_entry **ent, uint8_t type_byte)
{
	struct query_iter *it = (struct query_iter *) clos;
	const uint8_t *key, *val;
	size_t len_key, len_val;
	bool pass = false;
	dnstable_res res;
	struct timespec expiry = {0};

	/* The deadline is recomputed on every call. */
	if (it->query->do_timeout) {
		my_gettime(DNSTABLE__CLOCK_MONOTONIC, &expiry);
		my_timespec_add(&(it->query->timeout), &expiry);
	}

	for (;;) {
		struct timespec now = {0};

		if (it->query->do_timeout) {
			my_gettime(DNSTABLE__CLOCK_MONOTONIC, &now);
			if (my_timespec_cmp(&now, &expiry) >= 0)
				return (dnstable_res_timeout);
		}

		/* No entry iterator open: advance the index and open one. */
		if (it->m_iter == NULL) {
			uint16_t wanted_rrtype = it->query->rrtype;

			if (mtbl_iter_next(it->m_iter2,
					   &key, &len_key,
					   &val, &len_val) != mtbl_res_success)
			{
				return (dnstable_res_failure);
			}

			/* use the new rrtype indexes */
			/* NOTE(review): rrtype_test() is defined elsewhere; it presumably
			 * checks the index value for the wanted rrtype -- confirm. */
			if (it->query->do_rrtype && !rrtype_test(type_byte, wanted_rrtype, val, len_val))
				continue;

			/*
			 * Rebuild it->key from scratch: type byte, then the
			 * label-reversed form of the index key without its
			 * first byte (presumably the index's own type byte),
			 * written directly into the reserved buffer space.
			 */
			ubuf_clip(it->key, 0);
			ubuf_reserve(it->key, len_key + mtbl_varint_length(wanted_rrtype));
			ubuf_add(it->key, type_byte);
			if (wdns_reverse_name(key + 1, len_key - 1, ubuf_ptr(it->key))
			    != wdns_res_success)
				return (dnstable_res_failure);
			ubuf_advance(it->key, len_key - 1);
			/*
			 * RRSET keys always carry the rrtype after the name;
			 * for other entry types it is appended only for the
			 * rrtypes listed below.
			 */
			if (it->query->do_rrtype &&
				(type_byte == ENTRY_TYPE_RRSET))
				add_rrtype_to_key(it->key, wanted_rrtype);
			else if (it->query->do_rrtype) {
				switch(wanted_rrtype) {
				case WDNS_TYPE_NS:
				case WDNS_TYPE_CNAME:
				case WDNS_TYPE_DNAME:
				case WDNS_TYPE_PTR:
				case WDNS_TYPE_MX:
				case WDNS_TYPE_SRV:
					add_rrtype_to_key(it->key, wanted_rrtype);
				}
			}
			it->m_iter = mtbl_source_get_prefix(it->source,
							    ubuf_data(it->key),
							    ubuf_size(it->key));
			if (it->m_iter == NULL)
				continue;
		}
		assert(it->m_iter != NULL);
		if (mtbl_iter_next(it->m_iter,
				   &key, &len_key,
				   &val, &len_val) != mtbl_res_success)
		{
			/* Entry iterator exhausted: drop it and move to the next index entry. */
			mtbl_iter_destroy(&it->m_iter);
			continue;
		}

		/* Entries that fail to decode are silently skipped. */
		*ent = dnstable_entry_decode(key, len_key, val, len_val);
		if (*ent == NULL)
			continue;

		res = dnstable_query_filter(it->query, *ent, &pass);
		assert(res == dnstable_res_success);
		if (pass) {
			/* offset (e.g. skip) initial rows */
			if (it->query->offset > 0 && it->query->offset-- > 0)
			{
				dnstable_entry_destroy(ent);
				continue;
			}

			return (dnstable_res_success);
		} else {
			dnstable_entry_destroy(ent);
			continue;
		}
	}
	/* Not reached: the loop above only exits via return. */
	return (dnstable_res_failure);
}
833 
834 static dnstable_res
query_iter_next_rrset_name_fwd(void * clos,struct dnstable_entry ** ent)835 query_iter_next_rrset_name_fwd(void *clos, struct dnstable_entry **ent)
836 {
837 	return query_iter_next_name_indirect(clos, ent, ENTRY_TYPE_RRSET);
838 }
839 
840 static dnstable_res
query_iter_next_rdata_name_rev(void * clos,struct dnstable_entry ** ent)841 query_iter_next_rdata_name_rev(void *clos, struct dnstable_entry **ent)
842 {
843 	return query_iter_next_name_indirect(clos, ent, ENTRY_TYPE_RDATA);
844 }
845 
846 static struct dnstable_iter *
query_init_rrset_right_wildcard(struct query_iter * it)847 query_init_rrset_right_wildcard(struct query_iter *it)
848 {
849 	/* key: type byte */
850 	ubuf_add(it->key, ENTRY_TYPE_RRSET_NAME_FWD);
851 
852 	/* key: rrset owner name, less trailing "\x01\x2a\x00" */
853 	ubuf_append(it->key, it->query->name.data, it->query->name.len - 3);
854 
855 	it->m_iter2 = mtbl_source_get_prefix(it->source,
856 					     ubuf_data(it->key),
857 					     ubuf_size(it->key));
858 
859 	return dnstable_iter_init(query_iter_next_rrset_name_fwd, query_iter_free, it);
860 }
861 
862 static struct dnstable_iter *
query_init_rrset_left_wildcard(struct query_iter * it)863 query_init_rrset_left_wildcard(struct query_iter *it)
864 {
865 	uint8_t name[WDNS_MAXLEN_NAME];
866 
867 	/* key: type byte */
868 	ubuf_add(it->key, ENTRY_TYPE_RRSET);
869 
870 	/* key: rrset owner name (label-reversed),
871 	 * less leading "\x01\x2a" and trailing "\x00" */
872 	size_t len = it->query->name.len - 2;
873 	if (wdns_reverse_name(it->query->name.data + 2, len, name) != wdns_res_success)
874 		return (NULL);
875 	ubuf_append(it->key, name, len - 1);
876 
877 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
878 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
879 }
880 
881 static inline bool
is_right_wildcard(wdns_name_t * name)882 is_right_wildcard(wdns_name_t *name)
883 {
884 	if (name->len >= 3 &&
885 	    name->data[name->len - 3] == '\x01' &&
886 	    name->data[name->len - 2] == '*')
887 	{
888 		return (true);
889 	}
890 	return (false);
891 }
892 
893 static inline bool
is_left_wildcard(wdns_name_t * name)894 is_left_wildcard(wdns_name_t *name)
895 {
896 	if (name->len >= 3 &&
897 	    name->data[0] == '\x01' &&
898 	    name->data[1] == '*')
899 	{
900 		return (true);
901 	}
902 	return (false);
903 }
904 
905 static struct dnstable_iter *
query_init_rrset(struct query_iter * it)906 query_init_rrset(struct query_iter *it)
907 {
908 	uint8_t name[WDNS_MAXLEN_NAME];
909 	it->key = ubuf_init(64);
910 	if (is_left_wildcard(&it->query->name))
911 		return query_init_rrset_left_wildcard(it);
912 	if (is_right_wildcard(&it->query->name))
913 		return query_init_rrset_right_wildcard(it);
914 
915 	/* key: type byte */
916 	ubuf_add(it->key, ENTRY_TYPE_RRSET);
917 
918 	/* key: rrset owner name (label-reversed) */
919 	if (wdns_reverse_name(it->query->name.data, it->query->name.len, name)
920 	    != wdns_res_success)
921 	{
922 		ubuf_destroy(&it->key);
923 		return (NULL);
924 	}
925 	ubuf_append(it->key, name, it->query->name.len);
926 
927 	if (it->query->do_rrtype) {
928 		/* key: rrtype */
929 		add_rrtype_to_key(it->key, it->query->rrtype);
930 
931 		if (it->query->bailiwick.data != NULL) {
932 			/* key: bailiwick name (label-reversed) */
933 			if (wdns_reverse_name(it->query->bailiwick.data,
934 					      it->query->bailiwick.len,
935 					      name)
936 			    != wdns_res_success)
937 			{
938 				ubuf_destroy(&it->key);
939 				return (NULL);
940 			}
941 			ubuf_append(it->key, name, it->query->bailiwick.len);
942 		}
943 	}
944 
945 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
946 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
947 }
948 
949 static struct dnstable_iter *
query_init_rdata_right_wildcard(struct query_iter * it)950 query_init_rdata_right_wildcard(struct query_iter *it)
951 {
952 	/* key: type byte */
953 	ubuf_add(it->key, ENTRY_TYPE_RDATA);
954 
955 	/* key: rdata name, less trailing "\x01\x2a\x00" */
956 	ubuf_append(it->key, it->query->name.data, it->query->name.len - 3);
957 
958 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
959 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
960 }
961 
962 static struct dnstable_iter *
query_init_rdata_left_wildcard(struct query_iter * it)963 query_init_rdata_left_wildcard(struct query_iter *it)
964 {
965 	uint8_t name[WDNS_MAXLEN_NAME];
966 
967 	/* key: type byte */
968 	ubuf_add(it->key, ENTRY_TYPE_RDATA_NAME_REV);
969 
970 	/* key: rdata name (label-reversed), less leading "\x01\x2a" and trailing "\x00" */
971 	size_t len = it->query->name.len - 2;
972 	if (wdns_reverse_name(it->query->name.data + 2, len, name) != wdns_res_success)
973 		return (NULL);
974 	ubuf_append(it->key, name, len - 1);
975 
976 	it->m_iter2 = mtbl_source_get_prefix(it->source,
977 					     ubuf_data(it->key),
978 					     ubuf_size(it->key));
979 
980 	return dnstable_iter_init(query_iter_next_rdata_name_rev, query_iter_free, it);
981 }
982 
983 static struct dnstable_iter *
query_init_rdata_name(struct query_iter * it)984 query_init_rdata_name(struct query_iter *it)
985 {
986 	it->key = ubuf_init(64);
987 
988 	if (is_right_wildcard(&it->query->name))
989 		return query_init_rdata_right_wildcard(it);
990 	if (is_left_wildcard(&it->query->name))
991 		return query_init_rdata_left_wildcard(it);
992 
993 	/* key: type byte */
994 	ubuf_add(it->key, ENTRY_TYPE_RDATA);
995 
996 	/* key: rdata name */
997 	ubuf_append(it->key, it->query->name.data, it->query->name.len);
998 
999 	/* key: rrtype */
1000 	if (it->query->do_rrtype) {
1001 		switch(it->query->rrtype) {
1002 		case WDNS_TYPE_NS:
1003 		case WDNS_TYPE_CNAME:
1004 		case WDNS_TYPE_DNAME:
1005 		case WDNS_TYPE_PTR:
1006 		case WDNS_TYPE_MX:
1007 		case WDNS_TYPE_SRV:
1008 			add_rrtype_to_key(it->key, it->query->rrtype);
1009 		}
1010 	}
1011 
1012 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
1013 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
1014 }
1015 
1016 static struct dnstable_iter *
query_init_rdata_ip(struct query_iter * it)1017 query_init_rdata_ip(struct query_iter *it)
1018 {
1019 	assert(it->query->do_rrtype);
1020 	assert(it->query->rdata != NULL);
1021 
1022 	it->key = ubuf_init(64);
1023 
1024 	/* key: type byte, rdata, rrtype */
1025 	ubuf_add(it->key, ENTRY_TYPE_RDATA);
1026 	ubuf_append(it->key, it->query->rdata, it->query->len_rdata);
1027 	add_rrtype_to_key(it->key, it->query->rrtype);
1028 
1029 	if (it->query->rdata2 != NULL) {
1030 		it->key2 = ubuf_init(64);
1031 
1032 		/* key2: type byte, rdata2, rrtype */
1033 		ubuf_add(it->key2, ENTRY_TYPE_RDATA);
1034 		ubuf_append(it->key2, it->query->rdata2, it->query->len_rdata2);
1035 		add_rrtype_to_key(it->key2, it->query->rrtype);
1036 
1037 		/* increment key2 starting from the last byte */
1038 		increment_key(it->key2, ubuf_size(it->key2) - 1);
1039 	}
1040 
1041 	if (it->key2 == NULL) {
1042 		it->m_iter = mtbl_source_get_prefix(it->source,
1043 						    ubuf_data(it->key), ubuf_size(it->key));
1044 	} else {
1045 		it->m_iter = mtbl_source_get_range(it->source,
1046 						   ubuf_data(it->key), ubuf_size(it->key),
1047 						   ubuf_data(it->key2), ubuf_size(it->key2));
1048 	}
1049 	return dnstable_iter_init(query_iter_next_ip, query_iter_free, it);
1050 }
1051 
1052 static struct dnstable_iter *
query_init_rdata_raw(struct query_iter * it)1053 query_init_rdata_raw(struct query_iter *it)
1054 {
1055 	it->key = ubuf_init(64);
1056 
1057 	/* key: type byte */
1058 	ubuf_add(it->key, ENTRY_TYPE_RDATA);
1059 
1060 	/* key: rdata */
1061 	ubuf_append(it->key, it->query->rdata, it->query->len_rdata);
1062 
1063 	/*
1064 	 * Note: even though this function does not use
1065 	 * it->query->do_rrtype nor call add_rrtype_to_key(), in the
1066 	 * post-query filter processing in dnstable_query_filter(), if
1067 	 * do_rrtype is set then the results will be filtered by
1068 	 * rrtype.
1069 	 */
1070 
1071 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
1072 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
1073 }
1074 
1075 static struct dnstable_iter *
query_init_time_range(struct query_iter * it)1076 query_init_time_range(struct query_iter *it)
1077 {
1078 	it->key = ubuf_init(1);
1079 	ubuf_add(it->key, ENTRY_TYPE_TIME_RANGE);
1080 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
1081 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
1082 }
1083 
1084 static struct dnstable_iter *
query_init_version(struct query_iter * it)1085 query_init_version(struct query_iter *it)
1086 {
1087 	it->key = ubuf_init(1);
1088 	ubuf_add(it->key, ENTRY_TYPE_VERSION);
1089 	if (it->query->has_v_type)
1090 		ubuf_add(it->key, it->query->v_type);
1091 	it->m_iter = mtbl_source_get_prefix(it->source, ubuf_data(it->key), ubuf_size(it->key));
1092 	return dnstable_iter_init(query_iter_next, query_iter_free, it);
1093 }
1094 
1095 struct dnstable_iter *
dnstable_query_iter(struct dnstable_query * q,const struct mtbl_source * source)1096 dnstable_query_iter(struct dnstable_query *q, const struct mtbl_source *source)
1097 {
1098 	struct dnstable_iter *d_it;
1099 	struct query_iter *it = my_calloc(1, sizeof(*it));
1100 	it->query = q;
1101 	it->source = source;
1102 	if (q->q_type == DNSTABLE_QUERY_TYPE_RRSET) {
1103 		d_it = query_init_rrset(it);
1104 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_NAME) {
1105 		d_it = query_init_rdata_name(it);
1106 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_IP) {
1107 		d_it = query_init_rdata_ip(it);
1108 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_RDATA_RAW) {
1109 		d_it = query_init_rdata_raw(it);
1110 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_TIME_RANGE) {
1111 		d_it = query_init_time_range(it);
1112 	} else if (q->q_type == DNSTABLE_QUERY_TYPE_VERSION) {
1113 		d_it = query_init_version(it);
1114 	} else {
1115 		assert(0);
1116 	}
1117 	if (d_it == NULL)
1118 		query_iter_free(it);
1119 	return (d_it);
1120 }
1121