xref: /illumos-gate/usr/src/cmd/bhyve/gdb.c (revision eda3ef2d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #ifdef __FreeBSD__
36 #include <sys/endian.h>
37 #else
38 #include <endian.h>
39 #endif
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <sys/socket.h>
43 #include <machine/atomic.h>
44 #include <machine/specialreg.h>
45 #include <machine/vmm.h>
46 #include <netinet/in.h>
47 #include <assert.h>
48 #ifndef WITHOUT_CAPSICUM
49 #include <capsicum_helpers.h>
50 #endif
51 #include <err.h>
52 #include <errno.h>
53 #include <fcntl.h>
54 #include <pthread.h>
55 #include <pthread_np.h>
56 #include <stdbool.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <sysexits.h>
61 #include <unistd.h>
62 #include <vmmapi.h>
63 
64 #include "bhyverun.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

/* Forward declarations for functions that are used before they are defined. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * Event handles for the debugger socket.  Both are reset to NULL by
 * close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU state shared between the vCPU threads and the debugger I/O code.
 *
 * vcpus_active:    vCPUs registered via gdb_cpu_add().
 * vcpus_suspended: vCPUs that have been asked to stop; copied from
 *                  vcpus_active by gdb_suspend_vcpus() and cleared by
 *                  gdb_resume_vcpus().
 * vcpus_waiting:   vCPUs currently inside _gdb_cpu_suspend().
 * gdb_lock:        mutex held around accesses to this state.
 * idle_vcpus:      condition variable suspended vCPUs sleep on.
 * stop_pending:    a stop occurred while a response was still in flight;
 *                  it is reported once that response has been ACKed.
 * first_stop:      when set, the next completed suspend is not reported
 *                  to the debugger (see gdb_finish_suspend_vcpus()).
 * stepping_vcpu:   vCPU currently being single-stepped, or -1.
 * stopped_vcpu:    vCPU whose completed step caused the stop, or -1;
 *                  consumed by report_stop().
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
static pthread_mutex_t gdb_lock;
static pthread_cond_t idle_vcpus;
static bool stop_pending, first_stop;
#ifdef __FreeBSD__
static int stepping_vcpu, stopped_vcpu;
#else
static int stepping_vcpu = -1, stopped_vcpu = -1;
#endif
88 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage, grown with realloc() */
	size_t capacity;	/* allocated size of 'data' in bytes */
	size_t start;		/* offset of the first valid byte */
	size_t len;		/* number of valid bytes from 'start' */
};
102 
/*
 * Per-connection state:
 *
 * cur_comm: bytes received from the debugger that are not yet consumed.
 * cur_resp: response bytes queued for (or already sent to) the debugger.
 * cur_csum: running checksum of the packet currently being built.
 * cur_vcpu: vCPU that register and memory commands operate on.
 * ctx:      VM context handed to the vmmapi calls.
 * cur_fd:   socket of the current debugger connection, or -1 if none.
 */
static struct io_buffer cur_comm, cur_resp;
static uint8_t cur_csum;
static int cur_vcpu;
static struct vmctx *ctx;
static int cur_fd = -1;
108 
/*
 * Guest registers fetched by gdb_read_regs(), in the order in which their
 * values are emitted in the reply.  Each entry is paired by index with
 * the width in gdb_regsize[].
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
135 
/*
 * Number of bytes emitted for each register in gdb_regset[], matched by
 * index: the first seventeen entries (RAX through RIP) are 8 bytes wide,
 * the remaining seven (RFLAGS and the segment registers) are 4 bytes.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
162 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style logger used when GDB_LOG is defined.  The log file
 * /tmp/bhyve_gdb.log is opened on first use; if it cannot be opened (or
 * restricted under Capsicum) the message is dropped and the open is
 * retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
#define debug(...)
#endif
193 
194 static int
195 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
196 {
197 	uint64_t regs[4];
198 	const int regset[4] = {
199 		VM_REG_GUEST_CR0,
200 		VM_REG_GUEST_CR3,
201 		VM_REG_GUEST_CR4,
202 		VM_REG_GUEST_EFER
203 	};
204 
205 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
206 		return (-1);
207 
208 	/*
209 	 * For the debugger, always pretend to be the kernel (CPL 0),
210 	 * and if long-mode is enabled, always parse addresses as if
211 	 * in 64-bit mode.
212 	 */
213 	paging->cr3 = regs[1];
214 	paging->cpl = 0;
215 	if (regs[3] & EFER_LMA)
216 		paging->cpu_mode = CPU_MODE_64BIT;
217 	else if (regs[0] & CR0_PE)
218 		paging->cpu_mode = CPU_MODE_PROTECTED;
219 	else
220 		paging->cpu_mode = CPU_MODE_REAL;
221 	if (!(regs[0] & CR0_PG))
222 		paging->paging_mode = PAGING_MODE_FLAT;
223 	else if (!(regs[2] & CR4_PAE))
224 		paging->paging_mode = PAGING_MODE_32;
225 	else if (regs[3] & EFER_LME)
226 		paging->paging_mode = PAGING_MODE_64;
227 	else
228 		paging->paging_mode = PAGING_MODE_PAE;
229 	return (0);
230 }
231 
232 /*
233  * Map a guest virtual address to a physical address (for a given vcpu).
234  * If a guest virtual address is valid, return 1.  If the address is
235  * not valid, return 0.  If an error occurs obtaining the mapping,
236  * return -1.
237  */
238 static int
239 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
240 {
241 	struct vm_guest_paging paging;
242 	int fault;
243 
244 	if (guest_paging_info(vcpu, &paging) == -1)
245 		return (-1);
246 
247 	/*
248 	 * Always use PROT_READ.  We really care if the VA is
249 	 * accessible, not if the current vCPU can write.
250 	 */
251 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
252 	    &fault) == -1)
253 		return (-1);
254 	if (fault)
255 		return (0);
256 	return (1);
257 }
258 
259 static void
260 io_buffer_reset(struct io_buffer *io)
261 {
262 
263 	io->start = 0;
264 	io->len = 0;
265 }
266 
267 /* Available room for adding data. */
268 static size_t
269 io_buffer_avail(struct io_buffer *io)
270 {
271 
272 	return (io->capacity - (io->start + io->len));
273 }
274 
275 static uint8_t *
276 io_buffer_head(struct io_buffer *io)
277 {
278 
279 	return (io->data + io->start);
280 }
281 
282 static uint8_t *
283 io_buffer_tail(struct io_buffer *io)
284 {
285 
286 	return (io->data + io->start + io->len);
287 }
288 
289 static void
290 io_buffer_advance(struct io_buffer *io, size_t amount)
291 {
292 
293 	assert(amount <= io->len);
294 	io->start += amount;
295 	io->len -= amount;
296 }
297 
298 static void
299 io_buffer_consume(struct io_buffer *io, size_t amount)
300 {
301 
302 	io_buffer_advance(io, amount);
303 	if (io->len == 0) {
304 		io->start = 0;
305 		return;
306 	}
307 
308 	/*
309 	 * XXX: Consider making this move optional and compacting on a
310 	 * future read() before realloc().
311 	 */
312 	memmove(io->data, io_buffer_head(io), io->len);
313 	io->start = 0;
314 }
315 
316 static void
317 io_buffer_grow(struct io_buffer *io, size_t newsize)
318 {
319 	uint8_t *new_data;
320 	size_t avail, new_cap;
321 
322 	avail = io_buffer_avail(io);
323 	if (newsize <= avail)
324 		return;
325 
326 	new_cap = io->capacity + (newsize - avail);
327 	new_data = realloc(io->data, new_cap);
328 	if (new_data == NULL)
329 		err(1, "Failed to grow GDB I/O buffer");
330 	io->data = new_data;
331 	io->capacity = new_cap;
332 }
333 
334 static bool
335 response_pending(void)
336 {
337 
338 	if (cur_resp.start == 0 && cur_resp.len == 0)
339 		return (false);
340 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
341 		return (false);
342 	return (true);
343 }
344 
/*
 * Tear down the current debugger connection: remove both socket events
 * (closing the descriptor along with the read event), forget any
 * buffered input and output, mark the connection as gone and let every
 * stopped vCPU run again.
 *
 * This function acquires gdb_lock itself.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
366 
/*
 * Convert a value in the range 0-15 to its lowercase hexadecimal
 * character.
 */
static uint8_t
hex_digit(uint8_t nibble)
{
	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
376 
/*
 * Convert one hexadecimal character (either case) to its value.  Any
 * character that is not a hex digit yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t value;

	if ('0' <= v && v <= '9')
		value = v - '0';
	else if ('a' <= v && v <= 'f')
		value = v - 'a' + 10;
	else if ('A' <= v && v <= 'F')
		value = v - 'A' + 10;
	else
		value = 0xF;
	return (value);
}
389 
/*
 * Parse 'len' characters at 'p' as a big-endian hexadecimal number.  An
 * empty string yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result;
	size_t i;

	result = 0;
	for (i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
405 
/* Parse the two hexadecimal characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return ((hi << 4) | lo);
}
412 
413 static void
414 send_pending_data(int fd)
415 {
416 	ssize_t nwritten;
417 
418 	if (cur_resp.len == 0) {
419 		mevent_disable(write_event);
420 		return;
421 	}
422 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
423 	if (nwritten == -1) {
424 		warn("Write to GDB socket failed");
425 		close_connection();
426 	} else {
427 		io_buffer_advance(&cur_resp, nwritten);
428 		if (cur_resp.len == 0)
429 			mevent_disable(write_event);
430 		else
431 			mevent_enable(write_event);
432 	}
433 }
434 
435 /* Append a single character to the output buffer. */
436 static void
437 send_char(uint8_t data)
438 {
439 	io_buffer_grow(&cur_resp, 1);
440 	*io_buffer_tail(&cur_resp) = data;
441 	cur_resp.len++;
442 }
443 
444 /* Append an array of bytes to the output buffer. */
445 static void
446 send_data(const uint8_t *data, size_t len)
447 {
448 
449 	io_buffer_grow(&cur_resp, len);
450 	memcpy(io_buffer_tail(&cur_resp), data, len);
451 	cur_resp.len += len;
452 }
453 
/*
 * Write the two lowercase hex characters for 'v' into buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t high, low;

	high = v >> 4;
	low = v & 0xf;
	buf[0] = hex_digit(high);
	buf[1] = hex_digit(low);
}
461 
/*
 * Queue 'v' as two hex characters in the response buffer.  The packet
 * checksum is not updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
474 
475 static void
476 start_packet(void)
477 {
478 
479 	send_char('$');
480 	cur_csum = 0;
481 }
482 
483 static void
484 finish_packet(void)
485 {
486 
487 	send_char('#');
488 	send_byte(cur_csum);
489 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
490 }
491 
492 /*
493  * Append a single character (for the packet payload) and update the
494  * checksum.
495  */
496 static void
497 append_char(uint8_t v)
498 {
499 
500 	send_char(v);
501 	cur_csum += v;
502 }
503 
504 /*
505  * Append an array of bytes (for the packet payload) and update the
506  * checksum.
507  */
508 static void
509 append_packet_data(const uint8_t *data, size_t len)
510 {
511 
512 	send_data(data, len);
513 	while (len > 0) {
514 		cur_csum += *data;
515 		data++;
516 		len--;
517 	}
518 }
519 
/*
 * Add the NUL-terminated string 'str' (without its terminator) to the
 * packet payload.
 */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
#ifdef __FreeBSD__
	append_packet_data(str, n);
#else
	append_packet_data((const uint8_t *)str, n);
#endif
}
530 
/* Add 'v' to the packet payload as two hex characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
539 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte(value);
		value >>= 8;
	}
}
550 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hex characters.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	char buf[len * 2];
	size_t pos;

	/* Fill the buffer from the back so the low byte ends up last. */
	for (pos = len; pos > 0; pos--) {
#ifdef __FreeBSD__
		format_byte(value, buf + (pos - 1) * 2);
#else
		format_byte(value, (uint8_t *)(buf + (pos - 1) * 2));
#endif
		value >>= 8;
	}
#ifdef __FreeBSD__
	append_packet_data(buf, sizeof(buf));
#else
	append_packet_data((const uint8_t *)buf, sizeof(buf));
#endif
}
571 
/*
 * Add 'value' to the packet payload as big-endian hexadecimal using the
 * smallest whole number of bytes that can hold it.  Zero is emitted as
 * the single character '0'.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else {
		/*
		 * fls() gives the index of the highest set bit; round that
		 * bit count up to whole bytes.  The parentheses matter:
		 * without them "7 / 8" is evaluated first and is zero.
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
	}
}
581 
/*
 * Add the NUL-terminated string 'str' to the packet payload with every
 * character encoded as two hex digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
591 
/* Queue a packet with an empty payload. */
static void
send_empty_response(void)
{
	start_packet();
	finish_packet();
}
599 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as two
 * hex digits.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
609 
/* Queue the "OK" success reply. */
static void
send_ok(void)
{
	start_packet();
	append_string("OK");
	finish_packet();
}
618 
/*
 * Parse a thread id of 'len' characters at 'data'.
 *
 * Returns 0 for the literal "0", -1 for the literal "-1", -2 when the
 * id is empty, and otherwise the characters parsed as a hexadecimal
 * number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{
	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
631 
632 static void
633 report_stop(void)
634 {
635 
636 	start_packet();
637 	if (stopped_vcpu == -1)
638 		append_char('S');
639 	else
640 		append_char('T');
641 	append_byte(GDB_SIGNAL_TRAP);
642 	if (stopped_vcpu != -1) {
643 		append_string("thread:");
644 		append_integer(stopped_vcpu + 1);
645 		append_char(';');
646 	}
647 	stopped_vcpu = -1;
648 	finish_packet();
649 }
650 
651 static void
652 gdb_finish_suspend_vcpus(void)
653 {
654 
655 	if (first_stop) {
656 		first_stop = false;
657 		stopped_vcpu = -1;
658 	} else if (response_pending())
659 		stop_pending = true;
660 	else {
661 		report_stop();
662 		send_pending_data(cur_fd);
663 	}
664 }
665 
/*
 * Park the calling vCPU thread until it is allowed to run again.
 *
 * The vCPU is marked as waiting.  If 'report_stop' is set and the
 * waiting set now compares equal to the suspended set, the stop is
 * finalized via gdb_finish_suspend_vcpus().  The thread then sleeps on
 * idle_vcpus for as long as the vCPU is in vcpus_suspended and is not
 * the one being single-stepped.
 *
 * The callers in this file hold gdb_lock; it is the mutex released and
 * reacquired by pthread_cond_wait().
 */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
679 
680 void
681 gdb_cpu_add(int vcpu)
682 {
683 
684 	debug("$vCPU %d starting\n", vcpu);
685 	pthread_mutex_lock(&gdb_lock);
686 	CPU_SET(vcpu, &vcpus_active);
687 
688 	/*
689 	 * If a vcpu is added while vcpus are stopped, suspend the new
690 	 * vcpu so that it will pop back out with a debug exit before
691 	 * executing the first instruction.
692 	 */
693 	if (!CPU_EMPTY(&vcpus_suspended)) {
694 		CPU_SET(vcpu, &vcpus_suspended);
695 		_gdb_cpu_suspend(vcpu, false);
696 	}
697 	pthread_mutex_unlock(&gdb_lock);
698 }
699 
700 void
701 gdb_cpu_suspend(int vcpu)
702 {
703 
704 	pthread_mutex_lock(&gdb_lock);
705 	_gdb_cpu_suspend(vcpu, true);
706 	pthread_mutex_unlock(&gdb_lock);
707 }
708 
/*
 * Handle an MTRAP exit on 'vcpu'.  Nothing happens unless 'vcpu' is the
 * vCPU currently being single-stepped.  In that case the step is over:
 * stepping state is cleared, the MTRAP exit capability is turned off,
 * the vCPU is suspended again and recorded in stopped_vcpu so that the
 * stop report names it, and the thread parks in _gdb_cpu_suspend().
 */
void
gdb_cpu_mtrap(int vcpu)
{

	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	if (vcpu == stepping_vcpu) {
		stepping_vcpu = -1;
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		vm_suspend_cpu(ctx, vcpu);
		/* No other stop may be recorded and still unreported. */
		assert(stopped_vcpu == -1);
		stopped_vcpu = vcpu;
		_gdb_cpu_suspend(vcpu, true);
	}
	pthread_mutex_unlock(&gdb_lock);
}
725 
/*
 * Ask every active vCPU to stop.  The suspended set becomes a copy of
 * the active set and all vCPUs are suspended in the hypervisor (vcpu
 * argument -1).  If the waiting set already compares equal to the
 * suspended set, the stop is finalized immediately.
 *
 * The caller must hold gdb_lock.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
737 
738 static bool
739 gdb_step_vcpu(int vcpu)
740 {
741 	int error, val;
742 
743 	debug("$vCPU %d step\n", vcpu);
744 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
745 	if (error < 0)
746 		return (false);
747 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
748 	vm_resume_cpu(ctx, vcpu);
749 	stepping_vcpu = vcpu;
750 	pthread_cond_broadcast(&idle_vcpus);
751 	return (true);
752 }
753 
/*
 * Let all vCPUs run again: resume them in the hypervisor (vcpu argument
 * -1), empty the suspended set and wake every thread sleeping on
 * idle_vcpus.
 *
 * The caller must hold gdb_lock.
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
764 
765 static void
766 gdb_read_regs(void)
767 {
768 	uint64_t regvals[nitems(gdb_regset)];
769 	int i;
770 
771 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
772 	    gdb_regset, regvals) == -1) {
773 		send_error(errno);
774 		return;
775 	}
776 	start_packet();
777 	for (i = 0; i < nitems(regvals); i++)
778 		append_unsigned_native(regvals[i], gdb_regsize[i]);
779 	finish_packet();
780 }
781 
/*
 * Handle a memory read command of the form "m<addr>,<length>", where
 * both fields are hexadecimal.  'data'/'len' describe the whole command
 * including the leading 'm'.
 *
 * The guest virtual range is walked one page at a time.  Pages backed
 * by guest RAM are copied out directly; other pages are read through
 * read_mem() in 1, 2 or 4 byte pieces.  If a failure occurs before any
 * byte was produced an error reply is sent; if it occurs later, the
 * bytes gathered so far are sent as a short reply.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	/* 'started' records whether the reply packet has been opened yet. */
	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			/* Translation failed outright. */
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			/* The virtual address is not mapped. */
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the value low byte first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		/* Either done, or positioned at the start of the next page. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	/* A zero-length request yields an empty reply. */
	if (!started)
		start_packet();
	finish_packet();
}
893 
894 static void
895 gdb_write_mem(const uint8_t *data, size_t len)
896 {
897 	uint64_t gpa, gva, val;
898 	uint8_t *cp;
899 	size_t resid, todo, bytes;
900 	int error;
901 
902 	/* Skip 'M' */
903 	data += 1;
904 	len -= 1;
905 
906 	/* Parse and consume address. */
907 	cp = memchr(data, ',', len);
908 	if (cp == NULL || cp == data) {
909 		send_error(EINVAL);
910 		return;
911 	}
912 	gva = parse_integer(data, cp - data);
913 	len -= (cp - data) + 1;
914 	data += (cp - data) + 1;
915 
916 	/* Parse and consume length. */
917 	cp = memchr(data, ':', len);
918 	if (cp == NULL || cp == data) {
919 		send_error(EINVAL);
920 		return;
921 	}
922 	resid = parse_integer(data, cp - data);
923 	len -= (cp - data) + 1;
924 	data += (cp - data) + 1;
925 
926 	/* Verify the available bytes match the length. */
927 	if (len != resid * 2) {
928 		send_error(EINVAL);
929 		return;
930 	}
931 
932 	while (resid > 0) {
933 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
934 		if (error == -1) {
935 			send_error(errno);
936 			return;
937 		}
938 		if (error == 0) {
939 			send_error(EFAULT);
940 			return;
941 		}
942 
943 		/* Write bytes to current page. */
944 		todo = getpagesize() - gpa % getpagesize();
945 		if (todo > resid)
946 			todo = resid;
947 
948 		cp = paddr_guest2host(ctx, gpa, todo);
949 		if (cp != NULL) {
950 			/*
951 			 * If this page is guest RAM, write it a byte
952 			 * at a time.
953 			 */
954 			while (todo > 0) {
955 				assert(len >= 2);
956 				*cp = parse_byte(data);
957 				data += 2;
958 				len -= 2;
959 				cp++;
960 				gpa++;
961 				gva++;
962 				resid--;
963 				todo--;
964 			}
965 		} else {
966 			/*
967 			 * If this page isn't guest RAM, try to handle
968 			 * it via MMIO.  For MMIO requests, use
969 			 * aligned writes of words when possible.
970 			 */
971 			while (todo > 0) {
972 				if (gpa & 1 || todo == 1) {
973 					bytes = 1;
974 					val = parse_byte(data);
975 				} else if (gpa & 2 || todo == 2) {
976 					bytes = 2;
977 					val = be16toh(parse_integer(data, 4));
978 				} else {
979 					bytes = 4;
980 					val = be32toh(parse_integer(data, 8));
981 				}
982 				error = write_mem(ctx, cur_vcpu, gpa, val,
983 				    bytes);
984 				if (error == 0) {
985 					gpa += bytes;
986 					gva += bytes;
987 					resid -= bytes;
988 					todo -= bytes;
989 					data += 2 * bytes;
990 					len -= 2 * bytes;
991 				} else {
992 					send_error(EFAULT);
993 					return;
994 				}
995 			}
996 		}
997 		assert(resid == 0 || gpa % getpagesize() == 0);
998 	}
999 	assert(len == 0);
1000 	send_ok();
1001 }
1002 
/*
 * Report whether the 'len' bytes at 'data' begin with the NUL-terminated
 * command name 'cmd'.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (cmdlen <= len && memcmp(data, cmd, cmdlen) == 0);
}
1011 
/*
 * Handle the feature list that follows "qSupported".  'data'/'len'
 * describe the text after the command name: a ';'-separated list whose
 * entries are either "name=value", "name+" or "name-".
 *
 * No debugger feature currently changes the stub's behavior, so the
 * list is only parsed (and optionally logged) before replying with the
 * stub's own feature list.  If the scratch copy of the list cannot be
 * allocated an ENOMEM error reply is sent instead.
 */
static void
check_features(const uint8_t *data, size_t len)
{
	char *feature, *next_feature, *str, *value;
	bool supported;

	/* strsep() modifies its input, so work on a terminated copy. */
	str = malloc(len + 1);
	if (str == NULL) {
		send_error(ENOMEM);
		return;
	}
	memcpy(str, data, len);
	str[len] = '\0';
	next_feature = str;

	while ((feature = strsep(&next_feature, ";")) != NULL) {
		/*
		 * Null features shouldn't exist, but skip if they
		 * do.
		 */
		if (strcmp(feature, "") == 0)
			continue;

		/*
		 * Look for the value or supported / not supported
		 * flag.
		 */
		value = strchr(feature, '=');
		if (value != NULL) {
			*value = '\0';
			value++;
			supported = true;
		} else {
			value = feature + strlen(feature) - 1;
			switch (*value) {
			case '+':
				supported = true;
				break;
			case '-':
				supported = false;
				break;
			default:
				/*
				 * This is really a protocol error,
				 * but we just ignore malformed
				 * features for ease of
				 * implementation.
				 */
				continue;
			}
			value = NULL;
		}

		/* No currently supported features. */
#ifndef __FreeBSD__
		/*
		 * The compiler dislikes 'supported' being set but never used.
		 * Make it happy here.
		 */
		if (supported) {
			debug("feature '%s' supported\n", feature);
		}
#endif /* __FreeBSD__ */
	}
	free(str);

	start_packet();

	/* This is an arbitrary limit. */
	append_string("PacketSize=4096");
	finish_packet();
}
1080 
1081 static void
1082 gdb_query(const uint8_t *data, size_t len)
1083 {
1084 
1085 	/*
1086 	 * TODO:
1087 	 * - qSearch
1088 	 */
1089 	if (command_equals(data, len, "qAttached")) {
1090 		start_packet();
1091 		append_char('1');
1092 		finish_packet();
1093 	} else if (command_equals(data, len, "qC")) {
1094 		start_packet();
1095 		append_string("QC");
1096 		append_integer(cur_vcpu + 1);
1097 		finish_packet();
1098 	} else if (command_equals(data, len, "qfThreadInfo")) {
1099 		cpuset_t mask;
1100 		bool first;
1101 		int vcpu;
1102 
1103 		if (CPU_EMPTY(&vcpus_active)) {
1104 			send_error(EINVAL);
1105 			return;
1106 		}
1107 		mask = vcpus_active;
1108 		start_packet();
1109 		append_char('m');
1110 		first = true;
1111 		while (!CPU_EMPTY(&mask)) {
1112 			vcpu = CPU_FFS(&mask) - 1;
1113 			CPU_CLR(vcpu, &mask);
1114 			if (first)
1115 				first = false;
1116 			else
1117 				append_char(',');
1118 			append_integer(vcpu + 1);
1119 		}
1120 		finish_packet();
1121 	} else if (command_equals(data, len, "qsThreadInfo")) {
1122 		start_packet();
1123 		append_char('l');
1124 		finish_packet();
1125 	} else if (command_equals(data, len, "qSupported")) {
1126 		data += strlen("qSupported");
1127 		len -= strlen("qSupported");
1128 		check_features(data, len);
1129 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1130 		char buf[16];
1131 		int tid;
1132 
1133 		data += strlen("qThreadExtraInfo");
1134 		len -= strlen("qThreadExtraInfo");
1135 		if (*data != ',') {
1136 			send_error(EINVAL);
1137 			return;
1138 		}
1139 		tid = parse_threadid(data + 1, len - 1);
1140 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1141 			send_error(EINVAL);
1142 			return;
1143 		}
1144 
1145 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1146 		start_packet();
1147 		append_asciihex(buf);
1148 		finish_packet();
1149 	} else
1150 		send_empty_response();
1151 }
1152 
1153 static void
1154 handle_command(const uint8_t *data, size_t len)
1155 {
1156 
1157 	/* Reject packets with a sequence-id. */
1158 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1159 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1160 		send_empty_response();
1161 		return;
1162 	}
1163 
1164 	switch (*data) {
1165 	case 'c':
1166 		if (len != 1) {
1167 			send_error(EINVAL);
1168 			break;
1169 		}
1170 
1171 		/* Don't send a reply until a stop occurs. */
1172 		gdb_resume_vcpus();
1173 		break;
1174 	case 'D':
1175 		send_ok();
1176 
1177 		/* TODO: Resume any stopped CPUs. */
1178 		break;
1179 	case 'g': {
1180 		gdb_read_regs();
1181 		break;
1182 	}
1183 	case 'H': {
1184 		int tid;
1185 
1186 		if (data[1] != 'g' && data[1] != 'c') {
1187 			send_error(EINVAL);
1188 			break;
1189 		}
1190 		tid = parse_threadid(data + 2, len - 2);
1191 		if (tid == -2) {
1192 			send_error(EINVAL);
1193 			break;
1194 		}
1195 
1196 		if (CPU_EMPTY(&vcpus_active)) {
1197 			send_error(EINVAL);
1198 			break;
1199 		}
1200 		if (tid == -1 || tid == 0)
1201 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1202 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1203 			cur_vcpu = tid - 1;
1204 		else {
1205 			send_error(EINVAL);
1206 			break;
1207 		}
1208 		send_ok();
1209 		break;
1210 	}
1211 	case 'm':
1212 		gdb_read_mem(data, len);
1213 		break;
1214 	case 'M':
1215 		gdb_write_mem(data, len);
1216 		break;
1217 	case 'T': {
1218 		int tid;
1219 
1220 		tid = parse_threadid(data + 1, len - 1);
1221 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1222 			send_error(EINVAL);
1223 			return;
1224 		}
1225 		send_ok();
1226 		break;
1227 	}
1228 	case 'q':
1229 		gdb_query(data, len);
1230 		break;
1231 	case 's':
1232 		if (len != 1) {
1233 			send_error(EINVAL);
1234 			break;
1235 		}
1236 
1237 		/* Don't send a reply until a stop occurs. */
1238 		if (!gdb_step_vcpu(cur_vcpu)) {
1239 			send_error(EOPNOTSUPP);
1240 			break;
1241 		}
1242 		break;
1243 	case '?':
1244 		/* XXX: Only if stopped? */
1245 		/* For now, just report that we are always stopped. */
1246 		start_packet();
1247 		append_char('S');
1248 		append_byte(GDB_SIGNAL_TRAP);
1249 		finish_packet();
1250 		break;
1251 	case 'G': /* TODO */
1252 	case 'v':
1253 		/* Handle 'vCont' */
1254 		/* 'vCtrlC' */
1255 	case 'p': /* TODO */
1256 	case 'P': /* TODO */
1257 	case 'Q': /* TODO */
1258 	case 't': /* TODO */
1259 	case 'X': /* TODO */
1260 	case 'z': /* TODO */
1261 	case 'Z': /* TODO */
1262 	default:
1263 		send_empty_response();
1264 	}
1265 }
1266 
/*
 * Check for a valid packet in the command buffer.
 *
 * Consumes as much of the buffered debugger input as possible, acting
 * on the first byte of what remains:
 *
 *   0x03  interrupt request: stop all vCPUs.
 *   '+'   ACK: drop the outstanding response and send any deferred stop
 *         report.
 *   '-'   NACK: rewind the outstanding response and send it again.
 *   '$'   command packet: once complete, verify the checksum, reply with
 *         '+' or '-', and run the command.
 *
 * Any other byte is discarded.  Returns when the buffer is empty or
 * holds only an incomplete packet.  'fd' is the socket replies are
 * written to.
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			if (stop_pending) {
				stop_pending = false;
				report_stop();
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/* Rewind so the whole response is sent again. */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				/* Do not repeat our own leading ACK. */
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$' through '#', plus two checksum characters. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending()) {
				debug("-> +\n");
			}
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1357 
1358 static void
1359 gdb_readable(int fd, enum ev_type event, void *arg)
1360 {
1361 	ssize_t nread;
1362 	int pending;
1363 
1364 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1365 		warn("FIONREAD on GDB socket");
1366 		return;
1367 	}
1368 
1369 	/*
1370 	 * 'pending' might be zero due to EOF.  We need to call read
1371 	 * with a non-zero length to detect EOF.
1372 	 */
1373 	if (pending == 0)
1374 		pending = 1;
1375 
1376 	/* Ensure there is room in the command buffer. */
1377 	io_buffer_grow(&cur_comm, pending);
1378 	assert(io_buffer_avail(&cur_comm) >= pending);
1379 
1380 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1381 	if (nread == 0) {
1382 		close_connection();
1383 	} else if (nread == -1) {
1384 		if (errno == EAGAIN)
1385 			return;
1386 
1387 		warn("Read from GDB socket");
1388 		close_connection();
1389 	} else {
1390 		cur_comm.len += nread;
1391 		pthread_mutex_lock(&gdb_lock);
1392 		check_command(fd);
1393 		pthread_mutex_unlock(&gdb_lock);
1394 	}
1395 }
1396 
1397 static void
1398 gdb_writable(int fd, enum ev_type event, void *arg)
1399 {
1400 
1401 	send_pending_data(fd);
1402 }
1403 
1404 static void
1405 new_connection(int fd, enum ev_type event, void *arg)
1406 {
1407 	int optval, s;
1408 
1409 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1410 	if (s == -1) {
1411 		if (arg != NULL)
1412 			err(1, "Failed accepting initial GDB connection");
1413 
1414 		/* Silently ignore errors post-startup. */
1415 		return;
1416 	}
1417 
1418 	optval = 1;
1419 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1420 	    -1) {
1421 		warn("Failed to disable SIGPIPE for GDB connection");
1422 		close(s);
1423 		return;
1424 	}
1425 
1426 	pthread_mutex_lock(&gdb_lock);
1427 	if (cur_fd != -1) {
1428 		close(s);
1429 		warnx("Ignoring additional GDB connection.");
1430 	}
1431 
1432 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1433 	if (read_event == NULL) {
1434 		if (arg != NULL)
1435 			err(1, "Failed to setup initial GDB connection");
1436 		pthread_mutex_unlock(&gdb_lock);
1437 		return;
1438 	}
1439 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1440 	if (write_event == NULL) {
1441 		if (arg != NULL)
1442 			err(1, "Failed to setup initial GDB connection");
1443 		mevent_delete_close(read_event);
1444 		read_event = NULL;
1445 	}
1446 
1447 	cur_fd = s;
1448 	cur_vcpu = 0;
1449 	stepping_vcpu = -1;
1450 	stopped_vcpu = -1;
1451 	stop_pending = false;
1452 
1453 	/* Break on attach. */
1454 	first_stop = true;
1455 	gdb_suspend_vcpus();
1456 	pthread_mutex_unlock(&gdb_lock);
1457 }
1458 
1459 #ifndef WITHOUT_CAPSICUM
1460 void
1461 limit_gdb_socket(int s)
1462 {
1463 	cap_rights_t rights;
1464 	unsigned long ioctls[] = { FIONREAD };
1465 
1466 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1467 	    CAP_SETSOCKOPT, CAP_IOCTL);
1468 	if (caph_rights_limit(s, &rights) == -1)
1469 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1470 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1471 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1472 }
1473 #endif
1474 
1475 void
1476 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1477 {
1478 	struct sockaddr_in sin;
1479 	int error, flags, s;
1480 
1481 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1482 
1483 	error = pthread_mutex_init(&gdb_lock, NULL);
1484 	if (error != 0)
1485 		errc(1, error, "gdb mutex init");
1486 	error = pthread_cond_init(&idle_vcpus, NULL);
1487 	if (error != 0)
1488 		errc(1, error, "gdb cv init");
1489 
1490 	ctx = _ctx;
1491 	s = socket(PF_INET, SOCK_STREAM, 0);
1492 	if (s < 0)
1493 		err(1, "gdb socket create");
1494 
1495 #ifdef __FreeBSD__
1496 	sin.sin_len = sizeof(sin);
1497 #endif
1498 	sin.sin_family = AF_INET;
1499 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1500 	sin.sin_port = htons(sport);
1501 
1502 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1503 		err(1, "gdb socket bind");
1504 
1505 	if (listen(s, 1) < 0)
1506 		err(1, "gdb socket listen");
1507 
1508 	if (wait) {
1509 		/*
1510 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1511 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1512 		 * it starts execution.  The vcpu will remain suspended
1513 		 * until a debugger connects.
1514 		 */
1515 		stepping_vcpu = -1;
1516 		stopped_vcpu = -1;
1517 		CPU_SET(0, &vcpus_suspended);
1518 	}
1519 
1520 	flags = fcntl(s, F_GETFL);
1521 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1522 		err(1, "Failed to mark gdb socket non-blocking");
1523 
1524 #ifndef WITHOUT_CAPSICUM
1525 	limit_gdb_socket(s);
1526 #endif
1527 	mevent_add(s, EVF_READ, new_connection, NULL);
1528 }
1529