xref: /freebsd/usr.sbin/bhyve/gdb.c (revision b00ab754)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #ifndef WITHOUT_CAPSICUM
34 #include <sys/capsicum.h>
35 #endif
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/socket.h>
39 #include <machine/atomic.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmm.h>
42 #include <netinet/in.h>
43 #include <assert.h>
44 #ifndef WITHOUT_CAPSICUM
45 #include <capsicum_helpers.h>
46 #endif
47 #include <err.h>
48 #include <errno.h>
49 #include <fcntl.h>
50 #include <pthread.h>
51 #include <pthread_np.h>
52 #include <stdbool.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <sysexits.h>
57 #include <unistd.h>
58 #include <vmmapi.h>
59 
60 #include "bhyverun.h"
61 #include "mem.h"
62 #include "mevent.h"
63 
64 /*
65  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
66  * use SIGTRAP.
67  */
68 #define	GDB_SIGNAL_TRAP		5
69 
70 static void gdb_resume_vcpus(void);
71 static void check_command(int fd);
72 
73 static struct mevent *read_event, *write_event;
74 
75 static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
76 static pthread_mutex_t gdb_lock;
77 static pthread_cond_t idle_vcpus;
78 static bool stop_pending, first_stop;
79 static int stepping_vcpu, stopped_vcpu;
80 
/*
 * A growable I/O buffer.  'data' points at 'capacity' bytes of storage.
 *
 * Read buffer:  'start' is unused; 'len' is the number of valid bytes.
 * Write buffer: 'start' is the index in 'data' of the next byte to send;
 *               'len' is the number of valid bytes still to be sent.
 */
struct io_buffer {
	uint8_t *data;		/* backing storage */
	size_t capacity;	/* bytes of room at 'data' */
	size_t start;		/* index of next byte to send (write only) */
	size_t len;		/* count of valid bytes */
};
94 
95 static struct io_buffer cur_comm, cur_resp;
96 static uint8_t cur_csum;
97 static int cur_vcpu;
98 static struct vmctx *ctx;
99 static int cur_fd = -1;
100 
101 const int gdb_regset[] = {
102 	VM_REG_GUEST_RAX,
103 	VM_REG_GUEST_RBX,
104 	VM_REG_GUEST_RCX,
105 	VM_REG_GUEST_RDX,
106 	VM_REG_GUEST_RSI,
107 	VM_REG_GUEST_RDI,
108 	VM_REG_GUEST_RBP,
109 	VM_REG_GUEST_RSP,
110 	VM_REG_GUEST_R8,
111 	VM_REG_GUEST_R9,
112 	VM_REG_GUEST_R10,
113 	VM_REG_GUEST_R11,
114 	VM_REG_GUEST_R12,
115 	VM_REG_GUEST_R13,
116 	VM_REG_GUEST_R14,
117 	VM_REG_GUEST_R15,
118 	VM_REG_GUEST_RIP,
119 	VM_REG_GUEST_RFLAGS,
120 	VM_REG_GUEST_CS,
121 	VM_REG_GUEST_SS,
122 	VM_REG_GUEST_DS,
123 	VM_REG_GUEST_ES,
124 	VM_REG_GUEST_FS,
125 	VM_REG_GUEST_GS
126 };
127 
/*
 * Width in bytes used when emitting each entry of gdb_regset[]; the two
 * arrays are indexed in parallel.
 */
const int gdb_regsize[] = {
	/* 16 general purpose registers. */
	8, 8, 8, 8, 8, 8, 8, 8,
	8, 8, 8, 8, 8, 8, 8, 8,

	/* Instruction pointer. */
	8,

	/* Flags. */
	4,

	/* 6 segment registers. */
	4, 4, 4, 4, 4, 4
};
154 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style logging to /tmp/bhyve_gdb.log.  The log file is opened on
 * first use and switched to line buffering; if it cannot be opened (or,
 * with Capsicum, cannot be limited to writes) the message is dropped and
 * the open is retried on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		FILE *fp;

		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		logfile = fp;
	}

	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
#else
/* Logging is compiled out: arguments are discarded without evaluation. */
#define debug(...)
#endif
185 
186 static int
187 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
188 {
189 	uint64_t regs[4];
190 	const int regset[4] = {
191 		VM_REG_GUEST_CR0,
192 		VM_REG_GUEST_CR3,
193 		VM_REG_GUEST_CR4,
194 		VM_REG_GUEST_EFER
195 	};
196 
197 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
198 		return (-1);
199 
200 	/*
201 	 * For the debugger, always pretend to be the kernel (CPL 0),
202 	 * and if long-mode is enabled, always parse addresses as if
203 	 * in 64-bit mode.
204 	 */
205 	paging->cr3 = regs[1];
206 	paging->cpl = 0;
207 	if (regs[3] & EFER_LMA)
208 		paging->cpu_mode = CPU_MODE_64BIT;
209 	else if (regs[0] & CR0_PE)
210 		paging->cpu_mode = CPU_MODE_PROTECTED;
211 	else
212 		paging->cpu_mode = CPU_MODE_REAL;
213 	if (!(regs[0] & CR0_PG))
214 		paging->paging_mode = PAGING_MODE_FLAT;
215 	else if (!(regs[2] & CR4_PAE))
216 		paging->paging_mode = PAGING_MODE_32;
217 	else if (regs[3] & EFER_LME)
218 		paging->paging_mode = PAGING_MODE_64;
219 	else
220 		paging->paging_mode = PAGING_MODE_PAE;
221 	return (0);
222 }
223 
224 /*
225  * Map a guest virtual address to a physical address (for a given vcpu).
226  * If a guest virtual address is valid, return 1.  If the address is
227  * not valid, return 0.  If an error occurs obtaining the mapping,
228  * return -1.
229  */
230 static int
231 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
232 {
233 	struct vm_guest_paging paging;
234 	int fault;
235 
236 	if (guest_paging_info(vcpu, &paging) == -1)
237 		return (-1);
238 
239 	/*
240 	 * Always use PROT_READ.  We really care if the VA is
241 	 * accessible, not if the current vCPU can write.
242 	 */
243 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
244 	    &fault) == -1)
245 		return (-1);
246 	if (fault)
247 		return (0);
248 	return (1);
249 }
250 
251 static void
252 io_buffer_reset(struct io_buffer *io)
253 {
254 
255 	io->start = 0;
256 	io->len = 0;
257 }
258 
259 /* Available room for adding data. */
260 static size_t
261 io_buffer_avail(struct io_buffer *io)
262 {
263 
264 	return (io->capacity - (io->start + io->len));
265 }
266 
267 static uint8_t *
268 io_buffer_head(struct io_buffer *io)
269 {
270 
271 	return (io->data + io->start);
272 }
273 
274 static uint8_t *
275 io_buffer_tail(struct io_buffer *io)
276 {
277 
278 	return (io->data + io->start + io->len);
279 }
280 
281 static void
282 io_buffer_advance(struct io_buffer *io, size_t amount)
283 {
284 
285 	assert(amount <= io->len);
286 	io->start += amount;
287 	io->len -= amount;
288 }
289 
290 static void
291 io_buffer_consume(struct io_buffer *io, size_t amount)
292 {
293 
294 	io_buffer_advance(io, amount);
295 	if (io->len == 0) {
296 		io->start = 0;
297 		return;
298 	}
299 
300 	/*
301 	 * XXX: Consider making this move optional and compacting on a
302 	 * future read() before realloc().
303 	 */
304 	memmove(io->data, io_buffer_head(io), io->len);
305 	io->start = 0;
306 }
307 
308 static void
309 io_buffer_grow(struct io_buffer *io, size_t newsize)
310 {
311 	uint8_t *new_data;
312 	size_t avail, new_cap;
313 
314 	avail = io_buffer_avail(io);
315 	if (newsize <= avail)
316 		return;
317 
318 	new_cap = io->capacity + (newsize - avail);
319 	new_data = realloc(io->data, new_cap);
320 	if (new_data == NULL)
321 		err(1, "Failed to grow GDB I/O buffer");
322 	io->data = new_data;
323 	io->capacity = new_cap;
324 }
325 
326 static bool
327 response_pending(void)
328 {
329 
330 	if (cur_resp.start == 0 && cur_resp.len == 0)
331 		return (false);
332 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
333 		return (false);
334 	return (true);
335 }
336 
337 static void
338 close_connection(void)
339 {
340 
341 	/*
342 	 * XXX: This triggers a warning because mevent does the close
343 	 * before the EV_DELETE.
344 	 */
345 	pthread_mutex_lock(&gdb_lock);
346 	mevent_delete(write_event);
347 	mevent_delete_close(read_event);
348 	write_event = NULL;
349 	read_event = NULL;
350 	io_buffer_reset(&cur_comm);
351 	io_buffer_reset(&cur_resp);
352 	cur_fd = -1;
353 
354 	/* Resume any stopped vCPUs. */
355 	gdb_resume_vcpus();
356 	pthread_mutex_unlock(&gdb_lock);
357 }
358 
/*
 * Convert a 4-bit value to its lowercase ASCII hexadecimal digit.
 * Callers pass values in the range 0-15.
 */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
368 
/*
 * Convert one ASCII hexadecimal digit (either case) to its value.
 * Any other character yields 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t value;

	if ('0' <= v && v <= '9')
		value = v - '0';
	else if ('a' <= v && v <= 'f')
		value = v - 'a' + 10;
	else if ('A' <= v && v <= 'F')
		value = v - 'A' + 10;
	else
		value = 0xF;
	return (value);
}
381 
/*
 * Parse 'len' hexadecimal digits starting at 'p', most significant digit
 * first, and return the accumulated value.  A length of zero yields 0.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t acc;
	size_t i;

	acc = 0;
	for (i = 0; i < len; i++)
		acc = (acc << 4) | parse_digit(p[i]);
	return (acc);
}
397 
/* Parse the two hexadecimal digits at 'p' (high nibble first) as a byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
404 
405 static void
406 send_pending_data(int fd)
407 {
408 	ssize_t nwritten;
409 
410 	if (cur_resp.len == 0) {
411 		mevent_disable(write_event);
412 		return;
413 	}
414 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
415 	if (nwritten == -1) {
416 		warn("Write to GDB socket failed");
417 		close_connection();
418 	} else {
419 		io_buffer_advance(&cur_resp, nwritten);
420 		if (cur_resp.len == 0)
421 			mevent_disable(write_event);
422 		else
423 			mevent_enable(write_event);
424 	}
425 }
426 
427 /* Append a single character to the output buffer. */
428 static void
429 send_char(uint8_t data)
430 {
431 	io_buffer_grow(&cur_resp, 1);
432 	*io_buffer_tail(&cur_resp) = data;
433 	cur_resp.len++;
434 }
435 
436 /* Append an array of bytes to the output buffer. */
437 static void
438 send_data(const uint8_t *data, size_t len)
439 {
440 
441 	io_buffer_grow(&cur_resp, len);
442 	memcpy(io_buffer_tail(&cur_resp), data, len);
443 	cur_resp.len += len;
444 }
445 
/*
 * Write the two lowercase hexadecimal digits of 'v' to buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	uint8_t hi, lo;

	hi = v >> 4;
	lo = v & 0xf;
	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
453 
/*
 * Queue one byte on the response buffer as two hexadecimal characters.
 * The packet checksum is not updated.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
466 
467 static void
468 start_packet(void)
469 {
470 
471 	send_char('$');
472 	cur_csum = 0;
473 }
474 
475 static void
476 finish_packet(void)
477 {
478 
479 	send_char('#');
480 	send_byte(cur_csum);
481 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
482 }
483 
484 /*
485  * Append a single character (for the packet payload) and update the
486  * checksum.
487  */
488 static void
489 append_char(uint8_t v)
490 {
491 
492 	send_char(v);
493 	cur_csum += v;
494 }
495 
496 /*
497  * Append an array of bytes (for the packet payload) and update the
498  * checksum.
499  */
500 static void
501 append_packet_data(const uint8_t *data, size_t len)
502 {
503 
504 	send_data(data, len);
505 	while (len > 0) {
506 		cur_csum += *data;
507 		data++;
508 		len--;
509 	}
510 }
511 
/* Add the characters of NUL-terminated 'str' to the packet payload. */
static void
append_string(const char *str)
{
	size_t n;

	n = strlen(str);
	append_packet_data((const uint8_t *)str, n);
}
518 
/* Add one byte to the packet payload as two hexadecimal characters. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
527 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, least
 * significant byte first, each as two hexadecimal characters.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	remaining = len;
	while (remaining > 0) {
		append_byte((uint8_t)(value & 0xff));
		value >>= 8;
		remaining--;
	}
}
538 
/*
 * Add the low 'len' bytes of 'value' to the packet payload, most
 * significant byte first, each as two hexadecimal characters.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	uint8_t buf[len * 2];
	size_t pos;

	/* Fill from the end of the buffer so the low byte lands last. */
	for (pos = len; pos > 0; pos--) {
		format_byte(value, buf + (pos - 1) * 2);
		value >>= 8;
	}
	append_packet_data(buf, sizeof(buf));
}
551 
/*
 * Add 'value' to the packet payload as big-endian hexadecimal, using the
 * minimum number of whole bytes needed to represent it.  Zero is emitted
 * as the single character '0'.
 */
static void
append_integer(unsigned int value)
{

	if (value == 0)
		append_char('0');
	else
		/*
		 * fls() gives the number of significant bits; round that
		 * up to whole bytes.  The parentheses matter: without them
		 * "7 / 8" evaluates to 0 and the bit count is used as a
		 * byte count, padding the number with leading zero bytes.
		 */
		append_unsigned_be(value, (fls(value) + 7) / 8);
}
561 
/*
 * Add NUL-terminated 'str' to the packet payload with every character
 * encoded as two hexadecimal digits.
 */
static void
append_asciihex(const char *str)
{
	const char *cp;

	for (cp = str; *cp != '\0'; cp++)
		append_byte(*cp);
}
571 
/*
 * Queue a packet with an empty payload.  This file uses it as the reply
 * to commands it does not implement.
 */
static void
send_empty_response(void)
{

	start_packet();		/* "$" */
	finish_packet();	/* "#" + checksum */
}
579 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as two
 * hexadecimal digits.
 */
static void
send_error(int error)
{
	uint8_t code;

	code = error;
	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
589 
/* Queue the "OK" success reply. */
static void
send_ok(void)
{
	static const char reply[] = "OK";

	start_packet();
	append_string(reply);
	finish_packet();
}
598 
/*
 * Parse the thread-id occupying the 'len' bytes at 'data'.
 *
 * Returns 0 for "0", -1 for "-1", -2 when the field is empty, and
 * otherwise the field parsed as a hexadecimal number.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (data[0] == '-' && data[1] == '1')
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
611 
612 static void
613 report_stop(void)
614 {
615 
616 	start_packet();
617 	if (stopped_vcpu == -1)
618 		append_char('S');
619 	else
620 		append_char('T');
621 	append_byte(GDB_SIGNAL_TRAP);
622 	if (stopped_vcpu != -1) {
623 		append_string("thread:");
624 		append_integer(stopped_vcpu + 1);
625 		append_char(';');
626 	}
627 	stopped_vcpu = -1;
628 	finish_packet();
629 }
630 
631 static void
632 gdb_finish_suspend_vcpus(void)
633 {
634 
635 	if (first_stop) {
636 		first_stop = false;
637 		stopped_vcpu = -1;
638 	} else if (response_pending())
639 		stop_pending = true;
640 	else {
641 		report_stop();
642 		send_pending_data(cur_fd);
643 	}
644 }
645 
646 static void
647 _gdb_cpu_suspend(int vcpu, bool report_stop)
648 {
649 
650 	debug("$vCPU %d suspending\n", vcpu);
651 	CPU_SET(vcpu, &vcpus_waiting);
652 	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
653 		gdb_finish_suspend_vcpus();
654 	while (CPU_ISSET(vcpu, &vcpus_suspended) && vcpu != stepping_vcpu)
655 		pthread_cond_wait(&idle_vcpus, &gdb_lock);
656 	CPU_CLR(vcpu, &vcpus_waiting);
657 	debug("$vCPU %d resuming\n", vcpu);
658 }
659 
660 void
661 gdb_cpu_add(int vcpu)
662 {
663 
664 	debug("$vCPU %d starting\n", vcpu);
665 	pthread_mutex_lock(&gdb_lock);
666 	CPU_SET(vcpu, &vcpus_active);
667 
668 	/*
669 	 * If a vcpu is added while vcpus are stopped, suspend the new
670 	 * vcpu so that it will pop back out with a debug exit before
671 	 * executing the first instruction.
672 	 */
673 	if (!CPU_EMPTY(&vcpus_suspended)) {
674 		CPU_SET(vcpu, &vcpus_suspended);
675 		_gdb_cpu_suspend(vcpu, false);
676 	}
677 	pthread_mutex_unlock(&gdb_lock);
678 }
679 
680 void
681 gdb_cpu_suspend(int vcpu)
682 {
683 
684 	pthread_mutex_lock(&gdb_lock);
685 	_gdb_cpu_suspend(vcpu, true);
686 	pthread_mutex_unlock(&gdb_lock);
687 }
688 
689 void
690 gdb_cpu_mtrap(int vcpu)
691 {
692 
693 	debug("$vCPU %d MTRAP\n", vcpu);
694 	pthread_mutex_lock(&gdb_lock);
695 	if (vcpu == stepping_vcpu) {
696 		stepping_vcpu = -1;
697 		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
698 		vm_suspend_cpu(ctx, vcpu);
699 		assert(stopped_vcpu == -1);
700 		stopped_vcpu = vcpu;
701 		_gdb_cpu_suspend(vcpu, true);
702 	}
703 	pthread_mutex_unlock(&gdb_lock);
704 }
705 
706 static void
707 gdb_suspend_vcpus(void)
708 {
709 
710 	assert(pthread_mutex_isowned_np(&gdb_lock));
711 	debug("suspending all CPUs\n");
712 	vcpus_suspended = vcpus_active;
713 	vm_suspend_cpu(ctx, -1);
714 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
715 		gdb_finish_suspend_vcpus();
716 }
717 
718 static bool
719 gdb_step_vcpu(int vcpu)
720 {
721 	int error, val;
722 
723 	debug("$vCPU %d step\n", vcpu);
724 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
725 	if (error < 0)
726 		return (false);
727 	error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
728 	vm_resume_cpu(ctx, vcpu);
729 	stepping_vcpu = vcpu;
730 	pthread_cond_broadcast(&idle_vcpus);
731 	return (true);
732 }
733 
734 static void
735 gdb_resume_vcpus(void)
736 {
737 
738 	assert(pthread_mutex_isowned_np(&gdb_lock));
739 	vm_resume_cpu(ctx, -1);
740 	debug("resuming all CPUs\n");
741 	CPU_ZERO(&vcpus_suspended);
742 	pthread_cond_broadcast(&idle_vcpus);
743 }
744 
745 static void
746 gdb_read_regs(void)
747 {
748 	uint64_t regvals[nitems(gdb_regset)];
749 	int i;
750 
751 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
752 	    gdb_regset, regvals) == -1) {
753 		send_error(errno);
754 		return;
755 	}
756 	start_packet();
757 	for (i = 0; i < nitems(regvals); i++)
758 		append_unsigned_native(regvals[i], gdb_regsize[i]);
759 	finish_packet();
760 }
761 
762 static void
763 gdb_read_mem(const uint8_t *data, size_t len)
764 {
765 	uint64_t gpa, gva, val;
766 	uint8_t *cp;
767 	size_t resid, todo, bytes;
768 	bool started;
769 	int error;
770 
771 	cp = memchr(data, ',', len);
772 	if (cp == NULL) {
773 		send_error(EINVAL);
774 		return;
775 	}
776 	gva = parse_integer(data + 1, cp - (data + 1));
777 	resid = parse_integer(cp + 1, len - (cp + 1 - data));
778 	started = false;
779 
780 	while (resid > 0) {
781 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
782 		if (error == -1) {
783 			if (started)
784 				finish_packet();
785 			else
786 				send_error(errno);
787 			return;
788 		}
789 		if (error == 0) {
790 			if (started)
791 				finish_packet();
792 			else
793 				send_error(EFAULT);
794 			return;
795 		}
796 
797 		/* Read bytes from current page. */
798 		todo = getpagesize() - gpa % getpagesize();
799 		if (todo > resid)
800 			todo = resid;
801 
802 		cp = paddr_guest2host(ctx, gpa, todo);
803 		if (cp != NULL) {
804 			/*
805 			 * If this page is guest RAM, read it a byte
806 			 * at a time.
807 			 */
808 			if (!started) {
809 				start_packet();
810 				started = true;
811 			}
812 			while (todo > 0) {
813 				append_byte(*cp);
814 				cp++;
815 				gpa++;
816 				gva++;
817 				resid--;
818 				todo--;
819 			}
820 		} else {
821 			/*
822 			 * If this page isn't guest RAM, try to handle
823 			 * it via MMIO.  For MMIO requests, use
824 			 * aligned reads of words when possible.
825 			 */
826 			while (todo > 0) {
827 				if (gpa & 1 || todo == 1)
828 					bytes = 1;
829 				else if (gpa & 2 || todo == 2)
830 					bytes = 2;
831 				else
832 					bytes = 4;
833 				error = read_mem(ctx, cur_vcpu, gpa, &val,
834 				    bytes);
835 				if (error == 0) {
836 					if (!started) {
837 						start_packet();
838 						started = true;
839 					}
840 					gpa += bytes;
841 					gva += bytes;
842 					resid -= bytes;
843 					todo -= bytes;
844 					while (bytes > 0) {
845 						append_byte(val);
846 						val >>= 8;
847 						bytes--;
848 					}
849 				} else {
850 					if (started)
851 						finish_packet();
852 					else
853 						send_error(EFAULT);
854 					return;
855 				}
856 			}
857 		}
858 		assert(resid == 0 || gpa % getpagesize() == 0);
859 	}
860 	if (!started)
861 		start_packet();
862 	finish_packet();
863 }
864 
/*
 * Report whether the 'len' bytes at 'data' begin with the NUL-terminated
 * string 'cmd'.  This is a prefix match: 'data' may be longer than 'cmd'.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	if (len < cmdlen)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
873 
874 static void
875 gdb_query(const uint8_t *data, size_t len)
876 {
877 
878 	/*
879 	 * TODO:
880 	 * - qSearch
881 	 * - qSupported
882 	 */
883 	if (command_equals(data, len, "qAttached")) {
884 		start_packet();
885 		append_char('1');
886 		finish_packet();
887 	} else if (command_equals(data, len, "qC")) {
888 		start_packet();
889 		append_string("QC");
890 		append_integer(cur_vcpu + 1);
891 		finish_packet();
892 	} else if (command_equals(data, len, "qfThreadInfo")) {
893 		cpuset_t mask;
894 		bool first;
895 		int vcpu;
896 
897 		if (CPU_EMPTY(&vcpus_active)) {
898 			send_error(EINVAL);
899 			return;
900 		}
901 		mask = vcpus_active;
902 		start_packet();
903 		append_char('m');
904 		first = true;
905 		while (!CPU_EMPTY(&mask)) {
906 			vcpu = CPU_FFS(&mask) - 1;
907 			CPU_CLR(vcpu, &mask);
908 			if (first)
909 				first = false;
910 			else
911 				append_char(',');
912 			append_integer(vcpu + 1);
913 		}
914 		finish_packet();
915 	} else if (command_equals(data, len, "qsThreadInfo")) {
916 		start_packet();
917 		append_char('l');
918 		finish_packet();
919 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
920 		char buf[16];
921 		int tid;
922 
923 		data += strlen("qThreadExtraInfo");
924 		len -= strlen("qThreadExtraInfo");
925 		if (*data != ',') {
926 			send_error(EINVAL);
927 			return;
928 		}
929 		tid = parse_threadid(data + 1, len - 1);
930 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
931 			send_error(EINVAL);
932 			return;
933 		}
934 
935 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
936 		start_packet();
937 		append_asciihex(buf);
938 		finish_packet();
939 	} else
940 		send_empty_response();
941 }
942 
943 static void
944 handle_command(const uint8_t *data, size_t len)
945 {
946 
947 	/* Reject packets with a sequence-id. */
948 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
949 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
950 		send_empty_response();
951 		return;
952 	}
953 
954 	switch (*data) {
955 	case 'c':
956 		if (len != 1) {
957 			send_error(EINVAL);
958 			break;
959 		}
960 
961 		/* Don't send a reply until a stop occurs. */
962 		gdb_resume_vcpus();
963 		break;
964 	case 'D':
965 		send_ok();
966 
967 		/* TODO: Resume any stopped CPUs. */
968 		break;
969 	case 'g': {
970 		gdb_read_regs();
971 		break;
972 	}
973 	case 'H': {
974 		int tid;
975 
976 		if (data[1] != 'g' && data[1] != 'c') {
977 			send_error(EINVAL);
978 			break;
979 		}
980 		tid = parse_threadid(data + 2, len - 2);
981 		if (tid == -2) {
982 			send_error(EINVAL);
983 			break;
984 		}
985 
986 		if (CPU_EMPTY(&vcpus_active)) {
987 			send_error(EINVAL);
988 			break;
989 		}
990 		if (tid == -1 || tid == 0)
991 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
992 		else if (CPU_ISSET(tid - 1, &vcpus_active))
993 			cur_vcpu = tid - 1;
994 		else {
995 			send_error(EINVAL);
996 			break;
997 		}
998 		send_ok();
999 		break;
1000 	}
1001 	case 'm':
1002 		gdb_read_mem(data, len);
1003 		break;
1004 	case 'T': {
1005 		int tid;
1006 
1007 		tid = parse_threadid(data + 1, len - 1);
1008 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1009 			send_error(EINVAL);
1010 			return;
1011 		}
1012 		send_ok();
1013 		break;
1014 	}
1015 	case 'q':
1016 		gdb_query(data, len);
1017 		break;
1018 	case 's':
1019 		if (len != 1) {
1020 			send_error(EINVAL);
1021 			break;
1022 		}
1023 
1024 		/* Don't send a reply until a stop occurs. */
1025 		if (!gdb_step_vcpu(cur_vcpu)) {
1026 			send_error(EOPNOTSUPP);
1027 			break;
1028 		}
1029 		break;
1030 	case '?':
1031 		/* XXX: Only if stopped? */
1032 		/* For now, just report that we are always stopped. */
1033 		start_packet();
1034 		append_char('S');
1035 		append_byte(GDB_SIGNAL_TRAP);
1036 		finish_packet();
1037 		break;
1038 	case 'G': /* TODO */
1039 	case 'M': /* TODO */
1040 	case 'v':
1041 		/* Handle 'vCont' */
1042 		/* 'vCtrlC' */
1043 	case 'p': /* TODO */
1044 	case 'P': /* TODO */
1045 	case 'Q': /* TODO */
1046 	case 't': /* TODO */
1047 	case 'X': /* TODO */
1048 	case 'z': /* TODO */
1049 	case 'Z': /* TODO */
1050 	default:
1051 		send_empty_response();
1052 	}
1053 }
1054 
1055 /* Check for a valid packet in the command buffer. */
1056 static void
1057 check_command(int fd)
1058 {
1059 	uint8_t *head, *hash, *p, sum;
1060 	size_t avail, plen;
1061 
1062 	for (;;) {
1063 		avail = cur_comm.len;
1064 		if (avail == 0)
1065 			return;
1066 		head = io_buffer_head(&cur_comm);
1067 		switch (*head) {
1068 		case 0x03:
1069 			debug("<- Ctrl-C\n");
1070 			io_buffer_consume(&cur_comm, 1);
1071 
1072 			gdb_suspend_vcpus();
1073 			break;
1074 		case '+':
1075 			/* ACK of previous response. */
1076 			debug("<- +\n");
1077 			if (response_pending())
1078 				io_buffer_reset(&cur_resp);
1079 			io_buffer_consume(&cur_comm, 1);
1080 			if (stop_pending) {
1081 				stop_pending = false;
1082 				report_stop();
1083 				send_pending_data(fd);
1084 			}
1085 			break;
1086 		case '-':
1087 			/* NACK of previous response. */
1088 			debug("<- -\n");
1089 			if (response_pending()) {
1090 				cur_resp.len += cur_resp.start;
1091 				cur_resp.start = 0;
1092 				if (cur_resp.data[0] == '+')
1093 					io_buffer_advance(&cur_resp, 1);
1094 				debug("-> %.*s\n", (int)cur_resp.len,
1095 				    io_buffer_head(&cur_resp));
1096 			}
1097 			io_buffer_consume(&cur_comm, 1);
1098 			send_pending_data(fd);
1099 			break;
1100 		case '$':
1101 			/* Packet. */
1102 
1103 			if (response_pending()) {
1104 				warnx("New GDB command while response in "
1105 				    "progress");
1106 				io_buffer_reset(&cur_resp);
1107 			}
1108 
1109 			/* Is packet complete? */
1110 			hash = memchr(head, '#', avail);
1111 			if (hash == NULL)
1112 				return;
1113 			plen = (hash - head + 1) + 2;
1114 			if (avail < plen)
1115 				return;
1116 			debug("<- %.*s\n", (int)plen, head);
1117 
1118 			/* Verify checksum. */
1119 			for (sum = 0, p = head + 1; p < hash; p++)
1120 				sum += *p;
1121 			if (sum != parse_byte(hash + 1)) {
1122 				io_buffer_consume(&cur_comm, plen);
1123 				debug("-> -\n");
1124 				send_char('-');
1125 				send_pending_data(fd);
1126 				break;
1127 			}
1128 			send_char('+');
1129 
1130 			handle_command(head + 1, hash - (head + 1));
1131 			io_buffer_consume(&cur_comm, plen);
1132 			if (!response_pending())
1133 				debug("-> +\n");
1134 			send_pending_data(fd);
1135 			break;
1136 		default:
1137 			/* XXX: Possibly drop connection instead. */
1138 			debug("-> %02x\n", *head);
1139 			io_buffer_consume(&cur_comm, 1);
1140 			break;
1141 		}
1142 	}
1143 }
1144 
1145 static void
1146 gdb_readable(int fd, enum ev_type event, void *arg)
1147 {
1148 	ssize_t nread;
1149 	int pending;
1150 
1151 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1152 		warn("FIONREAD on GDB socket");
1153 		return;
1154 	}
1155 
1156 	/*
1157 	 * 'pending' might be zero due to EOF.  We need to call read
1158 	 * with a non-zero length to detect EOF.
1159 	 */
1160 	if (pending == 0)
1161 		pending = 1;
1162 
1163 	/* Ensure there is room in the command buffer. */
1164 	io_buffer_grow(&cur_comm, pending);
1165 	assert(io_buffer_avail(&cur_comm) >= pending);
1166 
1167 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1168 	if (nread == 0) {
1169 		close_connection();
1170 	} else if (nread == -1) {
1171 		if (errno == EAGAIN)
1172 			return;
1173 
1174 		warn("Read from GDB socket");
1175 		close_connection();
1176 	} else {
1177 		cur_comm.len += nread;
1178 		pthread_mutex_lock(&gdb_lock);
1179 		check_command(fd);
1180 		pthread_mutex_unlock(&gdb_lock);
1181 	}
1182 }
1183 
1184 static void
1185 gdb_writable(int fd, enum ev_type event, void *arg)
1186 {
1187 
1188 	send_pending_data(fd);
1189 }
1190 
1191 static void
1192 new_connection(int fd, enum ev_type event, void *arg)
1193 {
1194 	int optval, s;
1195 
1196 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1197 	if (s == -1) {
1198 		if (arg != NULL)
1199 			err(1, "Failed accepting initial GDB connection");
1200 
1201 		/* Silently ignore errors post-startup. */
1202 		return;
1203 	}
1204 
1205 	optval = 1;
1206 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1207 	    -1) {
1208 		warn("Failed to disable SIGPIPE for GDB connection");
1209 		close(s);
1210 		return;
1211 	}
1212 
1213 	pthread_mutex_lock(&gdb_lock);
1214 	if (cur_fd != -1) {
1215 		close(s);
1216 		warnx("Ignoring additional GDB connection.");
1217 	}
1218 
1219 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1220 	if (read_event == NULL) {
1221 		if (arg != NULL)
1222 			err(1, "Failed to setup initial GDB connection");
1223 		pthread_mutex_unlock(&gdb_lock);
1224 		return;
1225 	}
1226 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1227 	if (write_event == NULL) {
1228 		if (arg != NULL)
1229 			err(1, "Failed to setup initial GDB connection");
1230 		mevent_delete_close(read_event);
1231 		read_event = NULL;
1232 	}
1233 
1234 	cur_fd = s;
1235 	cur_vcpu = 0;
1236 	stepping_vcpu = -1;
1237 	stopped_vcpu = -1;
1238 	stop_pending = false;
1239 
1240 	/* Break on attach. */
1241 	first_stop = true;
1242 	gdb_suspend_vcpus();
1243 	pthread_mutex_unlock(&gdb_lock);
1244 }
1245 
1246 #ifndef WITHOUT_CAPSICUM
1247 void
1248 limit_gdb_socket(int s)
1249 {
1250 	cap_rights_t rights;
1251 	unsigned long ioctls[] = { FIONREAD };
1252 
1253 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1254 	    CAP_SETSOCKOPT, CAP_IOCTL);
1255 	if (cap_rights_limit(s, &rights) == -1 && errno != ENOSYS)
1256 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1257 	if (cap_ioctls_limit(s, ioctls, nitems(ioctls)) == -1 && errno != ENOSYS)
1258 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1259 }
1260 #endif
1261 
1262 void
1263 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1264 {
1265 	struct sockaddr_in sin;
1266 	int error, flags, s;
1267 
1268 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1269 
1270 	error = pthread_mutex_init(&gdb_lock, NULL);
1271 	if (error != 0)
1272 		errc(1, error, "gdb mutex init");
1273 	error = pthread_cond_init(&idle_vcpus, NULL);
1274 	if (error != 0)
1275 		errc(1, error, "gdb cv init");
1276 
1277 	ctx = _ctx;
1278 	s = socket(PF_INET, SOCK_STREAM, 0);
1279 	if (s < 0)
1280 		err(1, "gdb socket create");
1281 
1282 	sin.sin_len = sizeof(sin);
1283 	sin.sin_family = AF_INET;
1284 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1285 	sin.sin_port = htons(sport);
1286 
1287 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1288 		err(1, "gdb socket bind");
1289 
1290 	if (listen(s, 1) < 0)
1291 		err(1, "gdb socket listen");
1292 
1293 	if (wait) {
1294 		/*
1295 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1296 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1297 		 * it starts execution.  The vcpu will remain suspended
1298 		 * until a debugger connects.
1299 		 */
1300 		stepping_vcpu = -1;
1301 		stopped_vcpu = -1;
1302 		CPU_SET(0, &vcpus_suspended);
1303 	}
1304 
1305 	flags = fcntl(s, F_GETFL);
1306 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1307 		err(1, "Failed to mark gdb socket non-blocking");
1308 
1309 #ifndef WITHOUT_CAPSICUM
1310 	limit_gdb_socket(s);
1311 #endif
1312 	mevent_add(s, EVF_READ, new_connection, NULL);
1313 }
1314