xref: /freebsd/usr.sbin/bhyve/gdb.c (revision 7cc42f6d)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <pthread.h>
52 #include <pthread_np.h>
53 #include <stdbool.h>
54 #include <stdio.h>
55 #include <stdlib.h>
56 #include <string.h>
57 #include <sysexits.h>
58 #include <unistd.h>
59 #include <vmmapi.h>
60 
61 #include "bhyverun.h"
62 #include "gdb.h"
63 #include "mem.h"
64 #include "mevent.h"
65 
66 /*
67  * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
68  * use SIGTRAP.
69  */
70 #define	GDB_SIGNAL_TRAP		5
71 
72 static void gdb_resume_vcpus(void);
73 static void check_command(int fd);
74 
/* mevent handles for read/write readiness on the debugger socket. */
static struct mevent *read_event, *write_event;

/* Which vCPUs exist, have been told to stop, and have checked in stopped. */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
static pthread_mutex_t gdb_lock;	/* guards the debug-server state below */
static pthread_cond_t idle_vcpus;	/* broadcast when suspended vCPUs may run */
static bool first_stop, report_next_stop, swbreak_enabled;
81 
82 /*
83  * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
84  * read buffer, 'start' is unused and 'len' contains the number of
85  * valid bytes in the buffer.  For a write buffer, 'start' is set to
86  * the index of the next byte in 'data' to send, and 'len' contains
87  * the remaining number of valid bytes to send.
88  */
struct io_buffer {
	uint8_t *data;		/* heap storage, grown via realloc() */
	size_t capacity;	/* total bytes allocated at 'data' */
	size_t start;		/* write buffers: index of next byte to send */
	size_t len;		/* count of valid bytes at data + start */
};
95 
/* A software breakpoint: an INT3 (0xcc) patched over a guest byte. */
struct breakpoint {
	uint64_t gpa;		/* guest-physical address of the patched byte */
	uint8_t shadow_inst;	/* original byte displaced by 0xcc */
	TAILQ_ENTRY(breakpoint) link;
};
101 
102 /*
103  * When a vCPU stops to due to an event that should be reported to the
104  * debugger, information about the event is stored in this structure.
105  * The vCPU thread then sets 'stopped_vcpu' if it is not already set
106  * and stops other vCPUs so the event can be reported.  The
107  * report_stop() function reports the event for the 'stopped_vcpu'
108  * vCPU.  When the debugger resumes execution via continue or step,
109  * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
110  * event handlers until the associated event is reported or disabled.
111  *
112  * An idle vCPU will have all of the boolean fields set to false.
113  *
114  * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
115  * released to execute the stepped instruction.  When the vCPU reports
116  * the stepping trap, 'stepped' is set.
117  *
118  * When a vCPU hits a breakpoint set by the debug server,
119  * 'hit_swbreak' is set to true.
120  */
struct vcpu_state {
	bool stepping;		/* released to execute one instruction */
	bool stepped;		/* step completed, awaiting report to debugger */
	bool hit_swbreak;	/* stopped on a debug-server breakpoint */
};
126 
static struct io_buffer cur_comm, cur_resp;	/* receive / transmit buffers */
static uint8_t cur_csum;	/* running checksum of the packet being built */
static struct vmctx *ctx;
static int cur_fd = -1;		/* socket of the active connection, or -1 */
static TAILQ_HEAD(, breakpoint) breakpoints;
static struct vcpu_state *vcpu_state;	/* array of guest_ncpus entries */
static int cur_vcpu, stopped_vcpu;	/* current vCPU; vCPU that caused the stop */
134 
/*
 * Registers in the order the 'g' (read registers) packet reports
 * them; gdb_regsize[] holds the width in bytes of the corresponding
 * gdb_regset[] entry.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};

const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
188 
189 #ifdef GDB_LOG
190 #include <stdarg.h>
191 #include <stdio.h>
192 
/*
 * Trace helper, compiled in only with GDB_LOG: appends printf-style
 * messages to /tmp/bhyve_gdb.log, which is opened and line-buffered
 * on first use.  Failures to open/limit the log are silent.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list ap;

	if (logfile == NULL) {
		logfile = fopen("/tmp/bhyve_gdb.log", "w");
		if (logfile == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		/* Limit the log fd to writes before capability mode. */
		if (caph_limit_stream(fileno(logfile), CAPH_WRITE) == -1) {
			fclose(logfile);
			logfile = NULL;
			return;
		}
#endif
		setlinebuf(logfile);
	}
	va_start(ap, fmt);
	vfprintf(logfile, fmt, ap);
	va_end(ap);
}
216 #else
217 #define debug(...)
218 #endif
219 
220 static void	remove_all_sw_breakpoints(void);
221 
/*
 * Derive the vCPU's current paging configuration from its control
 * registers (CR0, CR3, CR4, EFER) and fill in 'paging'.
 * Returns 0 on success or -1 if the registers cannot be fetched.
 */
static int
guest_paging_info(int vcpu, struct vm_guest_paging *paging)
{
	uint64_t regs[4];
	const int regset[4] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER
	};

	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
		return (-1);

	/*
	 * For the debugger, always pretend to be the kernel (CPL 0),
	 * and if long-mode is enabled, always parse addresses as if
	 * in 64-bit mode.
	 */
	paging->cr3 = regs[1];
	paging->cpl = 0;
	if (regs[3] & EFER_LMA)
		paging->cpu_mode = CPU_MODE_64BIT;
	else if (regs[0] & CR0_PE)
		paging->cpu_mode = CPU_MODE_PROTECTED;
	else
		paging->cpu_mode = CPU_MODE_REAL;
	if (!(regs[0] & CR0_PG))
		paging->paging_mode = PAGING_MODE_FLAT;
	else if (!(regs[2] & CR4_PAE))
		paging->paging_mode = PAGING_MODE_32;
	else if (regs[3] & EFER_LME)
		/* Long mode: 5-level paging when LA57 is set, else 4-level. */
		paging->paging_mode = (regs[2] & CR4_LA57) ?
		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
	else
		paging->paging_mode = PAGING_MODE_PAE;
	return (0);
}
260 
261 /*
262  * Map a guest virtual address to a physical address (for a given vcpu).
263  * If a guest virtual address is valid, return 1.  If the address is
264  * not valid, return 0.  If an error occurs obtaining the mapping,
265  * return -1.
266  */
267 static int
268 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
269 {
270 	struct vm_guest_paging paging;
271 	int fault;
272 
273 	if (guest_paging_info(vcpu, &paging) == -1)
274 		return (-1);
275 
276 	/*
277 	 * Always use PROT_READ.  We really care if the VA is
278 	 * accessible, not if the current vCPU can write.
279 	 */
280 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
281 	    &fault) == -1)
282 		return (-1);
283 	if (fault)
284 		return (0);
285 	return (1);
286 }
287 
288 static void
289 io_buffer_reset(struct io_buffer *io)
290 {
291 
292 	io->start = 0;
293 	io->len = 0;
294 }
295 
296 /* Available room for adding data. */
297 static size_t
298 io_buffer_avail(struct io_buffer *io)
299 {
300 
301 	return (io->capacity - (io->start + io->len));
302 }
303 
304 static uint8_t *
305 io_buffer_head(struct io_buffer *io)
306 {
307 
308 	return (io->data + io->start);
309 }
310 
311 static uint8_t *
312 io_buffer_tail(struct io_buffer *io)
313 {
314 
315 	return (io->data + io->start + io->len);
316 }
317 
318 static void
319 io_buffer_advance(struct io_buffer *io, size_t amount)
320 {
321 
322 	assert(amount <= io->len);
323 	io->start += amount;
324 	io->len -= amount;
325 }
326 
327 static void
328 io_buffer_consume(struct io_buffer *io, size_t amount)
329 {
330 
331 	io_buffer_advance(io, amount);
332 	if (io->len == 0) {
333 		io->start = 0;
334 		return;
335 	}
336 
337 	/*
338 	 * XXX: Consider making this move optional and compacting on a
339 	 * future read() before realloc().
340 	 */
341 	memmove(io->data, io_buffer_head(io), io->len);
342 	io->start = 0;
343 }
344 
345 static void
346 io_buffer_grow(struct io_buffer *io, size_t newsize)
347 {
348 	uint8_t *new_data;
349 	size_t avail, new_cap;
350 
351 	avail = io_buffer_avail(io);
352 	if (newsize <= avail)
353 		return;
354 
355 	new_cap = io->capacity + (newsize - avail);
356 	new_data = realloc(io->data, new_cap);
357 	if (new_data == NULL)
358 		err(1, "Failed to grow GDB I/O buffer");
359 	io->data = new_data;
360 	io->capacity = new_cap;
361 }
362 
363 static bool
364 response_pending(void)
365 {
366 
367 	if (cur_resp.start == 0 && cur_resp.len == 0)
368 		return (false);
369 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
370 		return (false);
371 	return (true);
372 }
373 
/*
 * Tear down the active debugger connection: delete the mevent
 * handlers (closing the socket), drop buffered I/O, remove all
 * software breakpoints, clear pending per-vCPU events, and resume
 * the guest.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
400 
/* Convert a nibble (0-15) to its lowercase ASCII hex character. */
static uint8_t
hex_digit(uint8_t nibble)
{

	return (nibble <= 9 ? nibble + '0' : nibble + 'a' - 10);
}
410 
/* Decode one ASCII hex character; invalid input decodes as 0xF. */
static uint8_t
parse_digit(uint8_t v)
{

	if ('0' <= v && v <= '9')
		return (v - '0');
	if ('a' <= v && v <= 'f')
		return (v - 'a' + 10);
	if ('A' <= v && v <= 'F')
		return (v - 'A' + 10);
	return (0xF);
}
423 
/* Parses big-endian hexadecimal. */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t val;
	size_t i;

	val = 0;
	for (i = 0; i < len; i++)
		val = (val << 4) | parse_digit(p[i]);
	return (val);
}
439 
/* Decode two hex characters into one byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return ((hi << 4) | lo);
}
446 
/*
 * Flush as much of the response buffer to the socket as it will
 * accept.  Write-readiness events are disabled when the buffer
 * drains and enabled while data remains; a write error tears down
 * the connection.
 */
static void
send_pending_data(int fd)
{
	ssize_t nwritten;

	if (cur_resp.len == 0) {
		mevent_disable(write_event);
		return;
	}
	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
	if (nwritten == -1) {
		warn("Write to GDB socket failed");
		close_connection();
	} else {
		io_buffer_advance(&cur_resp, nwritten);
		if (cur_resp.len == 0)
			mevent_disable(write_event);
		else
			mevent_enable(write_event);
	}
}
468 
469 /* Append a single character to the output buffer. */
470 static void
471 send_char(uint8_t data)
472 {
473 	io_buffer_grow(&cur_resp, 1);
474 	*io_buffer_tail(&cur_resp) = data;
475 	cur_resp.len++;
476 }
477 
478 /* Append an array of bytes to the output buffer. */
479 static void
480 send_data(const uint8_t *data, size_t len)
481 {
482 
483 	io_buffer_grow(&cur_resp, len);
484 	memcpy(io_buffer_tail(&cur_resp), data, len);
485 	cur_resp.len += len;
486 }
487 
488 static void
489 format_byte(uint8_t v, uint8_t *buf)
490 {
491 
492 	buf[0] = hex_digit(v >> 4);
493 	buf[1] = hex_digit(v & 0xf);
494 }
495 
496 /*
497  * Append a single byte (formatted as two hex characters) to the
498  * output buffer.
499  */
500 static void
501 send_byte(uint8_t v)
502 {
503 	uint8_t buf[2];
504 
505 	format_byte(v, buf);
506 	send_data(buf, sizeof(buf));
507 }
508 
509 static void
510 start_packet(void)
511 {
512 
513 	send_char('$');
514 	cur_csum = 0;
515 }
516 
/*
 * Terminate the current packet with the '#' marker followed by the
 * two-hex-digit checksum accumulated since start_packet().
 */
static void
finish_packet(void)
{

	send_char('#');
	send_byte(cur_csum);
	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
}
525 
526 /*
527  * Append a single character (for the packet payload) and update the
528  * checksum.
529  */
530 static void
531 append_char(uint8_t v)
532 {
533 
534 	send_char(v);
535 	cur_csum += v;
536 }
537 
538 /*
539  * Append an array of bytes (for the packet payload) and update the
540  * checksum.
541  */
542 static void
543 append_packet_data(const uint8_t *data, size_t len)
544 {
545 
546 	send_data(data, len);
547 	while (len > 0) {
548 		cur_csum += *data;
549 		data++;
550 		len--;
551 	}
552 }
553 
/*
 * Append a NUL-terminated string (without the terminator) to the
 * packet payload, updating the checksum.
 */
static void
append_string(const char *str)
{

	/*
	 * Explicit cast: append_packet_data() takes const uint8_t *
	 * and only reads the bytes, so passing char data is safe;
	 * without the cast this is a pointer-type mismatch.
	 */
	append_packet_data((const uint8_t *)str, strlen(str));
}
560 
561 static void
562 append_byte(uint8_t v)
563 {
564 	uint8_t buf[2];
565 
566 	format_byte(v, buf);
567 	append_packet_data(buf, sizeof(buf));
568 }
569 
570 static void
571 append_unsigned_native(uintmax_t value, size_t len)
572 {
573 	size_t i;
574 
575 	for (i = 0; i < len; i++) {
576 		append_byte(value);
577 		value >>= 8;
578 	}
579 }
580 
581 static void
582 append_unsigned_be(uintmax_t value, size_t len)
583 {
584 	char buf[len * 2];
585 	size_t i;
586 
587 	for (i = 0; i < len; i++) {
588 		format_byte(value, buf + (len - i - 1) * 2);
589 		value >>= 8;
590 	}
591 	append_packet_data(buf, sizeof(buf));
592 }
593 
/* Append 'value' in big-endian hex using the minimum number of bytes. */
static void
append_integer(unsigned int value)
{

	if (value == 0) {
		append_char('0');
		return;
	}
	append_unsigned_be(value, (fls(value) + 7) / 8);
}
603 
/* Append each character of 'str' as a two-digit hex byte. */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
613 
/*
 * Send an empty packet ("$#00"), the protocol's way of saying a
 * request is unsupported.
 */
static void
send_empty_response(void)
{

	start_packet();
	finish_packet();
}
621 
/* Send an "Exx" reply where xx is 'error' as two hex digits. */
static void
send_error(int error)
{

	start_packet();
	append_char('E');
	append_byte(error);
	finish_packet();
}
631 
/* Send the stock "OK" success reply. */
static void
send_ok(void)
{

	start_packet();
	append_string("OK");
	finish_packet();
}
640 
641 static int
642 parse_threadid(const uint8_t *data, size_t len)
643 {
644 
645 	if (len == 1 && *data == '0')
646 		return (0);
647 	if (len == 2 && memcmp(data, "-1", 2) == 0)
648 		return (-1);
649 	if (len == 0)
650 		return (-2);
651 	return (parse_integer(data, len));
652 }
653 
654 /*
655  * Report the current stop event to the debugger.  If the stop is due
656  * to an event triggered on a specific vCPU such as a breakpoint or
657  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
658  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
659  * the reporting vCPU for vCPU events.
660  */
static void
report_stop(bool set_cur_vcpu)
{
	struct vcpu_state *vs;

	start_packet();
	if (stopped_vcpu == -1) {
		/* No specific vCPU: plain "S" stop reply. */
		append_char('S');
		append_byte(GDB_SIGNAL_TRAP);
	} else {
		vs = &vcpu_state[stopped_vcpu];
		if (set_cur_vcpu)
			cur_vcpu = stopped_vcpu;
		/* "T" stop reply naming the triggering thread. */
		append_char('T');
		append_byte(GDB_SIGNAL_TRAP);
		append_string("thread:");
		/* Protocol thread IDs are 1-based; vCPU IDs are 0-based. */
		append_integer(stopped_vcpu + 1);
		append_char(';');
		if (vs->hit_swbreak) {
			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
			/* Only advertise swbreak if the debugger enabled it. */
			if (swbreak_enabled)
				append_string("swbreak:;");
		} else if (vs->stepped)
			debug("$vCPU %d reporting step\n", stopped_vcpu);
		else
			debug("$vCPU %d reporting ???\n", stopped_vcpu);
	}
	finish_packet();
	report_next_stop = false;
}
691 
692 /*
693  * If this stop is due to a vCPU event, clear that event to mark it as
694  * acknowledged.
695  */
696 static void
697 discard_stop(void)
698 {
699 	struct vcpu_state *vs;
700 
701 	if (stopped_vcpu != -1) {
702 		vs = &vcpu_state[stopped_vcpu];
703 		vs->hit_swbreak = false;
704 		vs->stepped = false;
705 		stopped_vcpu = -1;
706 	}
707 	report_next_stop = true;
708 }
709 
/*
 * Runs once the last suspended vCPU has checked in.  The very first
 * stop is swallowed (NOTE(review): first_stop appears to be armed by
 * connection setup outside this chunk — confirm); later stops are
 * reported immediately when the debugger expects one.
 */
static void
gdb_finish_suspend_vcpus(void)
{

	if (first_stop) {
		first_stop = false;
		stopped_vcpu = -1;
	} else if (report_next_stop) {
		/* A previous reply must have fully drained by now. */
		assert(!response_pending());
		report_stop(true);
		send_pending_data(cur_fd);
	}
}
723 
724 /*
725  * vCPU threads invoke this function whenever the vCPU enters the
726  * debug server to pause or report an event.  vCPU threads wait here
727  * as long as the debug server keeps them suspended.
728  */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	/* The last vCPU to check in completes the suspend request. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep (dropping gdb_lock) until the debug server releases us. */
	while (CPU_ISSET(vcpu, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
742 
743 /*
744  * Invoked at the start of a vCPU thread's execution to inform the
745  * debug server about the new thread.
746  */
void
gdb_cpu_add(int vcpu)
{

	debug("$vCPU %d starting\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpu < guest_ncpus);
	CPU_SET(vcpu, &vcpus_active);
	/* Match existing vCPUs: #BP exits are on whenever breakpoints exist. */
	if (!TAILQ_EMPTY(&breakpoints)) {
		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpu, &vcpus_suspended);
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
771 
772 /*
773  * Invoked by vCPU before resuming execution.  This enables stepping
774  * if the vCPU is marked as stepping.
775  */
static void
gdb_cpu_resume(int vcpu)
{
	struct vcpu_state *vs;
	int error;

	vs = &vcpu_state[vcpu];

	/*
	 * Any pending event should already be reported before
	 * resuming.
	 */
	assert(vs->hit_swbreak == false);
	assert(vs->stepped == false);
	if (vs->stepping) {
		/* Arm MTRAP exits so the vCPU traps after one instruction. */
		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
		assert(error == 0);
	}
}
795 
796 /*
797  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
798  * has been suspended due to an event on different vCPU or in response
799  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
800  */
void
gdb_cpu_suspend(int vcpu)
{

	pthread_mutex_lock(&gdb_lock);
	/* Park here until the debugger resumes the guest. */
	_gdb_cpu_suspend(vcpu, true);
	gdb_cpu_resume(vcpu);
	pthread_mutex_unlock(&gdb_lock);
}
810 
/*
 * Ask every active vCPU to suspend.  Caller must hold gdb_lock
 * (asserted).  If all vCPUs are already waiting, the stop is
 * completed immediately.
 */
static void
gdb_suspend_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	debug("suspending all CPUs\n");
	vcpus_suspended = vcpus_active;
	vm_suspend_cpu(ctx, -1);
	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
}
822 
823 /*
824  * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
825  * the VT-x-specific MTRAP exit.
826  */
void
gdb_cpu_mtrap(int vcpu)
{
	struct vcpu_state *vs;

	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpu];
	if (vs->stepping) {
		vs->stepping = false;
		vs->stepped = true;
		/* The single step completed; stop trapping. */
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		/*
		 * Remain suspended until the step event has been
		 * reported and acknowledged ('stepped' cleared).
		 */
		while (vs->stepped) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpu);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
851 
852 static struct breakpoint *
853 find_breakpoint(uint64_t gpa)
854 {
855 	struct breakpoint *bp;
856 
857 	TAILQ_FOREACH(bp, &breakpoints, link) {
858 		if (bp->gpa == gpa)
859 			return (bp);
860 	}
861 	return (NULL);
862 }
863 
/*
 * Handler for breakpoint (#BP) exits.  If the faulting address holds
 * one of our breakpoints, record the event and stay suspended until
 * it is acknowledged or the breakpoint is removed; otherwise the #BP
 * belongs to the guest and is re-injected.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	pthread_mutex_lock(&gdb_lock);
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/* Keep RIP at the breakpoint address while stopped. */
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not ours: forward the #BP to the guest. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
914 
/*
 * Release a single vCPU to execute one instruction (the 's' packet).
 * Returns false if single-stepping (MTRAP exits) is unsupported.
 */
static bool
gdb_step_vcpu(int vcpu)
{
	int error, val;

	debug("$vCPU %d step\n", vcpu);
	/* Probe for MTRAP support before committing to the step. */
	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
	if (error < 0)
		return (false);

	discard_stop();
	vcpu_state[vcpu].stepping = true;
	vm_resume_cpu(ctx, vcpu);
	CPU_CLR(vcpu, &vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
	return (true);
}
932 
/*
 * Release all suspended vCPUs.  Caller must hold gdb_lock
 * (asserted).
 */
static void
gdb_resume_vcpus(void)
{

	assert(pthread_mutex_isowned_np(&gdb_lock));
	vm_resume_cpu(ctx, -1);
	debug("resuming all CPUs\n");
	CPU_ZERO(&vcpus_suspended);
	pthread_cond_broadcast(&idle_vcpus);
}
943 
/*
 * Handle the 'g' packet: reply with all registers in gdb_regset[]
 * order, each formatted in target (little-endian) byte order.
 */
static void
gdb_read_regs(void)
{
	uint64_t regvals[nitems(gdb_regset)];
	int i;

	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
	    gdb_regset, regvals) == -1) {
		send_error(errno);
		return;
	}
	start_packet();
	for (i = 0; i < nitems(regvals); i++)
		append_unsigned_native(regvals[i], gdb_regsize[i]);
	finish_packet();
}
960 
/*
 * Handle the 'm' packet ("m addr,length"): read guest memory at a
 * virtual address and reply with the bytes as hex.  Guest RAM is
 * read directly one byte at a time; other addresses go through MMIO
 * using aligned word-sized reads where possible.  If a translation
 * or read fails after the reply has been started, the partial packet
 * is finished as-is.
 */
static void
gdb_read_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	bool started;
	int error;

	/* Skip 'm' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse length. */
	resid = parse_integer(data, len);

	started = false;
	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			if (started)
				finish_packet();
			else
				send_error(errno);
			return;
		}
		if (error == 0) {
			if (started)
				finish_packet();
			else
				send_error(EFAULT);
			return;
		}

		/* Read bytes from current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, read it a byte
			 * at a time.
			 */
			if (!started) {
				start_packet();
				started = true;
			}
			while (todo > 0) {
				append_byte(*cp);
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned reads of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1)
					bytes = 1;
				else if (gpa & 2 || todo == 2)
					bytes = 2;
				else
					bytes = 4;
				error = read_mem(ctx, cur_vcpu, gpa, &val,
				    bytes);
				if (error == 0) {
					if (!started) {
						start_packet();
						started = true;
					}
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					/* Emit the word LSB-first. */
					while (bytes > 0) {
						append_byte(val);
						val >>= 8;
						bytes--;
					}
				} else {
					if (started)
						finish_packet();
					else
						send_error(EFAULT);
					return;
				}
			}
		}
		/* Only page boundaries may remain unread. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	if (!started)
		start_packet();
	finish_packet();
}
1072 
/*
 * Handle the 'M' packet ("M addr,length:hexbytes"): write guest
 * memory at a virtual address.  Guest RAM is written directly one
 * byte at a time; other addresses go through MMIO using aligned
 * word-sized writes where possible.  Replies OK on success or an
 * error on bad input or a faulting address.
 */
static void
gdb_write_mem(const uint8_t *data, size_t len)
{
	uint64_t gpa, gva, val;
	uint8_t *cp;
	size_t resid, todo, bytes;
	int error;

	/* Skip 'M' */
	data += 1;
	len -= 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume length. */
	cp = memchr(data, ':', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	resid = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Verify the available bytes match the length. */
	if (len != resid * 2) {
		send_error(EINVAL);
		return;
	}

	while (resid > 0) {
		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
		if (error == -1) {
			send_error(errno);
			return;
		}
		if (error == 0) {
			send_error(EFAULT);
			return;
		}

		/* Write bytes to current page. */
		todo = getpagesize() - gpa % getpagesize();
		if (todo > resid)
			todo = resid;

		cp = paddr_guest2host(ctx, gpa, todo);
		if (cp != NULL) {
			/*
			 * If this page is guest RAM, write it a byte
			 * at a time.
			 */
			while (todo > 0) {
				assert(len >= 2);
				*cp = parse_byte(data);
				data += 2;
				len -= 2;
				cp++;
				gpa++;
				gva++;
				resid--;
				todo--;
			}
		} else {
			/*
			 * If this page isn't guest RAM, try to handle
			 * it via MMIO.  For MMIO requests, use
			 * aligned writes of words when possible.
			 */
			while (todo > 0) {
				if (gpa & 1 || todo == 1) {
					bytes = 1;
					val = parse_byte(data);
				} else if (gpa & 2 || todo == 2) {
					bytes = 2;
					/* Hex is big-endian; convert to host. */
					val = be16toh(parse_integer(data, 4));
				} else {
					bytes = 4;
					val = be32toh(parse_integer(data, 8));
				}
				error = write_mem(ctx, cur_vcpu, gpa, val,
				    bytes);
				if (error == 0) {
					gpa += bytes;
					gva += bytes;
					resid -= bytes;
					todo -= bytes;
					data += 2 * bytes;
					len -= 2 * bytes;
				} else {
					send_error(EFAULT);
					return;
				}
			}
		}
		/* Only page boundaries may remain unwritten. */
		assert(resid == 0 || gpa % getpagesize() == 0);
	}
	assert(len == 0);
	send_ok();
}
1181 
/*
 * Enable or disable breakpoint (#BP) exits on every active vCPU.
 * Returns false as soon as any vCPU rejects the change.
 */
static bool
set_breakpoint_caps(bool enable)
{
	cpuset_t mask;
	int vcpu;

	mask = vcpus_active;
	/* Walk the set by repeatedly taking the lowest member. */
	while (!CPU_EMPTY(&mask)) {
		vcpu = CPU_FFS(&mask) - 1;
		CPU_CLR(vcpu, &mask);
		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
		    enable ? 1 : 0) < 0)
			return (false);
		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
		    enable ? "en" : "dis");
	}
	return (true);
}
1200 
/*
 * Remove every software breakpoint, restoring the original guest
 * instruction bytes, and turn breakpoint exits back off.
 */
static void
remove_all_sw_breakpoints(void)
{
	struct breakpoint *bp, *nbp;
	uint8_t *cp;

	if (TAILQ_EMPTY(&breakpoints))
		return;

	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
		debug("remove breakpoint at %#lx\n", bp->gpa);
		cp = paddr_guest2host(ctx, bp->gpa, 1);
		/* Put back the byte that 0xcc displaced. */
		*cp = bp->shadow_inst;
		TAILQ_REMOVE(&breakpoints, bp, link);
		free(bp);
	}
	TAILQ_INIT(&breakpoints);
	set_breakpoint_caps(false);
}
1220 
1221 static void
1222 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1223 {
1224 	struct breakpoint *bp;
1225 	uint64_t gpa;
1226 	uint8_t *cp;
1227 	int error;
1228 
1229 	if (kind != 1) {
1230 		send_error(EINVAL);
1231 		return;
1232 	}
1233 
1234 	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1235 	if (error == -1) {
1236 		send_error(errno);
1237 		return;
1238 	}
1239 	if (error == 0) {
1240 		send_error(EFAULT);
1241 		return;
1242 	}
1243 
1244 	cp = paddr_guest2host(ctx, gpa, 1);
1245 
1246 	/* Only permit breakpoints in guest RAM. */
1247 	if (cp == NULL) {
1248 		send_error(EFAULT);
1249 		return;
1250 	}
1251 
1252 	/* Find any existing breakpoint. */
1253 	bp = find_breakpoint(gpa);
1254 
1255 	/*
1256 	 * Silently ignore duplicate commands since the protocol
1257 	 * requires these packets to be idempotent.
1258 	 */
1259 	if (insert) {
1260 		if (bp == NULL) {
1261 			if (TAILQ_EMPTY(&breakpoints) &&
1262 			    !set_breakpoint_caps(true)) {
1263 				send_empty_response();
1264 				return;
1265 			}
1266 			bp = malloc(sizeof(*bp));
1267 			bp->gpa = gpa;
1268 			bp->shadow_inst = *cp;
1269 			*cp = 0xcc;	/* INT 3 */
1270 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1271 			debug("new breakpoint at %#lx\n", gpa);
1272 		}
1273 	} else {
1274 		if (bp != NULL) {
1275 			debug("remove breakpoint at %#lx\n", gpa);
1276 			*cp = bp->shadow_inst;
1277 			TAILQ_REMOVE(&breakpoints, bp, link);
1278 			free(bp);
1279 			if (TAILQ_EMPTY(&breakpoints))
1280 				set_breakpoint_caps(false);
1281 		}
1282 	}
1283 	send_ok();
1284 }
1285 
/*
 * Handle 'Z'/'z' packets ("Z type,addr,kind"): insert ('Z') or
 * remove ('z') a breakpoint.  Only type 0 (software breakpoint) is
 * supported; other types, and packets carrying conditions or
 * commands, get an empty (unsupported) response.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *cp;
	bool insert;
	int kind, type;

	insert = data[0] == 'Z';

	/* Skip 'Z/z' */
	data += 1;
	len -= 1;

	/* Parse and consume type. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	type = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume address. */
	cp = memchr(data, ',', len);
	if (cp == NULL || cp == data) {
		send_error(EINVAL);
		return;
	}
	gva = parse_integer(data, cp - data);
	len -= (cp - data) + 1;
	data += (cp - data) + 1;

	/* Parse and consume kind. */
	cp = memchr(data, ';', len);
	if (cp == data) {
		send_error(EINVAL);
		return;
	}
	if (cp != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);
	data += len;
	len = 0;

	switch (type) {
	case 0:
		/* Software breakpoint. */
		update_sw_breakpoint(gva, kind, insert);
		break;
	default:
		send_empty_response();
		break;
	}
}
1349 
/*
 * Return true when the packet in 'data' (length 'len', not
 * NUL-terminated) begins with the command string 'cmd'.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	if (len < cmdlen)
		return (false);
	return (memcmp(data, cmd, cmdlen) == 0);
}
1358 
1359 static void
1360 check_features(const uint8_t *data, size_t len)
1361 {
1362 	char *feature, *next_feature, *str, *value;
1363 	bool supported;
1364 
1365 	str = malloc(len + 1);
1366 	memcpy(str, data, len);
1367 	str[len] = '\0';
1368 	next_feature = str;
1369 
1370 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1371 		/*
1372 		 * Null features shouldn't exist, but skip if they
1373 		 * do.
1374 		 */
1375 		if (strcmp(feature, "") == 0)
1376 			continue;
1377 
1378 		/*
1379 		 * Look for the value or supported / not supported
1380 		 * flag.
1381 		 */
1382 		value = strchr(feature, '=');
1383 		if (value != NULL) {
1384 			*value = '\0';
1385 			value++;
1386 			supported = true;
1387 		} else {
1388 			value = feature + strlen(feature) - 1;
1389 			switch (*value) {
1390 			case '+':
1391 				supported = true;
1392 				break;
1393 			case '-':
1394 				supported = false;
1395 				break;
1396 			default:
1397 				/*
1398 				 * This is really a protocol error,
1399 				 * but we just ignore malformed
1400 				 * features for ease of
1401 				 * implementation.
1402 				 */
1403 				continue;
1404 			}
1405 			value = NULL;
1406 		}
1407 
1408 		if (strcmp(feature, "swbreak") == 0)
1409 			swbreak_enabled = supported;
1410 	}
1411 	free(str);
1412 
1413 	start_packet();
1414 
1415 	/* This is an arbitrary limit. */
1416 	append_string("PacketSize=4096");
1417 	append_string(";swbreak+");
1418 	finish_packet();
1419 }
1420 
1421 static void
1422 gdb_query(const uint8_t *data, size_t len)
1423 {
1424 
1425 	/*
1426 	 * TODO:
1427 	 * - qSearch
1428 	 */
1429 	if (command_equals(data, len, "qAttached")) {
1430 		start_packet();
1431 		append_char('1');
1432 		finish_packet();
1433 	} else if (command_equals(data, len, "qC")) {
1434 		start_packet();
1435 		append_string("QC");
1436 		append_integer(cur_vcpu + 1);
1437 		finish_packet();
1438 	} else if (command_equals(data, len, "qfThreadInfo")) {
1439 		cpuset_t mask;
1440 		bool first;
1441 		int vcpu;
1442 
1443 		if (CPU_EMPTY(&vcpus_active)) {
1444 			send_error(EINVAL);
1445 			return;
1446 		}
1447 		mask = vcpus_active;
1448 		start_packet();
1449 		append_char('m');
1450 		first = true;
1451 		while (!CPU_EMPTY(&mask)) {
1452 			vcpu = CPU_FFS(&mask) - 1;
1453 			CPU_CLR(vcpu, &mask);
1454 			if (first)
1455 				first = false;
1456 			else
1457 				append_char(',');
1458 			append_integer(vcpu + 1);
1459 		}
1460 		finish_packet();
1461 	} else if (command_equals(data, len, "qsThreadInfo")) {
1462 		start_packet();
1463 		append_char('l');
1464 		finish_packet();
1465 	} else if (command_equals(data, len, "qSupported")) {
1466 		data += strlen("qSupported");
1467 		len -= strlen("qSupported");
1468 		check_features(data, len);
1469 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1470 		char buf[16];
1471 		int tid;
1472 
1473 		data += strlen("qThreadExtraInfo");
1474 		len -= strlen("qThreadExtraInfo");
1475 		if (*data != ',') {
1476 			send_error(EINVAL);
1477 			return;
1478 		}
1479 		tid = parse_threadid(data + 1, len - 1);
1480 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1481 			send_error(EINVAL);
1482 			return;
1483 		}
1484 
1485 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1486 		start_packet();
1487 		append_asciihex(buf);
1488 		finish_packet();
1489 	} else
1490 		send_empty_response();
1491 }
1492 
1493 static void
1494 handle_command(const uint8_t *data, size_t len)
1495 {
1496 
1497 	/* Reject packets with a sequence-id. */
1498 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1499 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1500 		send_empty_response();
1501 		return;
1502 	}
1503 
1504 	switch (*data) {
1505 	case 'c':
1506 		if (len != 1) {
1507 			send_error(EINVAL);
1508 			break;
1509 		}
1510 
1511 		discard_stop();
1512 		gdb_resume_vcpus();
1513 		break;
1514 	case 'D':
1515 		send_ok();
1516 
1517 		/* TODO: Resume any stopped CPUs. */
1518 		break;
1519 	case 'g': {
1520 		gdb_read_regs();
1521 		break;
1522 	}
1523 	case 'H': {
1524 		int tid;
1525 
1526 		if (data[1] != 'g' && data[1] != 'c') {
1527 			send_error(EINVAL);
1528 			break;
1529 		}
1530 		tid = parse_threadid(data + 2, len - 2);
1531 		if (tid == -2) {
1532 			send_error(EINVAL);
1533 			break;
1534 		}
1535 
1536 		if (CPU_EMPTY(&vcpus_active)) {
1537 			send_error(EINVAL);
1538 			break;
1539 		}
1540 		if (tid == -1 || tid == 0)
1541 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1542 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1543 			cur_vcpu = tid - 1;
1544 		else {
1545 			send_error(EINVAL);
1546 			break;
1547 		}
1548 		send_ok();
1549 		break;
1550 	}
1551 	case 'm':
1552 		gdb_read_mem(data, len);
1553 		break;
1554 	case 'M':
1555 		gdb_write_mem(data, len);
1556 		break;
1557 	case 'T': {
1558 		int tid;
1559 
1560 		tid = parse_threadid(data + 1, len - 1);
1561 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1562 			send_error(EINVAL);
1563 			return;
1564 		}
1565 		send_ok();
1566 		break;
1567 	}
1568 	case 'q':
1569 		gdb_query(data, len);
1570 		break;
1571 	case 's':
1572 		if (len != 1) {
1573 			send_error(EINVAL);
1574 			break;
1575 		}
1576 
1577 		/* Don't send a reply until a stop occurs. */
1578 		if (!gdb_step_vcpu(cur_vcpu)) {
1579 			send_error(EOPNOTSUPP);
1580 			break;
1581 		}
1582 		break;
1583 	case 'z':
1584 	case 'Z':
1585 		parse_breakpoint(data, len);
1586 		break;
1587 	case '?':
1588 		report_stop(false);
1589 		break;
1590 	case 'G': /* TODO */
1591 	case 'v':
1592 		/* Handle 'vCont' */
1593 		/* 'vCtrlC' */
1594 	case 'p': /* TODO */
1595 	case 'P': /* TODO */
1596 	case 'Q': /* TODO */
1597 	case 't': /* TODO */
1598 	case 'X': /* TODO */
1599 	default:
1600 		send_empty_response();
1601 	}
1602 }
1603 
/*
 * Check for a valid packet in the command buffer.
 *
 * Consumes bytes from cur_comm in a loop until the buffer is empty or
 * an incomplete packet remains.  Handles the three single-byte tokens
 * of the protocol (Ctrl-C interrupt, '+' ACK, '-' NACK) as well as
 * full '$'...'#'cc packets.  Replies are queued in cur_resp and
 * flushed to 'fd' via send_pending_data().
 */
static void
check_command(int fd)
{
	uint8_t *head, *hash, *p, sum;
	size_t avail, plen;

	for (;;) {
		avail = cur_comm.len;
		if (avail == 0)
			return;
		head = io_buffer_head(&cur_comm);
		switch (*head) {
		case 0x03:
			/* Ctrl-C from the debugger: stop all vcpus. */
			debug("<- Ctrl-C\n");
			io_buffer_consume(&cur_comm, 1);

			gdb_suspend_vcpus();
			break;
		case '+':
			/* ACK of previous response. */
			debug("<- +\n");
			if (response_pending())
				io_buffer_reset(&cur_resp);
			io_buffer_consume(&cur_comm, 1);
			/* Deliver a deferred stop reply, if one is owed. */
			if (stopped_vcpu != -1 && report_next_stop) {
				report_stop(true);
				send_pending_data(fd);
			}
			break;
		case '-':
			/* NACK of previous response. */
			debug("<- -\n");
			if (response_pending()) {
				/*
				 * Rewind the response buffer to
				 * retransmit the previous packet,
				 * skipping a leading '+' ACK byte
				 * that was already delivered.
				 */
				cur_resp.len += cur_resp.start;
				cur_resp.start = 0;
				if (cur_resp.data[0] == '+')
					io_buffer_advance(&cur_resp, 1);
				debug("-> %.*s\n", (int)cur_resp.len,
				    io_buffer_head(&cur_resp));
			}
			io_buffer_consume(&cur_comm, 1);
			send_pending_data(fd);
			break;
		case '$':
			/* Packet. */

			if (response_pending()) {
				warnx("New GDB command while response in "
				    "progress");
				io_buffer_reset(&cur_resp);
			}

			/* Is packet complete? */
			hash = memchr(head, '#', avail);
			if (hash == NULL)
				return;
			/* '$'..'#' plus the two checksum digits. */
			plen = (hash - head + 1) + 2;
			if (avail < plen)
				return;
			debug("<- %.*s\n", (int)plen, head);

			/* Verify checksum. */
			for (sum = 0, p = head + 1; p < hash; p++)
				sum += *p;
			if (sum != parse_byte(hash + 1)) {
				/* Bad checksum: discard and NACK. */
				io_buffer_consume(&cur_comm, plen);
				debug("-> -\n");
				send_char('-');
				send_pending_data(fd);
				break;
			}
			send_char('+');

			handle_command(head + 1, hash - (head + 1));
			io_buffer_consume(&cur_comm, plen);
			if (!response_pending())
				debug("-> +\n");
			send_pending_data(fd);
			break;
		default:
			/* XXX: Possibly drop connection instead. */
			debug("-> %02x\n", *head);
			io_buffer_consume(&cur_comm, 1);
			break;
		}
	}
}
1692 
/*
 * mevent read callback for the GDB connection socket: pull whatever
 * data is pending into cur_comm and process any complete commands.
 * 'event' and 'arg' are unused but required by the mevent callback
 * signature.
 */
static void
gdb_readable(int fd, enum ev_type event, void *arg)
{
	ssize_t nread;
	int pending;

	if (ioctl(fd, FIONREAD, &pending) == -1) {
		warn("FIONREAD on GDB socket");
		return;
	}

	/*
	 * 'pending' might be zero due to EOF.  We need to call read
	 * with a non-zero length to detect EOF.
	 */
	if (pending == 0)
		pending = 1;

	/* Ensure there is room in the command buffer. */
	io_buffer_grow(&cur_comm, pending);
	assert(io_buffer_avail(&cur_comm) >= pending);

	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
	if (nread == 0) {
		/* EOF: the debugger went away. */
		close_connection();
	} else if (nread == -1) {
		/* Socket is non-blocking; EAGAIN just means "try later". */
		if (errno == EAGAIN)
			return;

		warn("Read from GDB socket");
		close_connection();
	} else {
		cur_comm.len += nread;
		/* check_command touches vcpu state; serialize with gdb_lock. */
		pthread_mutex_lock(&gdb_lock);
		check_command(fd);
		pthread_mutex_unlock(&gdb_lock);
	}
}
1731 
/*
 * mevent write callback: the socket became writable again, so flush
 * any buffered response data.  'event' and 'arg' are unused but
 * required by the mevent callback signature.
 */
static void
gdb_writable(int fd, enum ev_type event, void *arg)
{

	send_pending_data(fd);
}
1738 
1739 static void
1740 new_connection(int fd, enum ev_type event, void *arg)
1741 {
1742 	int optval, s;
1743 
1744 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1745 	if (s == -1) {
1746 		if (arg != NULL)
1747 			err(1, "Failed accepting initial GDB connection");
1748 
1749 		/* Silently ignore errors post-startup. */
1750 		return;
1751 	}
1752 
1753 	optval = 1;
1754 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1755 	    -1) {
1756 		warn("Failed to disable SIGPIPE for GDB connection");
1757 		close(s);
1758 		return;
1759 	}
1760 
1761 	pthread_mutex_lock(&gdb_lock);
1762 	if (cur_fd != -1) {
1763 		close(s);
1764 		warnx("Ignoring additional GDB connection.");
1765 	}
1766 
1767 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1768 	if (read_event == NULL) {
1769 		if (arg != NULL)
1770 			err(1, "Failed to setup initial GDB connection");
1771 		pthread_mutex_unlock(&gdb_lock);
1772 		return;
1773 	}
1774 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1775 	if (write_event == NULL) {
1776 		if (arg != NULL)
1777 			err(1, "Failed to setup initial GDB connection");
1778 		mevent_delete_close(read_event);
1779 		read_event = NULL;
1780 	}
1781 
1782 	cur_fd = s;
1783 	cur_vcpu = 0;
1784 	stopped_vcpu = -1;
1785 
1786 	/* Break on attach. */
1787 	first_stop = true;
1788 	report_next_stop = false;
1789 	gdb_suspend_vcpus();
1790 	pthread_mutex_unlock(&gdb_lock);
1791 }
1792 
1793 #ifndef WITHOUT_CAPSICUM
/*
 * Restrict the GDB listen socket to the minimal set of Capsicum
 * rights it needs (accept/poll/read/write/setsockopt plus the
 * FIONREAD ioctl used by gdb_readable).  Exits on failure since a
 * partially-sandboxed socket would be a security regression.
 */
void
limit_gdb_socket(int s)
{
	cap_rights_t rights;
	unsigned long ioctls[] = { FIONREAD };

	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
	    CAP_SETSOCKOPT, CAP_IOCTL);
	if (caph_rights_limit(s, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
}
1807 #endif
1808 
1809 void
1810 init_gdb(struct vmctx *_ctx, int sport, bool wait)
1811 {
1812 	struct sockaddr_in sin;
1813 	int error, flags, s;
1814 
1815 	debug("==> starting on %d, %swaiting\n", sport, wait ? "" : "not ");
1816 
1817 	error = pthread_mutex_init(&gdb_lock, NULL);
1818 	if (error != 0)
1819 		errc(1, error, "gdb mutex init");
1820 	error = pthread_cond_init(&idle_vcpus, NULL);
1821 	if (error != 0)
1822 		errc(1, error, "gdb cv init");
1823 
1824 	ctx = _ctx;
1825 	s = socket(PF_INET, SOCK_STREAM, 0);
1826 	if (s < 0)
1827 		err(1, "gdb socket create");
1828 
1829 	sin.sin_len = sizeof(sin);
1830 	sin.sin_family = AF_INET;
1831 	sin.sin_addr.s_addr = htonl(INADDR_ANY);
1832 	sin.sin_port = htons(sport);
1833 
1834 	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0)
1835 		err(1, "gdb socket bind");
1836 
1837 	if (listen(s, 1) < 0)
1838 		err(1, "gdb socket listen");
1839 
1840 	stopped_vcpu = -1;
1841 	TAILQ_INIT(&breakpoints);
1842 	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
1843 	if (wait) {
1844 		/*
1845 		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
1846 		 * logic in gdb_cpu_add() to suspend the first vcpu before
1847 		 * it starts execution.  The vcpu will remain suspended
1848 		 * until a debugger connects.
1849 		 */
1850 		CPU_SET(0, &vcpus_suspended);
1851 		stopped_vcpu = 0;
1852 	}
1853 
1854 	flags = fcntl(s, F_GETFL);
1855 	if (fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
1856 		err(1, "Failed to mark gdb socket non-blocking");
1857 
1858 #ifndef WITHOUT_CAPSICUM
1859 	limit_gdb_socket(s);
1860 #endif
1861 	mevent_add(s, EVF_READ, new_connection, NULL);
1862 }
1863