xref: /freebsd/usr.sbin/bhyve/gdb.c (revision c1d255d3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2017-2018 John H. Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 #ifndef WITHOUT_CAPSICUM
33 #include <sys/capsicum.h>
34 #endif
35 #include <sys/endian.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/queue.h>
39 #include <sys/socket.h>
40 #include <machine/atomic.h>
41 #include <machine/specialreg.h>
42 #include <machine/vmm.h>
43 #include <netinet/in.h>
44 #include <assert.h>
45 #ifndef WITHOUT_CAPSICUM
46 #include <capsicum_helpers.h>
47 #endif
48 #include <err.h>
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <netdb.h>
52 #include <pthread.h>
53 #include <pthread_np.h>
54 #include <stdbool.h>
55 #include <stdio.h>
56 #include <stdlib.h>
57 #include <string.h>
58 #include <sysexits.h>
59 #include <unistd.h>
60 #include <vmmapi.h>
61 
62 #include "bhyverun.h"
63 #include "config.h"
64 #include "gdb.h"
65 #include "mem.h"
66 #include "mevent.h"
67 
/*
 * GDB_SIGNAL_* numbers are part of the GDB remote protocol.  Most stops
 * use SIGTRAP.
 */
#define	GDB_SIGNAL_TRAP		5

/* Forward declarations for helpers that are defined later in the file. */
static void gdb_resume_vcpus(void);
static void check_command(int fd);

/*
 * mevent handles for the debugger connection.  Both are deleted and
 * reset to NULL by close_connection().
 */
static struct mevent *read_event, *write_event;

/*
 * vCPU bookkeeping:
 *
 * vcpus_active:    vCPUs that have registered via gdb_cpu_add().
 * vcpus_suspended: vCPUs the debug server wants held; a vCPU thread
 *                  waits on 'idle_vcpus' while its bit is set here.
 * vcpus_waiting:   vCPUs currently parked inside _gdb_cpu_suspend().
 *
 * A suspend is complete once vcpus_waiting equals vcpus_suspended.
 */
static cpuset_t vcpus_active, vcpus_suspended, vcpus_waiting;
/* Mutex paired with 'idle_vcpus' in pthread_cond_wait(). */
static pthread_mutex_t gdb_lock;
/* Broadcast whenever vCPUs are released from vcpus_suspended. */
static pthread_cond_t idle_vcpus;
/*
 * first_stop:       when set, the next completed suspend is not
 *                   reported (see gdb_finish_suspend_vcpus()).
 * report_next_stop: set by discard_stop(), cleared by report_stop().
 * swbreak_enabled:  when set, report_stop() adds "swbreak:;" to stop
 *                   replies caused by a software breakpoint.
 */
static bool first_stop, report_next_stop, swbreak_enabled;
83 
/*
 * An I/O buffer contains 'capacity' bytes of room at 'data'.  For a
 * read buffer, 'start' is unused and 'len' contains the number of
 * valid bytes in the buffer.  For a write buffer, 'start' is set to
 * the index of the next byte in 'data' to send, and 'len' contains
 * the remaining number of valid bytes to send.
 */
struct io_buffer {
	uint8_t *data;		/* storage, resized with realloc() */
	size_t capacity;	/* allocated size of 'data' in bytes */
	size_t start;		/* offset of the first valid byte */
	size_t len;		/* number of valid bytes after 'start' */
};
97 
/*
 * A software breakpoint planted by the debug server.  The byte at
 * 'gpa' is overwritten with an INT 3 opcode while the breakpoint is
 * active; 'shadow_inst' holds the byte that was there before so it can
 * be restored on removal.
 */
struct breakpoint {
	uint64_t gpa;			/* guest physical address */
	uint8_t shadow_inst;		/* saved original byte */
	TAILQ_ENTRY(breakpoint) link;	/* entry on 'breakpoints' */
};
103 
/*
 * When a vCPU stops due to an event that should be reported to the
 * debugger, information about the event is stored in this structure.
 * The vCPU thread then sets 'stopped_vcpu' if it is not already set
 * and stops other vCPUs so the event can be reported.  The
 * report_stop() function reports the event for the 'stopped_vcpu'
 * vCPU.  When the debugger resumes execution via continue or step,
 * the event for 'stopped_vcpu' is cleared.  vCPUs will loop in their
 * event handlers until the associated event is reported or disabled.
 *
 * An idle vCPU will have all of the boolean fields set to false.
 *
 * When a vCPU is stepped, 'stepping' is set to true when the vCPU is
 * released to execute the stepped instruction.  When the vCPU reports
 * the stepping trap, 'stepped' is set.
 *
 * When a vCPU hits a breakpoint set by the debug server,
 * 'hit_swbreak' is set to true.
 */
struct vcpu_state {
	bool stepping;		/* released to execute a single step */
	bool stepped;		/* step trap taken, not yet acknowledged */
	bool hit_swbreak;	/* debug-server breakpoint hit, not yet acked */
};
128 
/*
 * Connection buffers; both are reset when the connection closes.
 * Outgoing bytes are appended to cur_resp by send_char()/send_data().
 */
static struct io_buffer cur_comm, cur_resp;
/* Running checksum of the packet being built (see start_packet()). */
static uint8_t cur_csum;
/* VM context handed to the vmmapi calls. */
static struct vmctx *ctx;
/* Debugger socket descriptor, or -1 when no debugger is connected. */
static int cur_fd = -1;
/* Active software breakpoints. */
static TAILQ_HEAD(, breakpoint) breakpoints;
/* Per-vCPU event state, indexed by vCPU number. */
static struct vcpu_state *vcpu_state;
/*
 * cur_vcpu is the vCPU used for register and memory requests;
 * stopped_vcpu is the vCPU whose event is being reported, or -1 when
 * the stop is not tied to a particular vCPU.
 */
static int cur_vcpu, stopped_vcpu;
/* The vCPU hooks return immediately (or exit) while this is false. */
static bool gdb_active = false;
137 
/*
 * Registers returned by gdb_read_regs(), in the order they are sent.
 * Entry i is paired with gdb_regsize[i], so the two tables must be
 * kept the same length and in the same order.
 */
const int gdb_regset[] = {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS
};
164 
/*
 * Number of bytes sent to the debugger for each register in
 * gdb_regset[], index for index: 8 bytes for RAX..R15 and RIP, 4 bytes
 * for RFLAGS and the six segment registers.
 */
const int gdb_regsize[] = {
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	8,
	4,
	4,
	4,
	4,
	4,
	4,
	4
};
191 
#ifdef GDB_LOG
#include <stdarg.h>
#include <stdio.h>

/*
 * printf-style trace logging, compiled in only when GDB_LOG is defined.
 *
 * The log file is opened on the first call and kept open in
 * line-buffered mode.  If the file cannot be opened (or, with Capsicum,
 * cannot be limited to writing) the message is dropped and the open is
 * attempted again on the next call.
 */
static void __printflike(1, 2)
debug(const char *fmt, ...)
{
	static FILE *logfile;
	va_list args;
	FILE *fp;

	if (logfile == NULL) {
		fp = fopen("/tmp/bhyve_gdb.log", "w");
		if (fp == NULL)
			return;
#ifndef WITHOUT_CAPSICUM
		if (caph_limit_stream(fileno(fp), CAPH_WRITE) == -1) {
			fclose(fp);
			return;
		}
#endif
		setlinebuf(fp);
		/* Publish the stream only once it is fully set up. */
		logfile = fp;
	}

	va_start(args, fmt);
	vfprintf(logfile, fmt, args);
	va_end(args);
}
#else
/* Logging disabled: debug() calls expand to nothing. */
#define debug(...)
#endif
222 
/* Defined below; declared here because close_connection() calls it first. */
static void	remove_all_sw_breakpoints(void);
224 
225 static int
226 guest_paging_info(int vcpu, struct vm_guest_paging *paging)
227 {
228 	uint64_t regs[4];
229 	const int regset[4] = {
230 		VM_REG_GUEST_CR0,
231 		VM_REG_GUEST_CR3,
232 		VM_REG_GUEST_CR4,
233 		VM_REG_GUEST_EFER
234 	};
235 
236 	if (vm_get_register_set(ctx, vcpu, nitems(regset), regset, regs) == -1)
237 		return (-1);
238 
239 	/*
240 	 * For the debugger, always pretend to be the kernel (CPL 0),
241 	 * and if long-mode is enabled, always parse addresses as if
242 	 * in 64-bit mode.
243 	 */
244 	paging->cr3 = regs[1];
245 	paging->cpl = 0;
246 	if (regs[3] & EFER_LMA)
247 		paging->cpu_mode = CPU_MODE_64BIT;
248 	else if (regs[0] & CR0_PE)
249 		paging->cpu_mode = CPU_MODE_PROTECTED;
250 	else
251 		paging->cpu_mode = CPU_MODE_REAL;
252 	if (!(regs[0] & CR0_PG))
253 		paging->paging_mode = PAGING_MODE_FLAT;
254 	else if (!(regs[2] & CR4_PAE))
255 		paging->paging_mode = PAGING_MODE_32;
256 	else if (regs[3] & EFER_LME)
257 		paging->paging_mode = (regs[2] & CR4_LA57) ?
258 		    PAGING_MODE_64_LA57 :  PAGING_MODE_64;
259 	else
260 		paging->paging_mode = PAGING_MODE_PAE;
261 	return (0);
262 }
263 
264 /*
265  * Map a guest virtual address to a physical address (for a given vcpu).
266  * If a guest virtual address is valid, return 1.  If the address is
267  * not valid, return 0.  If an error occurs obtaining the mapping,
268  * return -1.
269  */
270 static int
271 guest_vaddr2paddr(int vcpu, uint64_t vaddr, uint64_t *paddr)
272 {
273 	struct vm_guest_paging paging;
274 	int fault;
275 
276 	if (guest_paging_info(vcpu, &paging) == -1)
277 		return (-1);
278 
279 	/*
280 	 * Always use PROT_READ.  We really care if the VA is
281 	 * accessible, not if the current vCPU can write.
282 	 */
283 	if (vm_gla2gpa_nofault(ctx, vcpu, &paging, vaddr, PROT_READ, paddr,
284 	    &fault) == -1)
285 		return (-1);
286 	if (fault)
287 		return (0);
288 	return (1);
289 }
290 
291 static void
292 io_buffer_reset(struct io_buffer *io)
293 {
294 
295 	io->start = 0;
296 	io->len = 0;
297 }
298 
299 /* Available room for adding data. */
300 static size_t
301 io_buffer_avail(struct io_buffer *io)
302 {
303 
304 	return (io->capacity - (io->start + io->len));
305 }
306 
307 static uint8_t *
308 io_buffer_head(struct io_buffer *io)
309 {
310 
311 	return (io->data + io->start);
312 }
313 
314 static uint8_t *
315 io_buffer_tail(struct io_buffer *io)
316 {
317 
318 	return (io->data + io->start + io->len);
319 }
320 
321 static void
322 io_buffer_advance(struct io_buffer *io, size_t amount)
323 {
324 
325 	assert(amount <= io->len);
326 	io->start += amount;
327 	io->len -= amount;
328 }
329 
330 static void
331 io_buffer_consume(struct io_buffer *io, size_t amount)
332 {
333 
334 	io_buffer_advance(io, amount);
335 	if (io->len == 0) {
336 		io->start = 0;
337 		return;
338 	}
339 
340 	/*
341 	 * XXX: Consider making this move optional and compacting on a
342 	 * future read() before realloc().
343 	 */
344 	memmove(io->data, io_buffer_head(io), io->len);
345 	io->start = 0;
346 }
347 
348 static void
349 io_buffer_grow(struct io_buffer *io, size_t newsize)
350 {
351 	uint8_t *new_data;
352 	size_t avail, new_cap;
353 
354 	avail = io_buffer_avail(io);
355 	if (newsize <= avail)
356 		return;
357 
358 	new_cap = io->capacity + (newsize - avail);
359 	new_data = realloc(io->data, new_cap);
360 	if (new_data == NULL)
361 		err(1, "Failed to grow GDB I/O buffer");
362 	io->data = new_data;
363 	io->capacity = new_cap;
364 }
365 
366 static bool
367 response_pending(void)
368 {
369 
370 	if (cur_resp.start == 0 && cur_resp.len == 0)
371 		return (false);
372 	if (cur_resp.start + cur_resp.len == 1 && cur_resp.data[0] == '+')
373 		return (false);
374 	return (true);
375 }
376 
/*
 * Tear down the current debugger connection: drop both mevent
 * registrations (closing the socket along with the read event), empty
 * the I/O buffers, remove every software breakpoint, clear all pending
 * per-vCPU events and let the vCPUs run again.  Takes gdb_lock for the
 * duration.
 */
static void
close_connection(void)
{

	/*
	 * XXX: This triggers a warning because mevent does the close
	 * before the EV_DELETE.
	 */
	pthread_mutex_lock(&gdb_lock);
	mevent_delete(write_event);
	mevent_delete_close(read_event);
	write_event = NULL;
	read_event = NULL;
	io_buffer_reset(&cur_comm);
	io_buffer_reset(&cur_resp);
	cur_fd = -1;

	/* Restore the guest instructions patched by breakpoints. */
	remove_all_sw_breakpoints();

	/* Clear any pending events. */
	memset(vcpu_state, 0, guest_ncpus * sizeof(*vcpu_state));

	/* Resume any stopped vCPUs. */
	gdb_resume_vcpus();
	pthread_mutex_unlock(&gdb_lock);
}
403 
/*
 * Convert a value in the range 0-15 to its lowercase hexadecimal
 * character.
 */
static uint8_t
hex_digit(uint8_t nibble)
{
	const int base = (nibble <= 9) ? '0' : 'a' - 10;

	return (nibble + base);
}
413 
/*
 * Convert one hexadecimal character (either case) to its value.
 * Characters that are not hex digits yield 0xF.
 */
static uint8_t
parse_digit(uint8_t v)
{
	uint8_t lower;

	if (v >= '0' && v <= '9')
		return (v - '0');

	/* Fold ASCII upper case onto lower case before the range test. */
	lower = v | 0x20;
	if (lower >= 'a' && lower <= 'f')
		return (lower - 'a' + 10);

	return (0xF);
}
426 
/*
 * Parse 'len' characters at 'p' as a big-endian hexadecimal number.
 * An empty string yields 0.  The characters are not validated; see
 * parse_digit() for how non-hex characters are treated.
 */
static uintmax_t
parse_integer(const uint8_t *p, size_t len)
{
	uintmax_t result;
	size_t i;

	result = 0;
	for (i = 0; i < len; i++)
		result = (result << 4) | parse_digit(p[i]);
	return (result);
}
442 
/* Parse the two hex characters at 'p' into a single byte. */
static uint8_t
parse_byte(const uint8_t *p)
{
	uint8_t hi, lo;

	hi = parse_digit(p[0]);
	lo = parse_digit(p[1]);
	return (hi << 4 | lo);
}
449 
450 static void
451 send_pending_data(int fd)
452 {
453 	ssize_t nwritten;
454 
455 	if (cur_resp.len == 0) {
456 		mevent_disable(write_event);
457 		return;
458 	}
459 	nwritten = write(fd, io_buffer_head(&cur_resp), cur_resp.len);
460 	if (nwritten == -1) {
461 		warn("Write to GDB socket failed");
462 		close_connection();
463 	} else {
464 		io_buffer_advance(&cur_resp, nwritten);
465 		if (cur_resp.len == 0)
466 			mevent_disable(write_event);
467 		else
468 			mevent_enable(write_event);
469 	}
470 }
471 
472 /* Append a single character to the output buffer. */
473 static void
474 send_char(uint8_t data)
475 {
476 	io_buffer_grow(&cur_resp, 1);
477 	*io_buffer_tail(&cur_resp) = data;
478 	cur_resp.len++;
479 }
480 
481 /* Append an array of bytes to the output buffer. */
482 static void
483 send_data(const uint8_t *data, size_t len)
484 {
485 
486 	io_buffer_grow(&cur_resp, len);
487 	memcpy(io_buffer_tail(&cur_resp), data, len);
488 	cur_resp.len += len;
489 }
490 
/*
 * Write the two lowercase hex characters for 'v' into buf[0] (high
 * nibble) and buf[1] (low nibble).  No terminator is written.
 */
static void
format_byte(uint8_t v, uint8_t *buf)
{
	const uint8_t hi = v >> 4;
	const uint8_t lo = v & 0xf;

	buf[0] = hex_digit(hi);
	buf[1] = hex_digit(lo);
}
498 
/*
 * Queue 'v' as two hex characters in the response buffer without
 * updating the packet checksum.
 */
static void
send_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	send_data(hex, sizeof(hex));
}
511 
512 static void
513 start_packet(void)
514 {
515 
516 	send_char('$');
517 	cur_csum = 0;
518 }
519 
520 static void
521 finish_packet(void)
522 {
523 
524 	send_char('#');
525 	send_byte(cur_csum);
526 	debug("-> %.*s\n", (int)cur_resp.len, io_buffer_head(&cur_resp));
527 }
528 
529 /*
530  * Append a single character (for the packet payload) and update the
531  * checksum.
532  */
533 static void
534 append_char(uint8_t v)
535 {
536 
537 	send_char(v);
538 	cur_csum += v;
539 }
540 
541 /*
542  * Append an array of bytes (for the packet payload) and update the
543  * checksum.
544  */
545 static void
546 append_packet_data(const uint8_t *data, size_t len)
547 {
548 
549 	send_data(data, len);
550 	while (len > 0) {
551 		cur_csum += *data;
552 		data++;
553 		len--;
554 	}
555 }
556 
/*
 * Queue the characters of the NUL-terminated string 'str' (without the
 * terminator) as packet payload.
 */
static void
append_string(const char *str)
{
	const size_t n = strlen(str);

	/* The payload helper works on bytes; convert the pointer explicitly. */
	append_packet_data((const uint8_t *)str, n);
}
563 
/* Queue 'v' as two hex payload characters, updating the checksum. */
static void
append_byte(uint8_t v)
{
	uint8_t hex[2];

	format_byte(v, hex);
	append_packet_data(hex, sizeof(hex));
}
572 
/*
 * Queue the low 'len' bytes of 'value' as hex payload, least
 * significant byte first.
 */
static void
append_unsigned_native(uintmax_t value, size_t len)
{
	size_t remaining;

	for (remaining = len; remaining > 0; remaining--) {
		append_byte(value & 0xff);
		value >>= 8;
	}
}
583 
/*
 * Queue the low 'len' bytes of 'value' as hex payload, most significant
 * byte first (2 * len characters in total).
 *
 * Bytes are emitted directly rather than staged in a variable-length
 * array, so 'len' == 0 is well defined (nothing is queued) and stack
 * use does not depend on the caller.  If 'len' exceeds the width of
 * uintmax_t the extra leading bytes are sent as zero.
 */
static void
append_unsigned_be(uintmax_t value, size_t len)
{
	size_t i, shift;

	for (i = len; i > 0; i--) {
		shift = (i - 1) * 8;
		/* Shifting by the full width or more is undefined; send 0. */
		if (shift >= sizeof(value) * 8)
			append_byte(0);
		else
			append_byte(value >> shift);
	}
}
596 
/*
 * Queue 'value' as big-endian hex using the fewest whole bytes that
 * hold it.  Zero is sent as the single character '0'.
 */
static void
append_integer(unsigned int value)
{
	size_t nbytes;

	if (value == 0) {
		append_char('0');
		return;
	}

	/* fls() gives the index of the highest set bit; round up to bytes. */
	nbytes = (fls(value) + 7) / 8;
	append_unsigned_be(value, nbytes);
}
606 
/*
 * Queue every character of the NUL-terminated string 'str' as two hex
 * payload characters.
 */
static void
append_asciihex(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
		append_byte(*p);
}
616 
/* Queue a packet with no payload. */
static void
send_empty_response(void)
{

	start_packet();
	/* Nothing between the '$' and the checksum trailer. */
	finish_packet();
}
624 
/*
 * Queue an error reply: 'E' followed by the low byte of 'error' as two
 * hex characters.
 */
static void
send_error(int error)
{
	const uint8_t code = error;

	start_packet();
	append_char('E');
	append_byte(code);
	finish_packet();
}
634 
/* Queue the "OK" success reply. */
static void
send_ok(void)
{
	static const char reply[] = "OK";

	start_packet();
	append_string(reply);
	finish_packet();
}
643 
/*
 * Parse a thread id of 'len' characters at 'data'.
 *
 * Returns 0 for "0", -1 for "-1", -2 for an empty string, and
 * otherwise the value of the text parsed as hexadecimal.
 */
static int
parse_threadid(const uint8_t *data, size_t len)
{

	switch (len) {
	case 0:
		return (-2);
	case 1:
		if (data[0] == '0')
			return (0);
		break;
	case 2:
		if (memcmp(data, "-1", 2) == 0)
			return (-1);
		break;
	default:
		break;
	}
	return (parse_integer(data, len));
}
656 
657 /*
658  * Report the current stop event to the debugger.  If the stop is due
659  * to an event triggered on a specific vCPU such as a breakpoint or
660  * stepping trap, stopped_vcpu will be set to the vCPU triggering the
661  * stop.  If 'set_cur_vcpu' is true, then cur_vcpu will be updated to
662  * the reporting vCPU for vCPU events.
663  */
664 static void
665 report_stop(bool set_cur_vcpu)
666 {
667 	struct vcpu_state *vs;
668 
669 	start_packet();
670 	if (stopped_vcpu == -1) {
671 		append_char('S');
672 		append_byte(GDB_SIGNAL_TRAP);
673 	} else {
674 		vs = &vcpu_state[stopped_vcpu];
675 		if (set_cur_vcpu)
676 			cur_vcpu = stopped_vcpu;
677 		append_char('T');
678 		append_byte(GDB_SIGNAL_TRAP);
679 		append_string("thread:");
680 		append_integer(stopped_vcpu + 1);
681 		append_char(';');
682 		if (vs->hit_swbreak) {
683 			debug("$vCPU %d reporting swbreak\n", stopped_vcpu);
684 			if (swbreak_enabled)
685 				append_string("swbreak:;");
686 		} else if (vs->stepped)
687 			debug("$vCPU %d reporting step\n", stopped_vcpu);
688 		else
689 			debug("$vCPU %d reporting ???\n", stopped_vcpu);
690 	}
691 	finish_packet();
692 	report_next_stop = false;
693 }
694 
695 /*
696  * If this stop is due to a vCPU event, clear that event to mark it as
697  * acknowledged.
698  */
699 static void
700 discard_stop(void)
701 {
702 	struct vcpu_state *vs;
703 
704 	if (stopped_vcpu != -1) {
705 		vs = &vcpu_state[stopped_vcpu];
706 		vs->hit_swbreak = false;
707 		vs->stepped = false;
708 		stopped_vcpu = -1;
709 	}
710 	report_next_stop = true;
711 }
712 
713 static void
714 gdb_finish_suspend_vcpus(void)
715 {
716 
717 	if (first_stop) {
718 		first_stop = false;
719 		stopped_vcpu = -1;
720 	} else if (report_next_stop) {
721 		assert(!response_pending());
722 		report_stop(true);
723 		send_pending_data(cur_fd);
724 	}
725 }
726 
/*
 * vCPU threads invoke this function whenever the vCPU enters the
 * debug server to pause or report an event.  vCPU threads wait here
 * as long as the debug server keeps them suspended.
 *
 * The caller must hold gdb_lock, since it is the mutex handed to
 * pthread_cond_wait().  When 'report_stop' is true and this vCPU is
 * the last suspended one to arrive, the stop is finalized here.
 */
static void
_gdb_cpu_suspend(int vcpu, bool report_stop)
{

	debug("$vCPU %d suspending\n", vcpu);
	CPU_SET(vcpu, &vcpus_waiting);
	/* The waiting set matches the suspended set: everyone is parked. */
	if (report_stop && CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
		gdb_finish_suspend_vcpus();
	/* Sleep until the debug server clears this vCPU's suspended bit. */
	while (CPU_ISSET(vcpu, &vcpus_suspended))
		pthread_cond_wait(&idle_vcpus, &gdb_lock);
	CPU_CLR(vcpu, &vcpus_waiting);
	debug("$vCPU %d resuming\n", vcpu);
}
745 
/*
 * Invoked at the start of a vCPU thread's execution to inform the
 * debug server about the new thread.
 *
 * Does nothing when the debug server is not active.  Otherwise the
 * vCPU is recorded as active and, if any breakpoints exist, breakpoint
 * exits are enabled for it.
 */
void
gdb_cpu_add(int vcpu)
{

	if (!gdb_active)
		return;
	debug("$vCPU %d starting\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	assert(vcpu < guest_ncpus);
	CPU_SET(vcpu, &vcpus_active);
	if (!TAILQ_EMPTY(&breakpoints)) {
		/* The return value is not checked here. */
		vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT, 1);
		debug("$vCPU %d enabled breakpoint exits\n", vcpu);
	}

	/*
	 * If a vcpu is added while vcpus are stopped, suspend the new
	 * vcpu so that it will pop back out with a debug exit before
	 * executing the first instruction.
	 */
	if (!CPU_EMPTY(&vcpus_suspended)) {
		CPU_SET(vcpu, &vcpus_suspended);
		/* 'false': joining an existing stop is not itself reported. */
		_gdb_cpu_suspend(vcpu, false);
	}
	pthread_mutex_unlock(&gdb_lock);
}
776 
777 /*
778  * Invoked by vCPU before resuming execution.  This enables stepping
779  * if the vCPU is marked as stepping.
780  */
781 static void
782 gdb_cpu_resume(int vcpu)
783 {
784 	struct vcpu_state *vs;
785 	int error;
786 
787 	vs = &vcpu_state[vcpu];
788 
789 	/*
790 	 * Any pending event should already be reported before
791 	 * resuming.
792 	 */
793 	assert(vs->hit_swbreak == false);
794 	assert(vs->stepped == false);
795 	if (vs->stepping) {
796 		error = vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 1);
797 		assert(error == 0);
798 	}
799 }
800 
801 /*
802  * Handler for VM_EXITCODE_DEBUG used to suspend a vCPU when the guest
803  * has been suspended due to an event on different vCPU or in response
804  * to a guest-wide suspend such as Ctrl-C or the stop on attach.
805  */
806 void
807 gdb_cpu_suspend(int vcpu)
808 {
809 
810 	if (!gdb_active)
811 		return;
812 	pthread_mutex_lock(&gdb_lock);
813 	_gdb_cpu_suspend(vcpu, true);
814 	gdb_cpu_resume(vcpu);
815 	pthread_mutex_unlock(&gdb_lock);
816 }
817 
818 static void
819 gdb_suspend_vcpus(void)
820 {
821 
822 	assert(pthread_mutex_isowned_np(&gdb_lock));
823 	debug("suspending all CPUs\n");
824 	vcpus_suspended = vcpus_active;
825 	vm_suspend_cpu(ctx, -1);
826 	if (CPU_CMP(&vcpus_waiting, &vcpus_suspended) == 0)
827 		gdb_finish_suspend_vcpus();
828 }
829 
/*
 * Handler for VM_EXITCODE_MTRAP reported when a vCPU single-steps via
 * the VT-x-specific MTRAP exit.
 *
 * A no-op when the debug server is inactive or when the vCPU was not
 * marked as stepping.
 */
void
gdb_cpu_mtrap(int vcpu)
{
	struct vcpu_state *vs;

	if (!gdb_active)
		return;
	debug("$vCPU %d MTRAP\n", vcpu);
	pthread_mutex_lock(&gdb_lock);
	vs = &vcpu_state[vcpu];
	if (vs->stepping) {
		/* The step finished: record it and turn MTRAP exits off. */
		vs->stepping = false;
		vs->stepped = true;
		vm_set_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, 0);
		/*
		 * Stay parked until the step has been acknowledged
		 * ('stepped' is cleared by discard_stop() or on
		 * disconnect).  If no other vCPU currently owns the
		 * stop, claim it and stop everyone.
		 */
		while (vs->stepped) {
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting step\n", vcpu);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
		}
		gdb_cpu_resume(vcpu);
	}
	pthread_mutex_unlock(&gdb_lock);
}
860 
861 static struct breakpoint *
862 find_breakpoint(uint64_t gpa)
863 {
864 	struct breakpoint *bp;
865 
866 	TAILQ_FOREACH(bp, &breakpoints, link) {
867 		if (bp->gpa == gpa)
868 			return (bp);
869 	}
870 	return (NULL);
871 }
872 
/*
 * Handler for a breakpoint exit on 'vcpu'.
 *
 * If the faulting address matches a breakpoint planted by the debug
 * server, the vCPU records the hit and parks until the debugger has
 * been told about it or the breakpoint is removed.  Otherwise the
 * breakpoint belongs to the guest and a #BP exception is injected.
 *
 * Exits the process if called while the debug server is inactive.
 */
void
gdb_cpu_breakpoint(int vcpu, struct vm_exit *vmexit)
{
	struct breakpoint *bp;
	struct vcpu_state *vs;
	uint64_t gpa;
	int error;

	if (!gdb_active) {
		fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
		exit(4);
	}
	pthread_mutex_lock(&gdb_lock);
	/* The address that just executed must translate. */
	error = guest_vaddr2paddr(vcpu, vmexit->rip, &gpa);
	assert(error == 1);
	bp = find_breakpoint(gpa);
	if (bp != NULL) {
		vs = &vcpu_state[vcpu];
		assert(vs->stepping == false);
		assert(vs->stepped == false);
		assert(vs->hit_swbreak == false);
		vs->hit_swbreak = true;
		/*
		 * NOTE(review): writes the exit's rip back to guest RIP,
		 * presumably so the vCPU resumes at the breakpoint
		 * address -- confirm.
		 */
		vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, vmexit->rip);
		for (;;) {
			/* Claim the stop if no other vCPU owns it. */
			if (stopped_vcpu == -1) {
				debug("$vCPU %d reporting breakpoint at rip %#lx\n", vcpu,
				    vmexit->rip);
				stopped_vcpu = vcpu;
				gdb_suspend_vcpus();
			}
			_gdb_cpu_suspend(vcpu, true);
			if (!vs->hit_swbreak) {
				/* Breakpoint reported. */
				break;
			}
			bp = find_breakpoint(gpa);
			if (bp == NULL) {
				/* Breakpoint was removed. */
				vs->hit_swbreak = false;
				break;
			}
		}
		gdb_cpu_resume(vcpu);
	} else {
		/* Not ours: hand the breakpoint to the guest as a #BP. */
		debug("$vCPU %d injecting breakpoint at rip %#lx\n", vcpu,
		    vmexit->rip);
		error = vm_set_register(ctx, vcpu,
		    VM_REG_GUEST_ENTRY_INST_LENGTH, vmexit->u.bpt.inst_length);
		assert(error == 0);
		error = vm_inject_exception(ctx, vcpu, IDT_BP, 0, 0, 0);
		assert(error == 0);
	}
	pthread_mutex_unlock(&gdb_lock);
}
927 
928 static bool
929 gdb_step_vcpu(int vcpu)
930 {
931 	int error, val;
932 
933 	debug("$vCPU %d step\n", vcpu);
934 	error = vm_get_capability(ctx, vcpu, VM_CAP_MTRAP_EXIT, &val);
935 	if (error < 0)
936 		return (false);
937 
938 	discard_stop();
939 	vcpu_state[vcpu].stepping = true;
940 	vm_resume_cpu(ctx, vcpu);
941 	CPU_CLR(vcpu, &vcpus_suspended);
942 	pthread_cond_broadcast(&idle_vcpus);
943 	return (true);
944 }
945 
946 static void
947 gdb_resume_vcpus(void)
948 {
949 
950 	assert(pthread_mutex_isowned_np(&gdb_lock));
951 	vm_resume_cpu(ctx, -1);
952 	debug("resuming all CPUs\n");
953 	CPU_ZERO(&vcpus_suspended);
954 	pthread_cond_broadcast(&idle_vcpus);
955 }
956 
957 static void
958 gdb_read_regs(void)
959 {
960 	uint64_t regvals[nitems(gdb_regset)];
961 	int i;
962 
963 	if (vm_get_register_set(ctx, cur_vcpu, nitems(gdb_regset),
964 	    gdb_regset, regvals) == -1) {
965 		send_error(errno);
966 		return;
967 	}
968 	start_packet();
969 	for (i = 0; i < nitems(regvals); i++)
970 		append_unsigned_native(regvals[i], gdb_regsize[i]);
971 	finish_packet();
972 }
973 
974 static void
975 gdb_read_mem(const uint8_t *data, size_t len)
976 {
977 	uint64_t gpa, gva, val;
978 	uint8_t *cp;
979 	size_t resid, todo, bytes;
980 	bool started;
981 	int error;
982 
983 	/* Skip 'm' */
984 	data += 1;
985 	len -= 1;
986 
987 	/* Parse and consume address. */
988 	cp = memchr(data, ',', len);
989 	if (cp == NULL || cp == data) {
990 		send_error(EINVAL);
991 		return;
992 	}
993 	gva = parse_integer(data, cp - data);
994 	len -= (cp - data) + 1;
995 	data += (cp - data) + 1;
996 
997 	/* Parse length. */
998 	resid = parse_integer(data, len);
999 
1000 	started = false;
1001 	while (resid > 0) {
1002 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1003 		if (error == -1) {
1004 			if (started)
1005 				finish_packet();
1006 			else
1007 				send_error(errno);
1008 			return;
1009 		}
1010 		if (error == 0) {
1011 			if (started)
1012 				finish_packet();
1013 			else
1014 				send_error(EFAULT);
1015 			return;
1016 		}
1017 
1018 		/* Read bytes from current page. */
1019 		todo = getpagesize() - gpa % getpagesize();
1020 		if (todo > resid)
1021 			todo = resid;
1022 
1023 		cp = paddr_guest2host(ctx, gpa, todo);
1024 		if (cp != NULL) {
1025 			/*
1026 			 * If this page is guest RAM, read it a byte
1027 			 * at a time.
1028 			 */
1029 			if (!started) {
1030 				start_packet();
1031 				started = true;
1032 			}
1033 			while (todo > 0) {
1034 				append_byte(*cp);
1035 				cp++;
1036 				gpa++;
1037 				gva++;
1038 				resid--;
1039 				todo--;
1040 			}
1041 		} else {
1042 			/*
1043 			 * If this page isn't guest RAM, try to handle
1044 			 * it via MMIO.  For MMIO requests, use
1045 			 * aligned reads of words when possible.
1046 			 */
1047 			while (todo > 0) {
1048 				if (gpa & 1 || todo == 1)
1049 					bytes = 1;
1050 				else if (gpa & 2 || todo == 2)
1051 					bytes = 2;
1052 				else
1053 					bytes = 4;
1054 				error = read_mem(ctx, cur_vcpu, gpa, &val,
1055 				    bytes);
1056 				if (error == 0) {
1057 					if (!started) {
1058 						start_packet();
1059 						started = true;
1060 					}
1061 					gpa += bytes;
1062 					gva += bytes;
1063 					resid -= bytes;
1064 					todo -= bytes;
1065 					while (bytes > 0) {
1066 						append_byte(val);
1067 						val >>= 8;
1068 						bytes--;
1069 					}
1070 				} else {
1071 					if (started)
1072 						finish_packet();
1073 					else
1074 						send_error(EFAULT);
1075 					return;
1076 				}
1077 			}
1078 		}
1079 		assert(resid == 0 || gpa % getpagesize() == 0);
1080 	}
1081 	if (!started)
1082 		start_packet();
1083 	finish_packet();
1084 }
1085 
1086 static void
1087 gdb_write_mem(const uint8_t *data, size_t len)
1088 {
1089 	uint64_t gpa, gva, val;
1090 	uint8_t *cp;
1091 	size_t resid, todo, bytes;
1092 	int error;
1093 
1094 	/* Skip 'M' */
1095 	data += 1;
1096 	len -= 1;
1097 
1098 	/* Parse and consume address. */
1099 	cp = memchr(data, ',', len);
1100 	if (cp == NULL || cp == data) {
1101 		send_error(EINVAL);
1102 		return;
1103 	}
1104 	gva = parse_integer(data, cp - data);
1105 	len -= (cp - data) + 1;
1106 	data += (cp - data) + 1;
1107 
1108 	/* Parse and consume length. */
1109 	cp = memchr(data, ':', len);
1110 	if (cp == NULL || cp == data) {
1111 		send_error(EINVAL);
1112 		return;
1113 	}
1114 	resid = parse_integer(data, cp - data);
1115 	len -= (cp - data) + 1;
1116 	data += (cp - data) + 1;
1117 
1118 	/* Verify the available bytes match the length. */
1119 	if (len != resid * 2) {
1120 		send_error(EINVAL);
1121 		return;
1122 	}
1123 
1124 	while (resid > 0) {
1125 		error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1126 		if (error == -1) {
1127 			send_error(errno);
1128 			return;
1129 		}
1130 		if (error == 0) {
1131 			send_error(EFAULT);
1132 			return;
1133 		}
1134 
1135 		/* Write bytes to current page. */
1136 		todo = getpagesize() - gpa % getpagesize();
1137 		if (todo > resid)
1138 			todo = resid;
1139 
1140 		cp = paddr_guest2host(ctx, gpa, todo);
1141 		if (cp != NULL) {
1142 			/*
1143 			 * If this page is guest RAM, write it a byte
1144 			 * at a time.
1145 			 */
1146 			while (todo > 0) {
1147 				assert(len >= 2);
1148 				*cp = parse_byte(data);
1149 				data += 2;
1150 				len -= 2;
1151 				cp++;
1152 				gpa++;
1153 				gva++;
1154 				resid--;
1155 				todo--;
1156 			}
1157 		} else {
1158 			/*
1159 			 * If this page isn't guest RAM, try to handle
1160 			 * it via MMIO.  For MMIO requests, use
1161 			 * aligned writes of words when possible.
1162 			 */
1163 			while (todo > 0) {
1164 				if (gpa & 1 || todo == 1) {
1165 					bytes = 1;
1166 					val = parse_byte(data);
1167 				} else if (gpa & 2 || todo == 2) {
1168 					bytes = 2;
1169 					val = be16toh(parse_integer(data, 4));
1170 				} else {
1171 					bytes = 4;
1172 					val = be32toh(parse_integer(data, 8));
1173 				}
1174 				error = write_mem(ctx, cur_vcpu, gpa, val,
1175 				    bytes);
1176 				if (error == 0) {
1177 					gpa += bytes;
1178 					gva += bytes;
1179 					resid -= bytes;
1180 					todo -= bytes;
1181 					data += 2 * bytes;
1182 					len -= 2 * bytes;
1183 				} else {
1184 					send_error(EFAULT);
1185 					return;
1186 				}
1187 			}
1188 		}
1189 		assert(resid == 0 || gpa % getpagesize() == 0);
1190 	}
1191 	assert(len == 0);
1192 	send_ok();
1193 }
1194 
1195 static bool
1196 set_breakpoint_caps(bool enable)
1197 {
1198 	cpuset_t mask;
1199 	int vcpu;
1200 
1201 	mask = vcpus_active;
1202 	while (!CPU_EMPTY(&mask)) {
1203 		vcpu = CPU_FFS(&mask) - 1;
1204 		CPU_CLR(vcpu, &mask);
1205 		if (vm_set_capability(ctx, vcpu, VM_CAP_BPT_EXIT,
1206 		    enable ? 1 : 0) < 0)
1207 			return (false);
1208 		debug("$vCPU %d %sabled breakpoint exits\n", vcpu,
1209 		    enable ? "en" : "dis");
1210 	}
1211 	return (true);
1212 }
1213 
1214 static void
1215 remove_all_sw_breakpoints(void)
1216 {
1217 	struct breakpoint *bp, *nbp;
1218 	uint8_t *cp;
1219 
1220 	if (TAILQ_EMPTY(&breakpoints))
1221 		return;
1222 
1223 	TAILQ_FOREACH_SAFE(bp, &breakpoints, link, nbp) {
1224 		debug("remove breakpoint at %#lx\n", bp->gpa);
1225 		cp = paddr_guest2host(ctx, bp->gpa, 1);
1226 		*cp = bp->shadow_inst;
1227 		TAILQ_REMOVE(&breakpoints, bp, link);
1228 		free(bp);
1229 	}
1230 	TAILQ_INIT(&breakpoints);
1231 	set_breakpoint_caps(false);
1232 }
1233 
1234 static void
1235 update_sw_breakpoint(uint64_t gva, int kind, bool insert)
1236 {
1237 	struct breakpoint *bp;
1238 	uint64_t gpa;
1239 	uint8_t *cp;
1240 	int error;
1241 
1242 	if (kind != 1) {
1243 		send_error(EINVAL);
1244 		return;
1245 	}
1246 
1247 	error = guest_vaddr2paddr(cur_vcpu, gva, &gpa);
1248 	if (error == -1) {
1249 		send_error(errno);
1250 		return;
1251 	}
1252 	if (error == 0) {
1253 		send_error(EFAULT);
1254 		return;
1255 	}
1256 
1257 	cp = paddr_guest2host(ctx, gpa, 1);
1258 
1259 	/* Only permit breakpoints in guest RAM. */
1260 	if (cp == NULL) {
1261 		send_error(EFAULT);
1262 		return;
1263 	}
1264 
1265 	/* Find any existing breakpoint. */
1266 	bp = find_breakpoint(gpa);
1267 
1268 	/*
1269 	 * Silently ignore duplicate commands since the protocol
1270 	 * requires these packets to be idempotent.
1271 	 */
1272 	if (insert) {
1273 		if (bp == NULL) {
1274 			if (TAILQ_EMPTY(&breakpoints) &&
1275 			    !set_breakpoint_caps(true)) {
1276 				send_empty_response();
1277 				return;
1278 			}
1279 			bp = malloc(sizeof(*bp));
1280 			bp->gpa = gpa;
1281 			bp->shadow_inst = *cp;
1282 			*cp = 0xcc;	/* INT 3 */
1283 			TAILQ_INSERT_TAIL(&breakpoints, bp, link);
1284 			debug("new breakpoint at %#lx\n", gpa);
1285 		}
1286 	} else {
1287 		if (bp != NULL) {
1288 			debug("remove breakpoint at %#lx\n", gpa);
1289 			*cp = bp->shadow_inst;
1290 			TAILQ_REMOVE(&breakpoints, bp, link);
1291 			free(bp);
1292 			if (TAILQ_EMPTY(&breakpoints))
1293 				set_breakpoint_caps(false);
1294 		}
1295 	}
1296 	send_ok();
1297 }
1298 
/*
 * Handle a breakpoint request of the form "Z<type>,<addr>,<kind>"
 * (insert) or "z<type>,<addr>,<kind>" (remove), with all three fields
 * hexadecimal.
 *
 * Only type 0 (software breakpoint) is acted on; any other type, and
 * any request carrying a ';' suffix, gets an empty reply.  A missing
 * or empty field gets an EINVAL error reply.
 */
static void
parse_breakpoint(const uint8_t *data, size_t len)
{
	uint64_t gva;
	uint8_t *sep;
	size_t field_len;
	bool insert;
	int kind, type;

	insert = (data[0] == 'Z');

	/* Skip 'Z/z' */
	data++;
	len--;

	/* Type: everything up to the first comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	field_len = sep - data;
	type = parse_integer(data, field_len);
	len -= field_len + 1;
	data += field_len + 1;

	/* Address: everything up to the next comma. */
	sep = memchr(data, ',', len);
	if (sep == NULL || sep == data) {
		send_error(EINVAL);
		return;
	}
	field_len = sep - data;
	gva = parse_integer(data, field_len);
	len -= field_len + 1;
	data += field_len + 1;

	/* Kind: the remainder, which must not start with ';'. */
	sep = memchr(data, ';', len);
	if (sep == data) {
		send_error(EINVAL);
		return;
	}
	if (sep != NULL) {
		/*
		 * We do not advertise support for either the
		 * ConditionalBreakpoints or BreakpointCommands
		 * features, so we should not be getting conditions or
		 * commands from the remote end.
		 */
		send_empty_response();
		return;
	}
	kind = parse_integer(data, len);

	if (type == 0)
		update_sw_breakpoint(gva, kind, insert);
	else
		send_empty_response();
}
1362 
/*
 * Return true when the 'len'-byte buffer 'data' begins with the
 * NUL-terminated string 'cmd'.  This is a prefix match: bytes in 'data'
 * beyond strlen(cmd) are not examined.
 */
static bool
command_equals(const uint8_t *data, size_t len, const char *cmd)
{
	size_t cmdlen;

	cmdlen = strlen(cmd);
	return (len >= cmdlen && memcmp(data, cmd, cmdlen) == 0);
}
1371 
1372 static void
1373 check_features(const uint8_t *data, size_t len)
1374 {
1375 	char *feature, *next_feature, *str, *value;
1376 	bool supported;
1377 
1378 	str = malloc(len + 1);
1379 	memcpy(str, data, len);
1380 	str[len] = '\0';
1381 	next_feature = str;
1382 
1383 	while ((feature = strsep(&next_feature, ";")) != NULL) {
1384 		/*
1385 		 * Null features shouldn't exist, but skip if they
1386 		 * do.
1387 		 */
1388 		if (strcmp(feature, "") == 0)
1389 			continue;
1390 
1391 		/*
1392 		 * Look for the value or supported / not supported
1393 		 * flag.
1394 		 */
1395 		value = strchr(feature, '=');
1396 		if (value != NULL) {
1397 			*value = '\0';
1398 			value++;
1399 			supported = true;
1400 		} else {
1401 			value = feature + strlen(feature) - 1;
1402 			switch (*value) {
1403 			case '+':
1404 				supported = true;
1405 				break;
1406 			case '-':
1407 				supported = false;
1408 				break;
1409 			default:
1410 				/*
1411 				 * This is really a protocol error,
1412 				 * but we just ignore malformed
1413 				 * features for ease of
1414 				 * implementation.
1415 				 */
1416 				continue;
1417 			}
1418 			value = NULL;
1419 		}
1420 
1421 		if (strcmp(feature, "swbreak") == 0)
1422 			swbreak_enabled = supported;
1423 	}
1424 	free(str);
1425 
1426 	start_packet();
1427 
1428 	/* This is an arbitrary limit. */
1429 	append_string("PacketSize=4096");
1430 	append_string(";swbreak+");
1431 	finish_packet();
1432 }
1433 
1434 static void
1435 gdb_query(const uint8_t *data, size_t len)
1436 {
1437 
1438 	/*
1439 	 * TODO:
1440 	 * - qSearch
1441 	 */
1442 	if (command_equals(data, len, "qAttached")) {
1443 		start_packet();
1444 		append_char('1');
1445 		finish_packet();
1446 	} else if (command_equals(data, len, "qC")) {
1447 		start_packet();
1448 		append_string("QC");
1449 		append_integer(cur_vcpu + 1);
1450 		finish_packet();
1451 	} else if (command_equals(data, len, "qfThreadInfo")) {
1452 		cpuset_t mask;
1453 		bool first;
1454 		int vcpu;
1455 
1456 		if (CPU_EMPTY(&vcpus_active)) {
1457 			send_error(EINVAL);
1458 			return;
1459 		}
1460 		mask = vcpus_active;
1461 		start_packet();
1462 		append_char('m');
1463 		first = true;
1464 		while (!CPU_EMPTY(&mask)) {
1465 			vcpu = CPU_FFS(&mask) - 1;
1466 			CPU_CLR(vcpu, &mask);
1467 			if (first)
1468 				first = false;
1469 			else
1470 				append_char(',');
1471 			append_integer(vcpu + 1);
1472 		}
1473 		finish_packet();
1474 	} else if (command_equals(data, len, "qsThreadInfo")) {
1475 		start_packet();
1476 		append_char('l');
1477 		finish_packet();
1478 	} else if (command_equals(data, len, "qSupported")) {
1479 		data += strlen("qSupported");
1480 		len -= strlen("qSupported");
1481 		check_features(data, len);
1482 	} else if (command_equals(data, len, "qThreadExtraInfo")) {
1483 		char buf[16];
1484 		int tid;
1485 
1486 		data += strlen("qThreadExtraInfo");
1487 		len -= strlen("qThreadExtraInfo");
1488 		if (*data != ',') {
1489 			send_error(EINVAL);
1490 			return;
1491 		}
1492 		tid = parse_threadid(data + 1, len - 1);
1493 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1494 			send_error(EINVAL);
1495 			return;
1496 		}
1497 
1498 		snprintf(buf, sizeof(buf), "vCPU %d", tid - 1);
1499 		start_packet();
1500 		append_asciihex(buf);
1501 		finish_packet();
1502 	} else
1503 		send_empty_response();
1504 }
1505 
1506 static void
1507 handle_command(const uint8_t *data, size_t len)
1508 {
1509 
1510 	/* Reject packets with a sequence-id. */
1511 	if (len >= 3 && data[0] >= '0' && data[0] <= '9' &&
1512 	    data[0] >= '0' && data[0] <= '9' && data[2] == ':') {
1513 		send_empty_response();
1514 		return;
1515 	}
1516 
1517 	switch (*data) {
1518 	case 'c':
1519 		if (len != 1) {
1520 			send_error(EINVAL);
1521 			break;
1522 		}
1523 
1524 		discard_stop();
1525 		gdb_resume_vcpus();
1526 		break;
1527 	case 'D':
1528 		send_ok();
1529 
1530 		/* TODO: Resume any stopped CPUs. */
1531 		break;
1532 	case 'g': {
1533 		gdb_read_regs();
1534 		break;
1535 	}
1536 	case 'H': {
1537 		int tid;
1538 
1539 		if (data[1] != 'g' && data[1] != 'c') {
1540 			send_error(EINVAL);
1541 			break;
1542 		}
1543 		tid = parse_threadid(data + 2, len - 2);
1544 		if (tid == -2) {
1545 			send_error(EINVAL);
1546 			break;
1547 		}
1548 
1549 		if (CPU_EMPTY(&vcpus_active)) {
1550 			send_error(EINVAL);
1551 			break;
1552 		}
1553 		if (tid == -1 || tid == 0)
1554 			cur_vcpu = CPU_FFS(&vcpus_active) - 1;
1555 		else if (CPU_ISSET(tid - 1, &vcpus_active))
1556 			cur_vcpu = tid - 1;
1557 		else {
1558 			send_error(EINVAL);
1559 			break;
1560 		}
1561 		send_ok();
1562 		break;
1563 	}
1564 	case 'm':
1565 		gdb_read_mem(data, len);
1566 		break;
1567 	case 'M':
1568 		gdb_write_mem(data, len);
1569 		break;
1570 	case 'T': {
1571 		int tid;
1572 
1573 		tid = parse_threadid(data + 1, len - 1);
1574 		if (tid <= 0 || !CPU_ISSET(tid - 1, &vcpus_active)) {
1575 			send_error(EINVAL);
1576 			return;
1577 		}
1578 		send_ok();
1579 		break;
1580 	}
1581 	case 'q':
1582 		gdb_query(data, len);
1583 		break;
1584 	case 's':
1585 		if (len != 1) {
1586 			send_error(EINVAL);
1587 			break;
1588 		}
1589 
1590 		/* Don't send a reply until a stop occurs. */
1591 		if (!gdb_step_vcpu(cur_vcpu)) {
1592 			send_error(EOPNOTSUPP);
1593 			break;
1594 		}
1595 		break;
1596 	case 'z':
1597 	case 'Z':
1598 		parse_breakpoint(data, len);
1599 		break;
1600 	case '?':
1601 		report_stop(false);
1602 		break;
1603 	case 'G': /* TODO */
1604 	case 'v':
1605 		/* Handle 'vCont' */
1606 		/* 'vCtrlC' */
1607 	case 'p': /* TODO */
1608 	case 'P': /* TODO */
1609 	case 'Q': /* TODO */
1610 	case 't': /* TODO */
1611 	case 'X': /* TODO */
1612 	default:
1613 		send_empty_response();
1614 	}
1615 }
1616 
1617 /* Check for a valid packet in the command buffer. */
1618 static void
1619 check_command(int fd)
1620 {
1621 	uint8_t *head, *hash, *p, sum;
1622 	size_t avail, plen;
1623 
1624 	for (;;) {
1625 		avail = cur_comm.len;
1626 		if (avail == 0)
1627 			return;
1628 		head = io_buffer_head(&cur_comm);
1629 		switch (*head) {
1630 		case 0x03:
1631 			debug("<- Ctrl-C\n");
1632 			io_buffer_consume(&cur_comm, 1);
1633 
1634 			gdb_suspend_vcpus();
1635 			break;
1636 		case '+':
1637 			/* ACK of previous response. */
1638 			debug("<- +\n");
1639 			if (response_pending())
1640 				io_buffer_reset(&cur_resp);
1641 			io_buffer_consume(&cur_comm, 1);
1642 			if (stopped_vcpu != -1 && report_next_stop) {
1643 				report_stop(true);
1644 				send_pending_data(fd);
1645 			}
1646 			break;
1647 		case '-':
1648 			/* NACK of previous response. */
1649 			debug("<- -\n");
1650 			if (response_pending()) {
1651 				cur_resp.len += cur_resp.start;
1652 				cur_resp.start = 0;
1653 				if (cur_resp.data[0] == '+')
1654 					io_buffer_advance(&cur_resp, 1);
1655 				debug("-> %.*s\n", (int)cur_resp.len,
1656 				    io_buffer_head(&cur_resp));
1657 			}
1658 			io_buffer_consume(&cur_comm, 1);
1659 			send_pending_data(fd);
1660 			break;
1661 		case '$':
1662 			/* Packet. */
1663 
1664 			if (response_pending()) {
1665 				warnx("New GDB command while response in "
1666 				    "progress");
1667 				io_buffer_reset(&cur_resp);
1668 			}
1669 
1670 			/* Is packet complete? */
1671 			hash = memchr(head, '#', avail);
1672 			if (hash == NULL)
1673 				return;
1674 			plen = (hash - head + 1) + 2;
1675 			if (avail < plen)
1676 				return;
1677 			debug("<- %.*s\n", (int)plen, head);
1678 
1679 			/* Verify checksum. */
1680 			for (sum = 0, p = head + 1; p < hash; p++)
1681 				sum += *p;
1682 			if (sum != parse_byte(hash + 1)) {
1683 				io_buffer_consume(&cur_comm, plen);
1684 				debug("-> -\n");
1685 				send_char('-');
1686 				send_pending_data(fd);
1687 				break;
1688 			}
1689 			send_char('+');
1690 
1691 			handle_command(head + 1, hash - (head + 1));
1692 			io_buffer_consume(&cur_comm, plen);
1693 			if (!response_pending())
1694 				debug("-> +\n");
1695 			send_pending_data(fd);
1696 			break;
1697 		default:
1698 			/* XXX: Possibly drop connection instead. */
1699 			debug("-> %02x\n", *head);
1700 			io_buffer_consume(&cur_comm, 1);
1701 			break;
1702 		}
1703 	}
1704 }
1705 
1706 static void
1707 gdb_readable(int fd, enum ev_type event, void *arg)
1708 {
1709 	ssize_t nread;
1710 	int pending;
1711 
1712 	if (ioctl(fd, FIONREAD, &pending) == -1) {
1713 		warn("FIONREAD on GDB socket");
1714 		return;
1715 	}
1716 
1717 	/*
1718 	 * 'pending' might be zero due to EOF.  We need to call read
1719 	 * with a non-zero length to detect EOF.
1720 	 */
1721 	if (pending == 0)
1722 		pending = 1;
1723 
1724 	/* Ensure there is room in the command buffer. */
1725 	io_buffer_grow(&cur_comm, pending);
1726 	assert(io_buffer_avail(&cur_comm) >= pending);
1727 
1728 	nread = read(fd, io_buffer_tail(&cur_comm), io_buffer_avail(&cur_comm));
1729 	if (nread == 0) {
1730 		close_connection();
1731 	} else if (nread == -1) {
1732 		if (errno == EAGAIN)
1733 			return;
1734 
1735 		warn("Read from GDB socket");
1736 		close_connection();
1737 	} else {
1738 		cur_comm.len += nread;
1739 		pthread_mutex_lock(&gdb_lock);
1740 		check_command(fd);
1741 		pthread_mutex_unlock(&gdb_lock);
1742 	}
1743 }
1744 
1745 static void
1746 gdb_writable(int fd, enum ev_type event, void *arg)
1747 {
1748 
1749 	send_pending_data(fd);
1750 }
1751 
1752 static void
1753 new_connection(int fd, enum ev_type event, void *arg)
1754 {
1755 	int optval, s;
1756 
1757 	s = accept4(fd, NULL, NULL, SOCK_NONBLOCK);
1758 	if (s == -1) {
1759 		if (arg != NULL)
1760 			err(1, "Failed accepting initial GDB connection");
1761 
1762 		/* Silently ignore errors post-startup. */
1763 		return;
1764 	}
1765 
1766 	optval = 1;
1767 	if (setsockopt(s, SOL_SOCKET, SO_NOSIGPIPE, &optval, sizeof(optval)) ==
1768 	    -1) {
1769 		warn("Failed to disable SIGPIPE for GDB connection");
1770 		close(s);
1771 		return;
1772 	}
1773 
1774 	pthread_mutex_lock(&gdb_lock);
1775 	if (cur_fd != -1) {
1776 		close(s);
1777 		warnx("Ignoring additional GDB connection.");
1778 	}
1779 
1780 	read_event = mevent_add(s, EVF_READ, gdb_readable, NULL);
1781 	if (read_event == NULL) {
1782 		if (arg != NULL)
1783 			err(1, "Failed to setup initial GDB connection");
1784 		pthread_mutex_unlock(&gdb_lock);
1785 		return;
1786 	}
1787 	write_event = mevent_add(s, EVF_WRITE, gdb_writable, NULL);
1788 	if (write_event == NULL) {
1789 		if (arg != NULL)
1790 			err(1, "Failed to setup initial GDB connection");
1791 		mevent_delete_close(read_event);
1792 		read_event = NULL;
1793 	}
1794 
1795 	cur_fd = s;
1796 	cur_vcpu = 0;
1797 	stopped_vcpu = -1;
1798 
1799 	/* Break on attach. */
1800 	first_stop = true;
1801 	report_next_stop = false;
1802 	gdb_suspend_vcpus();
1803 	pthread_mutex_unlock(&gdb_lock);
1804 }
1805 
1806 #ifndef WITHOUT_CAPSICUM
1807 void
1808 limit_gdb_socket(int s)
1809 {
1810 	cap_rights_t rights;
1811 	unsigned long ioctls[] = { FIONREAD };
1812 
1813 	cap_rights_init(&rights, CAP_ACCEPT, CAP_EVENT, CAP_READ, CAP_WRITE,
1814 	    CAP_SETSOCKOPT, CAP_IOCTL);
1815 	if (caph_rights_limit(s, &rights) == -1)
1816 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1817 	if (caph_ioctls_limit(s, ioctls, nitems(ioctls)) == -1)
1818 		errx(EX_OSERR, "Unable to apply rights for sandbox");
1819 }
1820 #endif
1821 
/*
 * Set up the GDB stub for the VM described by '_ctx'.
 *
 * Nothing happens unless the "gdb.port" configuration value is set.
 * Otherwise a non-blocking listening socket is created on
 * "gdb.address" (default "localhost") and registered with the event
 * loop so that new_connection() runs for each incoming connection.
 * When "gdb.wait" is true, vCPU 0 is marked suspended so that it does
 * not run until a debugger attaches.  Any failure during setup is
 * fatal.
 */
void
init_gdb(struct vmctx *_ctx)
{
	int error, flags, optval, s;
	struct addrinfo hints;
	struct addrinfo *gdbaddr;
	const char *saddr, *value;
	char *sport;
	bool wait;

	value = get_config_value("gdb.port");
	if (value == NULL)
		return;
	sport = strdup(value);
	if (sport == NULL)
		errx(4, "Failed to allocate memory");

	wait = get_config_bool_default("gdb.wait", false);

	saddr = get_config_value("gdb.address");
	if (saddr == NULL) {
		saddr = "localhost";
	}

	debug("==> starting on %s:%s, %swaiting\n",
	    saddr, sport, wait ? "" : "not ");

	error = pthread_mutex_init(&gdb_lock, NULL);
	if (error != 0)
		errc(1, error, "gdb mutex init");
	error = pthread_cond_init(&idle_vcpus, NULL);
	if (error != 0)
		errc(1, error, "gdb cv init");

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV | AI_PASSIVE;

	/*
	 * getaddrinfo() reports failure through its return value rather
	 * than errno, so the message has to come from gai_strerror().
	 */
	error = getaddrinfo(saddr, sport, &hints, &gdbaddr);
	if (error != 0)
		errx(1, "gdb address resolve: %s", gai_strerror(error));

	ctx = _ctx;
	s = socket(gdbaddr->ai_family, gdbaddr->ai_socktype, 0);
	if (s < 0)
		err(1, "gdb socket create");

	/* Best effort: a failure here is deliberately ignored. */
	optval = 1;
	(void)setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval));

	if (bind(s, gdbaddr->ai_addr, gdbaddr->ai_addrlen) < 0)
		err(1, "gdb socket bind");

	if (listen(s, 1) < 0)
		err(1, "gdb socket listen");

	stopped_vcpu = -1;
	TAILQ_INIT(&breakpoints);
	vcpu_state = calloc(guest_ncpus, sizeof(*vcpu_state));
	if (vcpu_state == NULL)
		errx(4, "Failed to allocate memory");
	if (wait) {
		/*
		 * Set vcpu 0 in vcpus_suspended.  This will trigger the
		 * logic in gdb_cpu_add() to suspend the first vcpu before
		 * it starts execution.  The vcpu will remain suspended
		 * until a debugger connects.
		 */
		CPU_SET(0, &vcpus_suspended);
		stopped_vcpu = 0;
	}

	flags = fcntl(s, F_GETFL);
	if (flags == -1 || fcntl(s, F_SETFL, flags | O_NONBLOCK) == -1)
		err(1, "Failed to mark gdb socket non-blocking");

#ifndef WITHOUT_CAPSICUM
	limit_gdb_socket(s);
#endif
	/* Without the listen event no debugger could ever attach. */
	if (mevent_add(s, EVF_READ, new_connection, NULL) == NULL)
		errx(1, "Failed to register gdb listen socket");
	gdb_active = true;
	freeaddrinfo(gdbaddr);
	free(sport);
}
1904