1f343346bSRichard Henderson /*
2f343346bSRichard Henderson * Interface to the capstone disassembler.
3f343346bSRichard Henderson * SPDX-License-Identifier: GPL-2.0-or-later
4f343346bSRichard Henderson */
5f343346bSRichard Henderson
6f343346bSRichard Henderson #include "qemu/osdep.h"
7f343346bSRichard Henderson #include "qemu/bswap.h"
8f343346bSRichard Henderson #include "disas/dis-asm.h"
9f343346bSRichard Henderson #include "disas/capstone.h"
10f343346bSRichard Henderson
11f343346bSRichard Henderson
12f343346bSRichard Henderson /*
13f343346bSRichard Henderson * Temporary storage for the capstone library. This will be alloced via
14f343346bSRichard Henderson * malloc with a size private to the library; thus there's no reason not
15f343346bSRichard Henderson * to share this across calls and across host vs target disassembly.
16f343346bSRichard Henderson */
17f343346bSRichard Henderson static __thread cs_insn *cap_insn;
18f343346bSRichard Henderson
19f343346bSRichard Henderson /*
20c6d3da96SRichard Henderson * The capstone library always skips 2 bytes for S390X.
21c6d3da96SRichard Henderson * This is less than ideal, since we can tell from the first two bits
22c6d3da96SRichard Henderson * the size of the insn and thus stay in sync with the insn stream.
23c6d3da96SRichard Henderson */
24c6d3da96SRichard Henderson static size_t CAPSTONE_API
cap_skipdata_s390x_cb(const uint8_t * code,size_t code_size,size_t offset,void * user_data)25c6d3da96SRichard Henderson cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
26c6d3da96SRichard Henderson size_t offset, void *user_data)
27c6d3da96SRichard Henderson {
28c6d3da96SRichard Henderson size_t ilen;
29c6d3da96SRichard Henderson
30c6d3da96SRichard Henderson /* See get_ilen() in target/s390x/internal.h. */
31c6d3da96SRichard Henderson switch (code[offset] >> 6) {
32c6d3da96SRichard Henderson case 0:
33c6d3da96SRichard Henderson ilen = 2;
34c6d3da96SRichard Henderson break;
35c6d3da96SRichard Henderson case 1:
36c6d3da96SRichard Henderson case 2:
37c6d3da96SRichard Henderson ilen = 4;
38c6d3da96SRichard Henderson break;
39c6d3da96SRichard Henderson default:
40c6d3da96SRichard Henderson ilen = 6;
41c6d3da96SRichard Henderson break;
42c6d3da96SRichard Henderson }
43c6d3da96SRichard Henderson
44c6d3da96SRichard Henderson return ilen;
45c6d3da96SRichard Henderson }
46c6d3da96SRichard Henderson
47c6d3da96SRichard Henderson static const cs_opt_skipdata cap_skipdata_s390x = {
48c6d3da96SRichard Henderson .mnemonic = ".byte",
49c6d3da96SRichard Henderson .callback = cap_skipdata_s390x_cb
50c6d3da96SRichard Henderson };
51c6d3da96SRichard Henderson
52c6d3da96SRichard Henderson /*
53f343346bSRichard Henderson * Initialize the Capstone library.
54f343346bSRichard Henderson *
55f343346bSRichard Henderson * ??? It would be nice to cache this. We would need one handle for the
56f343346bSRichard Henderson * host and one for the target. For most targets we can reset specific
57f343346bSRichard Henderson * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
58f343346bSRichard Henderson * CS_ARCH_* in this way. Thus we would need to be able to close and
59f343346bSRichard Henderson * re-open the target handle with a different arch for the target in order
60f343346bSRichard Henderson * to handle AArch64 vs AArch32 mode switching.
61f343346bSRichard Henderson */
cap_disas_start(disassemble_info * info,csh * handle)62f343346bSRichard Henderson static cs_err cap_disas_start(disassemble_info *info, csh *handle)
63f343346bSRichard Henderson {
64f343346bSRichard Henderson cs_mode cap_mode = info->cap_mode;
65f343346bSRichard Henderson cs_err err;
66f343346bSRichard Henderson
67f343346bSRichard Henderson cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
68f343346bSRichard Henderson : CS_MODE_LITTLE_ENDIAN);
69f343346bSRichard Henderson
70f343346bSRichard Henderson err = cs_open(info->cap_arch, cap_mode, handle);
71f343346bSRichard Henderson if (err != CS_ERR_OK) {
72f343346bSRichard Henderson return err;
73f343346bSRichard Henderson }
74f343346bSRichard Henderson
75f343346bSRichard Henderson /* "Disassemble" unknown insns as ".byte W,X,Y,Z". */
76f343346bSRichard Henderson cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
77f343346bSRichard Henderson
78c6d3da96SRichard Henderson switch (info->cap_arch) {
79c6d3da96SRichard Henderson case CS_ARCH_SYSZ:
80c6d3da96SRichard Henderson cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
81c6d3da96SRichard Henderson (uintptr_t)&cap_skipdata_s390x);
82c6d3da96SRichard Henderson break;
83c6d3da96SRichard Henderson
84c6d3da96SRichard Henderson case CS_ARCH_X86:
85f343346bSRichard Henderson /*
86f343346bSRichard Henderson * We don't care about errors (if for some reason the library
87f343346bSRichard Henderson * is compiled without AT&T syntax); the user will just have
88f343346bSRichard Henderson * to deal with the Intel syntax.
89f343346bSRichard Henderson */
90f343346bSRichard Henderson cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
91c6d3da96SRichard Henderson break;
92f343346bSRichard Henderson }
93f343346bSRichard Henderson
94f343346bSRichard Henderson /* Allocate temp space for cs_disasm_iter. */
95f343346bSRichard Henderson if (cap_insn == NULL) {
96f343346bSRichard Henderson cap_insn = cs_malloc(*handle);
97f343346bSRichard Henderson if (cap_insn == NULL) {
98f343346bSRichard Henderson cs_close(handle);
99f343346bSRichard Henderson return CS_ERR_MEM;
100f343346bSRichard Henderson }
101f343346bSRichard Henderson }
102f343346bSRichard Henderson return CS_ERR_OK;
103f343346bSRichard Henderson }
104f343346bSRichard Henderson
/*
 * Print bytes [i, n) of INSN as hex, each unit preceded by a space.
 *
 * The unit size comes from info->cap_insn_unit: 4 and 2 print whole
 * 32-bit / 16-bit values loaded with the endianness in info->endian;
 * any other value prints the bytes one at a time.
 */
static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
                                int i, int n)
{
    fprintf_function out = info->fprintf_func;
    FILE *f = info->stream;
    const bool big_endian = info->endian == BFD_ENDIAN_BIG;
    const uint8_t *bytes = insn->bytes;

    switch (info->cap_insn_unit) {
    case 4:
        while (i < n) {
            out(f, " %08x",
                big_endian ? ldl_be_p(bytes + i) : ldl_le_p(bytes + i));
            i += 4;
        }
        break;

    case 2:
        while (i < n) {
            out(f, " %04x",
                big_endian ? lduw_be_p(bytes + i) : lduw_le_p(bytes + i));
            i += 2;
        }
        break;

    default:
        while (i < n) {
            out(f, " %02x", bytes[i]);
            i++;
        }
        break;
    }
}
144f343346bSRichard Henderson
/*
 * Print one disassembled insn: address, raw bytes, mnemonic and operands.
 *
 * At most info->cap_insn_split bytes are shown on the first line, padded
 * so that mnemonics line up; any bytes beyond that are dumped on
 * continuation lines, each prefixed with its own address.
 */
static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
{
    fprintf_function out = info->fprintf_func;
    FILE *f = info->stream;
    const int len = insn->size;
    const int per_line = info->cap_insn_split;
    int off;

    out(f, "0x%08" PRIx64 ": ", insn->address);

    if (len < per_line) {
        /* Short insn: dump all of it, then pad out the missing units. */
        int pad = (per_line - len) / info->cap_insn_unit;

        pad *= (2 * info->cap_insn_unit + 1);
        cap_dump_insn_units(info, insn, 0, len);
        out(f, "%*s", pad, "");
    } else {
        /* Only the first PER_LINE bytes fit on the first line. */
        cap_dump_insn_units(info, insn, 0, per_line);
    }

    /* Print the actual instruction. */
    out(f, " %-8s %s\n", insn->mnemonic, insn->op_str);

    /* Dump any remaining part of the insn on subsequent lines. */
    off = per_line;
    while (off < len) {
        int end = MIN(len, off + per_line);

        out(f, "0x%08" PRIx64 ": ", insn->address + off);
        cap_dump_insn_units(info, insn, off, end);
        out(f, "\n");
        off += per_line;
    }
}
176f343346bSRichard Henderson
177f343346bSRichard Henderson /* Disassemble SIZE bytes at PC for the target. */
cap_disas_target(disassemble_info * info,uint64_t pc,size_t size)178f343346bSRichard Henderson bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
179f343346bSRichard Henderson {
180f343346bSRichard Henderson uint8_t cap_buf[1024];
181f343346bSRichard Henderson csh handle;
182f343346bSRichard Henderson cs_insn *insn;
183f343346bSRichard Henderson size_t csize = 0;
184f343346bSRichard Henderson
185f343346bSRichard Henderson if (cap_disas_start(info, &handle) != CS_ERR_OK) {
186f343346bSRichard Henderson return false;
187f343346bSRichard Henderson }
188f343346bSRichard Henderson insn = cap_insn;
189f343346bSRichard Henderson
190f343346bSRichard Henderson while (1) {
191f343346bSRichard Henderson size_t tsize = MIN(sizeof(cap_buf) - csize, size);
192f343346bSRichard Henderson const uint8_t *cbuf = cap_buf;
193f343346bSRichard Henderson
194*90bbf9d9SAlex Bennée if (info->read_memory_func(pc + csize, cap_buf + csize, tsize, info) == 0) {
195f343346bSRichard Henderson csize += tsize;
196f343346bSRichard Henderson size -= tsize;
197f343346bSRichard Henderson
198f343346bSRichard Henderson while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
199f343346bSRichard Henderson cap_dump_insn(info, insn);
200f343346bSRichard Henderson }
201f343346bSRichard Henderson
202f343346bSRichard Henderson /* If the target memory is not consumed, go back for more... */
203f343346bSRichard Henderson if (size != 0) {
204f343346bSRichard Henderson /*
205f343346bSRichard Henderson * ... taking care to move any remaining fractional insn
206f343346bSRichard Henderson * to the beginning of the buffer.
207f343346bSRichard Henderson */
208f343346bSRichard Henderson if (csize != 0) {
209f343346bSRichard Henderson memmove(cap_buf, cbuf, csize);
210f343346bSRichard Henderson }
211f343346bSRichard Henderson continue;
212f343346bSRichard Henderson }
213f343346bSRichard Henderson
214f343346bSRichard Henderson /*
215f343346bSRichard Henderson * Since the target memory is consumed, we should not have
216f343346bSRichard Henderson * a remaining fractional insn.
217f343346bSRichard Henderson */
218f343346bSRichard Henderson if (csize != 0) {
219f343346bSRichard Henderson info->fprintf_func(info->stream,
220f343346bSRichard Henderson "Disassembler disagrees with translator "
221f343346bSRichard Henderson "over instruction decoding\n"
222f343346bSRichard Henderson "Please report this to qemu-devel@nongnu.org\n");
223f343346bSRichard Henderson }
224f343346bSRichard Henderson break;
225*90bbf9d9SAlex Bennée
226*90bbf9d9SAlex Bennée } else {
227*90bbf9d9SAlex Bennée info->fprintf_func(info->stream,
228*90bbf9d9SAlex Bennée "0x%08" PRIx64 ": unable to read memory\n", pc);
229*90bbf9d9SAlex Bennée break;
230*90bbf9d9SAlex Bennée }
231f343346bSRichard Henderson }
232f343346bSRichard Henderson
233f343346bSRichard Henderson cs_close(&handle);
234f343346bSRichard Henderson return true;
235f343346bSRichard Henderson }
236f343346bSRichard Henderson
237f343346bSRichard Henderson /* Disassemble SIZE bytes at CODE for the host. */
cap_disas_host(disassemble_info * info,const void * code,size_t size)238f06176beSRichard Henderson bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
239f343346bSRichard Henderson {
240f343346bSRichard Henderson csh handle;
241f343346bSRichard Henderson const uint8_t *cbuf;
242f343346bSRichard Henderson cs_insn *insn;
243f343346bSRichard Henderson uint64_t pc;
244f343346bSRichard Henderson
245f343346bSRichard Henderson if (cap_disas_start(info, &handle) != CS_ERR_OK) {
246f343346bSRichard Henderson return false;
247f343346bSRichard Henderson }
248f343346bSRichard Henderson insn = cap_insn;
249f343346bSRichard Henderson
250f343346bSRichard Henderson cbuf = code;
251f343346bSRichard Henderson pc = (uintptr_t)code;
252f343346bSRichard Henderson
253f343346bSRichard Henderson while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
254f343346bSRichard Henderson cap_dump_insn(info, insn);
255f343346bSRichard Henderson }
256f343346bSRichard Henderson if (size != 0) {
257f343346bSRichard Henderson info->fprintf_func(info->stream,
258f343346bSRichard Henderson "Disassembler disagrees with TCG over instruction encoding\n"
259f343346bSRichard Henderson "Please report this to qemu-devel@nongnu.org\n");
260f343346bSRichard Henderson }
261f343346bSRichard Henderson
262f343346bSRichard Henderson cs_close(&handle);
263f343346bSRichard Henderson return true;
264f343346bSRichard Henderson }
265f343346bSRichard Henderson
266f343346bSRichard Henderson /* Disassemble COUNT insns at PC for the target. */
cap_disas_monitor(disassemble_info * info,uint64_t pc,int count)267f343346bSRichard Henderson bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
268f343346bSRichard Henderson {
269f343346bSRichard Henderson uint8_t cap_buf[32];
270f343346bSRichard Henderson csh handle;
271f343346bSRichard Henderson cs_insn *insn;
272f343346bSRichard Henderson size_t csize = 0;
273f343346bSRichard Henderson
274f343346bSRichard Henderson if (cap_disas_start(info, &handle) != CS_ERR_OK) {
275f343346bSRichard Henderson return false;
276f343346bSRichard Henderson }
277f343346bSRichard Henderson insn = cap_insn;
278f343346bSRichard Henderson
279f343346bSRichard Henderson while (1) {
280f343346bSRichard Henderson /*
281f343346bSRichard Henderson * We want to read memory for one insn, but generically we do not
282f343346bSRichard Henderson * know how much memory that is. We have a small buffer which is
283f343346bSRichard Henderson * known to be sufficient for all supported targets. Try to not
284f343346bSRichard Henderson * read beyond the page, Just In Case. For even more simplicity,
285f343346bSRichard Henderson * ignore the actual target page size and use a 1k boundary. If
286f343346bSRichard Henderson * that turns out to be insufficient, we'll come back around the
287f343346bSRichard Henderson * loop and read more.
288f343346bSRichard Henderson */
289f343346bSRichard Henderson uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
290f343346bSRichard Henderson size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
291f343346bSRichard Henderson const uint8_t *cbuf = cap_buf;
292f343346bSRichard Henderson
293f343346bSRichard Henderson /* Make certain that we can make progress. */
294f343346bSRichard Henderson assert(tsize != 0);
295*90bbf9d9SAlex Bennée if (info->read_memory_func(pc + csize, cap_buf + csize,
296*90bbf9d9SAlex Bennée tsize, info) == 0)
297*90bbf9d9SAlex Bennée {
298f343346bSRichard Henderson csize += tsize;
299f343346bSRichard Henderson
300f343346bSRichard Henderson if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
301f343346bSRichard Henderson cap_dump_insn(info, insn);
302f343346bSRichard Henderson if (--count <= 0) {
303f343346bSRichard Henderson break;
304f343346bSRichard Henderson }
305f343346bSRichard Henderson }
306f343346bSRichard Henderson memmove(cap_buf, cbuf, csize);
307*90bbf9d9SAlex Bennée } else {
308*90bbf9d9SAlex Bennée info->fprintf_func(info->stream,
309*90bbf9d9SAlex Bennée "0x%08" PRIx64 ": unable to read memory\n", pc);
310*90bbf9d9SAlex Bennée break;
311*90bbf9d9SAlex Bennée }
312f343346bSRichard Henderson }
313f343346bSRichard Henderson
314f343346bSRichard Henderson cs_close(&handle);
315f343346bSRichard Henderson return true;
316f343346bSRichard Henderson }
317f343346bSRichard Henderson
318f343346bSRichard Henderson /* Disassemble a single instruction directly into plugin output */
cap_disas_plugin(disassemble_info * info,uint64_t pc,size_t size)319f343346bSRichard Henderson bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
320f343346bSRichard Henderson {
321f343346bSRichard Henderson uint8_t cap_buf[32];
322f343346bSRichard Henderson const uint8_t *cbuf = cap_buf;
323f343346bSRichard Henderson csh handle;
324f343346bSRichard Henderson
325f343346bSRichard Henderson if (cap_disas_start(info, &handle) != CS_ERR_OK) {
326f343346bSRichard Henderson return false;
327f343346bSRichard Henderson }
328f343346bSRichard Henderson
329f343346bSRichard Henderson assert(size < sizeof(cap_buf));
330f343346bSRichard Henderson info->read_memory_func(pc, cap_buf, size, info);
331f343346bSRichard Henderson
332f343346bSRichard Henderson if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) {
333f343346bSRichard Henderson info->fprintf_func(info->stream, "%s %s",
334f343346bSRichard Henderson cap_insn->mnemonic, cap_insn->op_str);
335f343346bSRichard Henderson }
336f343346bSRichard Henderson
337f343346bSRichard Henderson cs_close(&handle);
338f343346bSRichard Henderson return true;
339f343346bSRichard Henderson }
340