xref: /qemu/disas/capstone.c (revision 90bbf9d9)
1f343346bSRichard Henderson /*
2f343346bSRichard Henderson  * Interface to the capstone disassembler.
3f343346bSRichard Henderson  * SPDX-License-Identifier: GPL-2.0-or-later
4f343346bSRichard Henderson  */
5f343346bSRichard Henderson 
6f343346bSRichard Henderson #include "qemu/osdep.h"
7f343346bSRichard Henderson #include "qemu/bswap.h"
8f343346bSRichard Henderson #include "disas/dis-asm.h"
9f343346bSRichard Henderson #include "disas/capstone.h"
10f343346bSRichard Henderson 
11f343346bSRichard Henderson 
12f343346bSRichard Henderson /*
13f343346bSRichard Henderson  * Temporary storage for the capstone library.  This will be alloced via
14f343346bSRichard Henderson  * malloc with a size private to the library; thus there's no reason not
15f343346bSRichard Henderson  * to share this across calls and across host vs target disassembly.
16f343346bSRichard Henderson  */
17f343346bSRichard Henderson static __thread cs_insn *cap_insn;
18f343346bSRichard Henderson 
/*
 * SKIPDATA callback for S390X: report how many bytes to skip when the
 * byte at CODE[OFFSET] starts something capstone cannot decode.
 *
 * Left to itself the capstone library always skips 2 bytes for S390X.
 * That is less than ideal, because the two most significant bits of the
 * first byte give the size of the insn, which lets us stay in sync with
 * the insn stream.  See get_ilen() in target/s390x/internal.h.
 *
 * CODE_SIZE and USER_DATA are not used.  Returns 2, 4 or 6.
 */
static size_t CAPSTONE_API
cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
                      size_t offset, void *user_data)
{
    /* Indexed by the top two bits of the first byte of the insn.  */
    static const uint8_t ilen_by_top_bits[4] = { 2, 4, 4, 6 };

    return ilen_by_top_bits[code[offset] >> 6];
}
46c6d3da96SRichard Henderson 
47c6d3da96SRichard Henderson static const cs_opt_skipdata cap_skipdata_s390x = {
48c6d3da96SRichard Henderson     .mnemonic = ".byte",
49c6d3da96SRichard Henderson     .callback = cap_skipdata_s390x_cb
50c6d3da96SRichard Henderson };
51c6d3da96SRichard Henderson 
52c6d3da96SRichard Henderson /*
53f343346bSRichard Henderson  * Initialize the Capstone library.
54f343346bSRichard Henderson  *
55f343346bSRichard Henderson  * ??? It would be nice to cache this.  We would need one handle for the
56f343346bSRichard Henderson  * host and one for the target.  For most targets we can reset specific
57f343346bSRichard Henderson  * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
58f343346bSRichard Henderson  * CS_ARCH_* in this way.  Thus we would need to be able to close and
59f343346bSRichard Henderson  * re-open the target handle with a different arch for the target in order
60f343346bSRichard Henderson  * to handle AArch64 vs AArch32 mode switching.
61f343346bSRichard Henderson  */
cap_disas_start(disassemble_info * info,csh * handle)62f343346bSRichard Henderson static cs_err cap_disas_start(disassemble_info *info, csh *handle)
63f343346bSRichard Henderson {
64f343346bSRichard Henderson     cs_mode cap_mode = info->cap_mode;
65f343346bSRichard Henderson     cs_err err;
66f343346bSRichard Henderson 
67f343346bSRichard Henderson     cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
68f343346bSRichard Henderson                  : CS_MODE_LITTLE_ENDIAN);
69f343346bSRichard Henderson 
70f343346bSRichard Henderson     err = cs_open(info->cap_arch, cap_mode, handle);
71f343346bSRichard Henderson     if (err != CS_ERR_OK) {
72f343346bSRichard Henderson         return err;
73f343346bSRichard Henderson     }
74f343346bSRichard Henderson 
75f343346bSRichard Henderson     /* "Disassemble" unknown insns as ".byte W,X,Y,Z".  */
76f343346bSRichard Henderson     cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
77f343346bSRichard Henderson 
78c6d3da96SRichard Henderson     switch (info->cap_arch) {
79c6d3da96SRichard Henderson     case CS_ARCH_SYSZ:
80c6d3da96SRichard Henderson         cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
81c6d3da96SRichard Henderson                   (uintptr_t)&cap_skipdata_s390x);
82c6d3da96SRichard Henderson         break;
83c6d3da96SRichard Henderson 
84c6d3da96SRichard Henderson     case CS_ARCH_X86:
85f343346bSRichard Henderson         /*
86f343346bSRichard Henderson          * We don't care about errors (if for some reason the library
87f343346bSRichard Henderson          * is compiled without AT&T syntax); the user will just have
88f343346bSRichard Henderson          * to deal with the Intel syntax.
89f343346bSRichard Henderson          */
90f343346bSRichard Henderson         cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
91c6d3da96SRichard Henderson         break;
92f343346bSRichard Henderson     }
93f343346bSRichard Henderson 
94f343346bSRichard Henderson     /* Allocate temp space for cs_disasm_iter.  */
95f343346bSRichard Henderson     if (cap_insn == NULL) {
96f343346bSRichard Henderson         cap_insn = cs_malloc(*handle);
97f343346bSRichard Henderson         if (cap_insn == NULL) {
98f343346bSRichard Henderson             cs_close(handle);
99f343346bSRichard Henderson             return CS_ERR_MEM;
100f343346bSRichard Henderson         }
101f343346bSRichard Henderson     }
102f343346bSRichard Henderson     return CS_ERR_OK;
103f343346bSRichard Henderson }
104f343346bSRichard Henderson 
/*
 * Print the bytes of INSN in the range [I, N) as hex, grouped into units
 * of info->cap_insn_unit bytes.
 *
 * Units of 4 and 2 bytes are loaded with the endianness given by
 * info->endian and printed as 8 and 4 hex digits respectively; any other
 * unit size prints one byte at a time.  Every unit is preceded by a
 * single space.  Output goes through info->fprintf_func to info->stream.
 */
static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
                                int i, int n)
{
    fprintf_function emit = info->fprintf_func;
    FILE *out = info->stream;
    const bool big_endian = info->endian == BFD_ENDIAN_BIG;
    const uint8_t *bytes = insn->bytes;

    switch (info->cap_insn_unit) {
    case 4:
        while (i < n) {
            emit(out, " %08x",
                 big_endian ? ldl_be_p(bytes + i) : ldl_le_p(bytes + i));
            i += 4;
        }
        break;

    case 2:
        while (i < n) {
            emit(out, " %04x",
                 big_endian ? lduw_be_p(bytes + i) : lduw_le_p(bytes + i));
            i += 2;
        }
        break;

    default:
        while (i < n) {
            emit(out, " %02x", bytes[i]);
            i++;
        }
        break;
    }
}
144f343346bSRichard Henderson 
/*
 * Print one disassembled instruction.
 *
 * The first line holds the address, up to info->cap_insn_split bytes of
 * the encoding, padding so that mnemonics line up, and then the mnemonic
 * and operands.  If the encoding is longer than the split, the remaining
 * bytes follow on extra lines, each with its own address and at most
 * cap_insn_split bytes.
 */
static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
{
    fprintf_function emit = info->fprintf_func;
    FILE *out = info->stream;
    const int total = insn->size;
    const int chunk = info->cap_insn_split;
    int off;

    emit(out, "0x%08" PRIx64 ": ", insn->address);

    if (total < chunk) {
        int pad;

        /* Short insn: dump all of it, then pad out to CHUNK bytes.  */
        cap_dump_insn_units(info, insn, 0, total);

        /* Each missing unit would have printed a space + 2 digits/byte.  */
        pad = (chunk - total) / info->cap_insn_unit;
        pad *= (2 * info->cap_insn_unit + 1);
        emit(out, "%*s", pad, "");
    } else {
        /* Only the first CHUNK bytes go on the mnemonic line.  */
        cap_dump_insn_units(info, insn, 0, chunk);
    }

    /* Print the actual instruction.  */
    emit(out, "  %-8s %s\n", insn->mnemonic, insn->op_str);

    /* Whatever did not fit above goes on continuation lines.  */
    off = chunk;
    while (off < total) {
        int end = off + chunk;

        if (end > total) {
            end = total;
        }
        emit(out, "0x%08" PRIx64 ": ", insn->address + off);
        cap_dump_insn_units(info, insn, off, end);
        emit(out, "\n");
        off += chunk;
    }
}
176f343346bSRichard Henderson 
177f343346bSRichard Henderson /* Disassemble SIZE bytes at PC for the target.  */
cap_disas_target(disassemble_info * info,uint64_t pc,size_t size)178f343346bSRichard Henderson bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
179f343346bSRichard Henderson {
180f343346bSRichard Henderson     uint8_t cap_buf[1024];
181f343346bSRichard Henderson     csh handle;
182f343346bSRichard Henderson     cs_insn *insn;
183f343346bSRichard Henderson     size_t csize = 0;
184f343346bSRichard Henderson 
185f343346bSRichard Henderson     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
186f343346bSRichard Henderson         return false;
187f343346bSRichard Henderson     }
188f343346bSRichard Henderson     insn = cap_insn;
189f343346bSRichard Henderson 
190f343346bSRichard Henderson     while (1) {
191f343346bSRichard Henderson         size_t tsize = MIN(sizeof(cap_buf) - csize, size);
192f343346bSRichard Henderson         const uint8_t *cbuf = cap_buf;
193f343346bSRichard Henderson 
194*90bbf9d9SAlex Bennée         if (info->read_memory_func(pc + csize, cap_buf + csize, tsize, info) == 0) {
195f343346bSRichard Henderson             csize += tsize;
196f343346bSRichard Henderson             size -= tsize;
197f343346bSRichard Henderson 
198f343346bSRichard Henderson             while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
199f343346bSRichard Henderson                 cap_dump_insn(info, insn);
200f343346bSRichard Henderson             }
201f343346bSRichard Henderson 
202f343346bSRichard Henderson             /* If the target memory is not consumed, go back for more... */
203f343346bSRichard Henderson             if (size != 0) {
204f343346bSRichard Henderson                 /*
205f343346bSRichard Henderson                  * ... taking care to move any remaining fractional insn
206f343346bSRichard Henderson                  * to the beginning of the buffer.
207f343346bSRichard Henderson                  */
208f343346bSRichard Henderson                 if (csize != 0) {
209f343346bSRichard Henderson                     memmove(cap_buf, cbuf, csize);
210f343346bSRichard Henderson                 }
211f343346bSRichard Henderson                 continue;
212f343346bSRichard Henderson             }
213f343346bSRichard Henderson 
214f343346bSRichard Henderson             /*
215f343346bSRichard Henderson              * Since the target memory is consumed, we should not have
216f343346bSRichard Henderson              * a remaining fractional insn.
217f343346bSRichard Henderson              */
218f343346bSRichard Henderson             if (csize != 0) {
219f343346bSRichard Henderson                 info->fprintf_func(info->stream,
220f343346bSRichard Henderson                                    "Disassembler disagrees with translator "
221f343346bSRichard Henderson                                    "over instruction decoding\n"
222f343346bSRichard Henderson                                    "Please report this to qemu-devel@nongnu.org\n");
223f343346bSRichard Henderson             }
224f343346bSRichard Henderson             break;
225*90bbf9d9SAlex Bennée 
226*90bbf9d9SAlex Bennée         } else {
227*90bbf9d9SAlex Bennée             info->fprintf_func(info->stream,
228*90bbf9d9SAlex Bennée                                "0x%08" PRIx64 ": unable to read memory\n", pc);
229*90bbf9d9SAlex Bennée             break;
230*90bbf9d9SAlex Bennée         }
231f343346bSRichard Henderson     }
232f343346bSRichard Henderson 
233f343346bSRichard Henderson     cs_close(&handle);
234f343346bSRichard Henderson     return true;
235f343346bSRichard Henderson }
236f343346bSRichard Henderson 
237f343346bSRichard Henderson /* Disassemble SIZE bytes at CODE for the host.  */
cap_disas_host(disassemble_info * info,const void * code,size_t size)238f06176beSRichard Henderson bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
239f343346bSRichard Henderson {
240f343346bSRichard Henderson     csh handle;
241f343346bSRichard Henderson     const uint8_t *cbuf;
242f343346bSRichard Henderson     cs_insn *insn;
243f343346bSRichard Henderson     uint64_t pc;
244f343346bSRichard Henderson 
245f343346bSRichard Henderson     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
246f343346bSRichard Henderson         return false;
247f343346bSRichard Henderson     }
248f343346bSRichard Henderson     insn = cap_insn;
249f343346bSRichard Henderson 
250f343346bSRichard Henderson     cbuf = code;
251f343346bSRichard Henderson     pc = (uintptr_t)code;
252f343346bSRichard Henderson 
253f343346bSRichard Henderson     while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
254f343346bSRichard Henderson         cap_dump_insn(info, insn);
255f343346bSRichard Henderson     }
256f343346bSRichard Henderson     if (size != 0) {
257f343346bSRichard Henderson         info->fprintf_func(info->stream,
258f343346bSRichard Henderson             "Disassembler disagrees with TCG over instruction encoding\n"
259f343346bSRichard Henderson             "Please report this to qemu-devel@nongnu.org\n");
260f343346bSRichard Henderson     }
261f343346bSRichard Henderson 
262f343346bSRichard Henderson     cs_close(&handle);
263f343346bSRichard Henderson     return true;
264f343346bSRichard Henderson }
265f343346bSRichard Henderson 
266f343346bSRichard Henderson /* Disassemble COUNT insns at PC for the target.  */
cap_disas_monitor(disassemble_info * info,uint64_t pc,int count)267f343346bSRichard Henderson bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
268f343346bSRichard Henderson {
269f343346bSRichard Henderson     uint8_t cap_buf[32];
270f343346bSRichard Henderson     csh handle;
271f343346bSRichard Henderson     cs_insn *insn;
272f343346bSRichard Henderson     size_t csize = 0;
273f343346bSRichard Henderson 
274f343346bSRichard Henderson     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
275f343346bSRichard Henderson         return false;
276f343346bSRichard Henderson     }
277f343346bSRichard Henderson     insn = cap_insn;
278f343346bSRichard Henderson 
279f343346bSRichard Henderson     while (1) {
280f343346bSRichard Henderson         /*
281f343346bSRichard Henderson          * We want to read memory for one insn, but generically we do not
282f343346bSRichard Henderson          * know how much memory that is.  We have a small buffer which is
283f343346bSRichard Henderson          * known to be sufficient for all supported targets.  Try to not
284f343346bSRichard Henderson          * read beyond the page, Just In Case.  For even more simplicity,
285f343346bSRichard Henderson          * ignore the actual target page size and use a 1k boundary.  If
286f343346bSRichard Henderson          * that turns out to be insufficient, we'll come back around the
287f343346bSRichard Henderson          * loop and read more.
288f343346bSRichard Henderson          */
289f343346bSRichard Henderson         uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
290f343346bSRichard Henderson         size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
291f343346bSRichard Henderson         const uint8_t *cbuf = cap_buf;
292f343346bSRichard Henderson 
293f343346bSRichard Henderson         /* Make certain that we can make progress.  */
294f343346bSRichard Henderson         assert(tsize != 0);
295*90bbf9d9SAlex Bennée         if (info->read_memory_func(pc + csize, cap_buf + csize,
296*90bbf9d9SAlex Bennée                                    tsize, info) == 0)
297*90bbf9d9SAlex Bennée         {
298f343346bSRichard Henderson             csize += tsize;
299f343346bSRichard Henderson 
300f343346bSRichard Henderson             if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
301f343346bSRichard Henderson                 cap_dump_insn(info, insn);
302f343346bSRichard Henderson                 if (--count <= 0) {
303f343346bSRichard Henderson                     break;
304f343346bSRichard Henderson                 }
305f343346bSRichard Henderson             }
306f343346bSRichard Henderson             memmove(cap_buf, cbuf, csize);
307*90bbf9d9SAlex Bennée         } else {
308*90bbf9d9SAlex Bennée             info->fprintf_func(info->stream,
309*90bbf9d9SAlex Bennée                                "0x%08" PRIx64 ": unable to read memory\n", pc);
310*90bbf9d9SAlex Bennée             break;
311*90bbf9d9SAlex Bennée         }
312f343346bSRichard Henderson     }
313f343346bSRichard Henderson 
314f343346bSRichard Henderson     cs_close(&handle);
315f343346bSRichard Henderson     return true;
316f343346bSRichard Henderson }
317f343346bSRichard Henderson 
318f343346bSRichard Henderson /* Disassemble a single instruction directly into plugin output */
cap_disas_plugin(disassemble_info * info,uint64_t pc,size_t size)319f343346bSRichard Henderson bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
320f343346bSRichard Henderson {
321f343346bSRichard Henderson     uint8_t cap_buf[32];
322f343346bSRichard Henderson     const uint8_t *cbuf = cap_buf;
323f343346bSRichard Henderson     csh handle;
324f343346bSRichard Henderson 
325f343346bSRichard Henderson     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
326f343346bSRichard Henderson         return false;
327f343346bSRichard Henderson     }
328f343346bSRichard Henderson 
329f343346bSRichard Henderson     assert(size < sizeof(cap_buf));
330f343346bSRichard Henderson     info->read_memory_func(pc, cap_buf, size, info);
331f343346bSRichard Henderson 
332f343346bSRichard Henderson     if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) {
333f343346bSRichard Henderson         info->fprintf_func(info->stream, "%s %s",
334f343346bSRichard Henderson                            cap_insn->mnemonic, cap_insn->op_str);
335f343346bSRichard Henderson     }
336f343346bSRichard Henderson 
337f343346bSRichard Henderson     cs_close(&handle);
338f343346bSRichard Henderson     return true;
339f343346bSRichard Henderson }
340