xref: /qemu/disas/capstone.c (revision b2a3cbb8)
1 /*
2  * Interface to the capstone disassembler.
3  * SPDX-License-Identifier: GPL-2.0-or-later
4  */
5 
6 #include "qemu/osdep.h"
7 #include "qemu/bswap.h"
8 #include "disas/dis-asm.h"
9 #include "disas/capstone.h"
10 
11 
/*
 * Temporary storage for the capstone library.  This will be allocated via
 * malloc with a size private to the library; thus there's no reason not
 * to share this across calls and across host vs target disassembly.
 *
 * The pointer is per-thread (__thread).  It is filled in lazily by
 * cap_disas_start() with cs_malloc() the first time a handle is opened
 * on a given thread, and is reused by every later disassembly call.
 */
static __thread cs_insn *cap_insn;
18 
/*
 * SKIPDATA callback for S390X.
 *
 * The capstone library always skips 2 bytes for S390X, which is less than
 * ideal: the two most significant bits of the first byte of an insn give
 * its length, so using them keeps us in sync with the insn stream.
 *
 * @code:      buffer being disassembled
 * @code_size: size of @code in bytes (unused)
 * @offset:    position within @code of the byte that failed to decode
 * @user_data: opaque callback data (unused)
 *
 * Returns the number of bytes to skip: 2, 4 or 6.
 */
static size_t CAPSTONE_API
cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
                      size_t offset, void *user_data)
{
    /*
     * Indexed by the top two bits of the first byte.
     * See get_ilen() in target/s390x/internal.h.
     */
    static const uint8_t ilen_by_top_bits[4] = { 2, 4, 4, 6 };

    return ilen_by_top_bits[code[offset] >> 6];
}
46 
/*
 * SKIPDATA setup used for CS_ARCH_SYSZ: undecodable data is printed with
 * the ".byte" mnemonic, and the number of bytes to skip is chosen by
 * cap_skipdata_s390x_cb().
 */
static const cs_opt_skipdata cap_skipdata_s390x = {
    .mnemonic = ".byte",
    .callback = cap_skipdata_s390x_cb
};
51 
52 /*
53  * Initialize the Capstone library.
54  *
55  * ??? It would be nice to cache this.  We would need one handle for the
56  * host and one for the target.  For most targets we can reset specific
57  * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
58  * CS_ARCH_* in this way.  Thus we would need to be able to close and
59  * re-open the target handle with a different arch for the target in order
60  * to handle AArch64 vs AArch32 mode switching.
61  */
62 static cs_err cap_disas_start(disassemble_info *info, csh *handle)
63 {
64     cs_mode cap_mode = info->cap_mode;
65     cs_err err;
66 
67     cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
68                  : CS_MODE_LITTLE_ENDIAN);
69 
70     err = cs_open(info->cap_arch, cap_mode, handle);
71     if (err != CS_ERR_OK) {
72         return err;
73     }
74 
75     /* "Disassemble" unknown insns as ".byte W,X,Y,Z".  */
76     cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
77 
78     switch (info->cap_arch) {
79     case CS_ARCH_SYSZ:
80         cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
81                   (uintptr_t)&cap_skipdata_s390x);
82         break;
83 
84     case CS_ARCH_X86:
85         /*
86          * We don't care about errors (if for some reason the library
87          * is compiled without AT&T syntax); the user will just have
88          * to deal with the Intel syntax.
89          */
90         cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
91         break;
92     }
93 
94     /* Allocate temp space for cs_disasm_iter.  */
95     if (cap_insn == NULL) {
96         cap_insn = cs_malloc(*handle);
97         if (cap_insn == NULL) {
98             cs_close(handle);
99             return CS_ERR_MEM;
100         }
101     }
102     return CS_ERR_OK;
103 }
104 
105 static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
106                                 int i, int n)
107 {
108     fprintf_function print = info->fprintf_func;
109     FILE *stream = info->stream;
110 
111     switch (info->cap_insn_unit) {
112     case 4:
113         if (info->endian == BFD_ENDIAN_BIG) {
114             for (; i < n; i += 4) {
115                 print(stream, " %08x", ldl_be_p(insn->bytes + i));
116 
117             }
118         } else {
119             for (; i < n; i += 4) {
120                 print(stream, " %08x", ldl_le_p(insn->bytes + i));
121             }
122         }
123         break;
124 
125     case 2:
126         if (info->endian == BFD_ENDIAN_BIG) {
127             for (; i < n; i += 2) {
128                 print(stream, " %04x", lduw_be_p(insn->bytes + i));
129             }
130         } else {
131             for (; i < n; i += 2) {
132                 print(stream, " %04x", lduw_le_p(insn->bytes + i));
133             }
134         }
135         break;
136 
137     default:
138         for (; i < n; i++) {
139             print(stream, " %02x", insn->bytes[i]);
140         }
141         break;
142     }
143 }
144 
145 static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
146 {
147     fprintf_function print = info->fprintf_func;
148     FILE *stream = info->stream;
149     int i, n, split;
150 
151     print(stream, "0x%08" PRIx64 ": ", insn->address);
152 
153     n = insn->size;
154     split = info->cap_insn_split;
155 
156     /* Dump the first SPLIT bytes of the instruction.  */
157     cap_dump_insn_units(info, insn, 0, MIN(n, split));
158 
159     /* Add padding up to SPLIT so that mnemonics line up.  */
160     if (n < split) {
161         int width = (split - n) / info->cap_insn_unit;
162         width *= (2 * info->cap_insn_unit + 1);
163         print(stream, "%*s", width, "");
164     }
165 
166     /* Print the actual instruction.  */
167     print(stream, "  %-8s %s\n", insn->mnemonic, insn->op_str);
168 
169     /* Dump any remaining part of the insn on subsequent lines.  */
170     for (i = split; i < n; i += split) {
171         print(stream, "0x%08" PRIx64 ": ", insn->address + i);
172         cap_dump_insn_units(info, insn, i, MIN(n, i + split));
173         print(stream, "\n");
174     }
175 }
176 
177 /* Disassemble SIZE bytes at PC for the target.  */
178 bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
179 {
180     uint8_t cap_buf[1024];
181     csh handle;
182     cs_insn *insn;
183     size_t csize = 0;
184 
185     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
186         return false;
187     }
188     insn = cap_insn;
189 
190     while (1) {
191         size_t tsize = MIN(sizeof(cap_buf) - csize, size);
192         const uint8_t *cbuf = cap_buf;
193 
194         if (info->read_memory_func(pc + csize, cap_buf + csize, tsize, info) == 0) {
195             csize += tsize;
196             size -= tsize;
197 
198             while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
199                 cap_dump_insn(info, insn);
200             }
201 
202             /* If the target memory is not consumed, go back for more... */
203             if (size != 0) {
204                 /*
205                  * ... taking care to move any remaining fractional insn
206                  * to the beginning of the buffer.
207                  */
208                 if (csize != 0) {
209                     memmove(cap_buf, cbuf, csize);
210                 }
211                 continue;
212             }
213 
214             /*
215              * Since the target memory is consumed, we should not have
216              * a remaining fractional insn.
217              */
218             if (csize != 0) {
219                 info->fprintf_func(info->stream,
220                                    "Disassembler disagrees with translator "
221                                    "over instruction decoding\n"
222                                    "Please report this to qemu-devel@nongnu.org\n");
223             }
224             break;
225 
226         } else {
227             info->fprintf_func(info->stream,
228                                "0x%08" PRIx64 ": unable to read memory\n", pc);
229             break;
230         }
231     }
232 
233     cs_close(&handle);
234     return true;
235 }
236 
237 /* Disassemble SIZE bytes at CODE for the host.  */
238 bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
239 {
240     csh handle;
241     const uint8_t *cbuf;
242     cs_insn *insn;
243     uint64_t pc;
244 
245     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
246         return false;
247     }
248     insn = cap_insn;
249 
250     cbuf = code;
251     pc = (uintptr_t)code;
252 
253     while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
254         cap_dump_insn(info, insn);
255     }
256     if (size != 0) {
257         info->fprintf_func(info->stream,
258             "Disassembler disagrees with TCG over instruction encoding\n"
259             "Please report this to qemu-devel@nongnu.org\n");
260     }
261 
262     cs_close(&handle);
263     return true;
264 }
265 
266 /* Disassemble COUNT insns at PC for the target.  */
267 bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
268 {
269     uint8_t cap_buf[32];
270     csh handle;
271     cs_insn *insn;
272     size_t csize = 0;
273 
274     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
275         return false;
276     }
277     insn = cap_insn;
278 
279     while (1) {
280         /*
281          * We want to read memory for one insn, but generically we do not
282          * know how much memory that is.  We have a small buffer which is
283          * known to be sufficient for all supported targets.  Try to not
284          * read beyond the page, Just In Case.  For even more simplicity,
285          * ignore the actual target page size and use a 1k boundary.  If
286          * that turns out to be insufficient, we'll come back around the
287          * loop and read more.
288          */
289         uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
290         size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
291         const uint8_t *cbuf = cap_buf;
292 
293         /* Make certain that we can make progress.  */
294         assert(tsize != 0);
295         if (info->read_memory_func(pc + csize, cap_buf + csize,
296                                    tsize, info) == 0)
297         {
298             csize += tsize;
299 
300             if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
301                 cap_dump_insn(info, insn);
302                 if (--count <= 0) {
303                     break;
304                 }
305             }
306             memmove(cap_buf, cbuf, csize);
307         } else {
308             info->fprintf_func(info->stream,
309                                "0x%08" PRIx64 ": unable to read memory\n", pc);
310             break;
311         }
312     }
313 
314     cs_close(&handle);
315     return true;
316 }
317 
318 /* Disassemble a single instruction directly into plugin output */
319 bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
320 {
321     uint8_t cap_buf[32];
322     const uint8_t *cbuf = cap_buf;
323     csh handle;
324 
325     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
326         return false;
327     }
328 
329     assert(size < sizeof(cap_buf));
330     info->read_memory_func(pc, cap_buf, size, info);
331 
332     if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) {
333         info->fprintf_func(info->stream, "%s %s",
334                            cap_insn->mnemonic, cap_insn->op_str);
335     }
336 
337     cs_close(&handle);
338     return true;
339 }
340