xref: /qemu/disas/capstone.c (revision 2e8f72ac)
1 /*
2  * Interface to the capstone disassembler.
3  * SPDX-License-Identifier: GPL-2.0-or-later
4  */
5 
6 #include "qemu/osdep.h"
7 #include "qemu/bswap.h"
8 #include "disas/dis-asm.h"
9 #include "disas/capstone.h"
10 
11 
/*
 * Scratch instruction record handed to cs_disasm_iter().  It is allocated
 * lazily by cap_disas_start() via cs_malloc(), with a size private to the
 * capstone library; thus there's no reason not to share it across calls
 * and across host vs target disassembly.  The pointer is thread-local
 * (__thread), so each thread works with its own record.
 */
static __thread cs_insn *cap_insn;
18 
/*
 * SKIPDATA callback for S390X.
 *
 * Left to itself the capstone library always skips 2 bytes for S390X.
 * That is less than ideal: the top two bits of the first byte tell us
 * the size of the insn, so we can skip exactly one insn and stay in
 * sync with the insn stream.
 *
 * CODE is the buffer being disassembled and OFFSET the position of the
 * undecodable data within it.  CODE_SIZE and USER_DATA are unused.
 * Returns the number of bytes to skip: 2, 4 or 6.
 */
static size_t CAPSTONE_API
cap_skipdata_s390x_cb(const uint8_t *code, size_t code_size,
                      size_t offset, void *user_data)
{
    /* See get_ilen() in target/s390x/internal.h.  */
    unsigned top_bits = code[offset] >> 6;

    if (top_bits == 0) {
        return 2;
    }
    if (top_bits == 1 || top_bits == 2) {
        return 4;
    }
    return 6;
}
46 
47 static const cs_opt_skipdata cap_skipdata_s390x = {
48     .mnemonic = ".byte",
49     .callback = cap_skipdata_s390x_cb
50 };
51 
52 /*
53  * Initialize the Capstone library.
54  *
55  * ??? It would be nice to cache this.  We would need one handle for the
56  * host and one for the target.  For most targets we can reset specific
57  * parameters via cs_option(CS_OPT_MODE, new_mode), but we cannot change
58  * CS_ARCH_* in this way.  Thus we would need to be able to close and
59  * re-open the target handle with a different arch for the target in order
60  * to handle AArch64 vs AArch32 mode switching.
61  */
62 static cs_err cap_disas_start(disassemble_info *info, csh *handle)
63 {
64     cs_mode cap_mode = info->cap_mode;
65     cs_err err;
66 
67     cap_mode += (info->endian == BFD_ENDIAN_BIG ? CS_MODE_BIG_ENDIAN
68                  : CS_MODE_LITTLE_ENDIAN);
69 
70     err = cs_open(info->cap_arch, cap_mode, handle);
71     if (err != CS_ERR_OK) {
72         return err;
73     }
74 
75     /* "Disassemble" unknown insns as ".byte W,X,Y,Z".  */
76     cs_option(*handle, CS_OPT_SKIPDATA, CS_OPT_ON);
77 
78     switch (info->cap_arch) {
79     case CS_ARCH_SYSZ:
80         cs_option(*handle, CS_OPT_SKIPDATA_SETUP,
81                   (uintptr_t)&cap_skipdata_s390x);
82         break;
83 
84     case CS_ARCH_X86:
85         /*
86          * We don't care about errors (if for some reason the library
87          * is compiled without AT&T syntax); the user will just have
88          * to deal with the Intel syntax.
89          */
90         cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
91         break;
92     }
93 
94     /* Allocate temp space for cs_disasm_iter.  */
95     if (cap_insn == NULL) {
96         cap_insn = cs_malloc(*handle);
97         if (cap_insn == NULL) {
98             cs_close(handle);
99             return CS_ERR_MEM;
100         }
101     }
102     return CS_ERR_OK;
103 }
104 
105 static void cap_dump_insn_units(disassemble_info *info, cs_insn *insn,
106                                 int i, int n)
107 {
108     fprintf_function print = info->fprintf_func;
109     FILE *stream = info->stream;
110 
111     switch (info->cap_insn_unit) {
112     case 4:
113         if (info->endian == BFD_ENDIAN_BIG) {
114             for (; i < n; i += 4) {
115                 print(stream, " %08x", ldl_be_p(insn->bytes + i));
116 
117             }
118         } else {
119             for (; i < n; i += 4) {
120                 print(stream, " %08x", ldl_le_p(insn->bytes + i));
121             }
122         }
123         break;
124 
125     case 2:
126         if (info->endian == BFD_ENDIAN_BIG) {
127             for (; i < n; i += 2) {
128                 print(stream, " %04x", lduw_be_p(insn->bytes + i));
129             }
130         } else {
131             for (; i < n; i += 2) {
132                 print(stream, " %04x", lduw_le_p(insn->bytes + i));
133             }
134         }
135         break;
136 
137     default:
138         for (; i < n; i++) {
139             print(stream, " %02x", insn->bytes[i]);
140         }
141         break;
142     }
143 }
144 
145 static void cap_dump_insn(disassemble_info *info, cs_insn *insn)
146 {
147     fprintf_function print = info->fprintf_func;
148     FILE *stream = info->stream;
149     int i, n, split;
150 
151     print(stream, "0x%08" PRIx64 ": ", insn->address);
152 
153     n = insn->size;
154     split = info->cap_insn_split;
155 
156     /* Dump the first SPLIT bytes of the instruction.  */
157     cap_dump_insn_units(info, insn, 0, MIN(n, split));
158 
159     /* Add padding up to SPLIT so that mnemonics line up.  */
160     if (n < split) {
161         int width = (split - n) / info->cap_insn_unit;
162         width *= (2 * info->cap_insn_unit + 1);
163         print(stream, "%*s", width, "");
164     }
165 
166     /* Print the actual instruction.  */
167     print(stream, "  %-8s %s\n", insn->mnemonic, insn->op_str);
168 
169     /* Dump any remaining part of the insn on subsequent lines.  */
170     for (i = split; i < n; i += split) {
171         print(stream, "0x%08" PRIx64 ": ", insn->address + i);
172         cap_dump_insn_units(info, insn, i, MIN(n, i + split));
173         print(stream, "\n");
174     }
175 }
176 
177 /* Disassemble SIZE bytes at PC for the target.  */
178 bool cap_disas_target(disassemble_info *info, uint64_t pc, size_t size)
179 {
180     uint8_t cap_buf[1024];
181     csh handle;
182     cs_insn *insn;
183     size_t csize = 0;
184 
185     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
186         return false;
187     }
188     insn = cap_insn;
189 
190     while (1) {
191         size_t tsize = MIN(sizeof(cap_buf) - csize, size);
192         const uint8_t *cbuf = cap_buf;
193 
194         info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
195         csize += tsize;
196         size -= tsize;
197 
198         while (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
199             cap_dump_insn(info, insn);
200         }
201 
202         /* If the target memory is not consumed, go back for more... */
203         if (size != 0) {
204             /*
205              * ... taking care to move any remaining fractional insn
206              * to the beginning of the buffer.
207              */
208             if (csize != 0) {
209                 memmove(cap_buf, cbuf, csize);
210             }
211             continue;
212         }
213 
214         /*
215          * Since the target memory is consumed, we should not have
216          * a remaining fractional insn.
217          */
218         if (csize != 0) {
219             info->fprintf_func(info->stream,
220                 "Disassembler disagrees with translator "
221                 "over instruction decoding\n"
222                 "Please report this to qemu-devel@nongnu.org\n");
223         }
224         break;
225     }
226 
227     cs_close(&handle);
228     return true;
229 }
230 
231 /* Disassemble SIZE bytes at CODE for the host.  */
232 bool cap_disas_host(disassemble_info *info, const void *code, size_t size)
233 {
234     csh handle;
235     const uint8_t *cbuf;
236     cs_insn *insn;
237     uint64_t pc;
238 
239     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
240         return false;
241     }
242     insn = cap_insn;
243 
244     cbuf = code;
245     pc = (uintptr_t)code;
246 
247     while (cs_disasm_iter(handle, &cbuf, &size, &pc, insn)) {
248         cap_dump_insn(info, insn);
249     }
250     if (size != 0) {
251         info->fprintf_func(info->stream,
252             "Disassembler disagrees with TCG over instruction encoding\n"
253             "Please report this to qemu-devel@nongnu.org\n");
254     }
255 
256     cs_close(&handle);
257     return true;
258 }
259 
260 /* Disassemble COUNT insns at PC for the target.  */
261 bool cap_disas_monitor(disassemble_info *info, uint64_t pc, int count)
262 {
263     uint8_t cap_buf[32];
264     csh handle;
265     cs_insn *insn;
266     size_t csize = 0;
267 
268     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
269         return false;
270     }
271     insn = cap_insn;
272 
273     while (1) {
274         /*
275          * We want to read memory for one insn, but generically we do not
276          * know how much memory that is.  We have a small buffer which is
277          * known to be sufficient for all supported targets.  Try to not
278          * read beyond the page, Just In Case.  For even more simplicity,
279          * ignore the actual target page size and use a 1k boundary.  If
280          * that turns out to be insufficient, we'll come back around the
281          * loop and read more.
282          */
283         uint64_t epc = QEMU_ALIGN_UP(pc + csize + 1, 1024);
284         size_t tsize = MIN(sizeof(cap_buf) - csize, epc - pc);
285         const uint8_t *cbuf = cap_buf;
286 
287         /* Make certain that we can make progress.  */
288         assert(tsize != 0);
289         info->read_memory_func(pc + csize, cap_buf + csize, tsize, info);
290         csize += tsize;
291 
292         if (cs_disasm_iter(handle, &cbuf, &csize, &pc, insn)) {
293             cap_dump_insn(info, insn);
294             if (--count <= 0) {
295                 break;
296             }
297         }
298         memmove(cap_buf, cbuf, csize);
299     }
300 
301     cs_close(&handle);
302     return true;
303 }
304 
305 /* Disassemble a single instruction directly into plugin output */
306 bool cap_disas_plugin(disassemble_info *info, uint64_t pc, size_t size)
307 {
308     uint8_t cap_buf[32];
309     const uint8_t *cbuf = cap_buf;
310     csh handle;
311 
312     if (cap_disas_start(info, &handle) != CS_ERR_OK) {
313         return false;
314     }
315 
316     assert(size < sizeof(cap_buf));
317     info->read_memory_func(pc, cap_buf, size, info);
318 
319     if (cs_disasm_iter(handle, &cbuf, &size, &pc, cap_insn)) {
320         info->fprintf_func(info->stream, "%s %s",
321                            cap_insn->mnemonic, cap_insn->op_str);
322     }
323 
324     cs_close(&handle);
325     return true;
326 }
327