xref: /qemu/target/riscv/kvm/kvm-cpu.c (revision a0e93dd8)
1 /*
2  * RISC-V implementation of KVM hooks
3  *
4  * Copyright (c) 2020 Huawei Technologies Co., Ltd
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2 or later, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along with
16  * this program.  If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "qemu/osdep.h"
20 #include <sys/ioctl.h>
21 #include <sys/prctl.h>
22 
23 #include <linux/kvm.h>
24 
25 #include "qemu/timer.h"
26 #include "qapi/error.h"
27 #include "qemu/error-report.h"
28 #include "qemu/main-loop.h"
29 #include "qapi/visitor.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/kvm_int.h"
33 #include "cpu.h"
34 #include "trace.h"
35 #include "hw/core/accel-cpu.h"
36 #include "hw/pci/pci.h"
37 #include "exec/memattrs.h"
38 #include "exec/address-spaces.h"
39 #include "hw/boards.h"
40 #include "hw/irq.h"
41 #include "hw/intc/riscv_imsic.h"
42 #include "qemu/log.h"
43 #include "hw/loader.h"
44 #include "kvm_riscv.h"
45 #include "sbi_ecall_interface.h"
46 #include "chardev/char-fe.h"
47 #include "migration/migration.h"
48 #include "sysemu/runstate.h"
49 #include "hw/riscv/numa.h"
50 
/*
 * prctl() interface to control the vector unit state for this task.
 * NOTE(review): defined locally, presumably because older kernel/libc
 * headers don't ship them yet — confirm against <linux/prctl.h>.
 */
#define PR_RISCV_V_SET_CONTROL            69
#define PR_RISCV_V_VSTATE_CTRL_ON          2
53 
54 void riscv_kvm_aplic_request(void *opaque, int irq, int level)
55 {
56     kvm_set_irq(kvm_state, irq, !!level);
57 }
58 
/*
 * Whether the host KVM exposes KVM_CAP_MP_STATE.
 * NOTE(review): set elsewhere in this file (not in view) — verify at
 * the accel init site.
 */
static bool cap_has_mp_state;
60 
61 static uint64_t kvm_riscv_reg_id_ulong(CPURISCVState *env, uint64_t type,
62                                  uint64_t idx)
63 {
64     uint64_t id = KVM_REG_RISCV | type | idx;
65 
66     switch (riscv_cpu_mxl(env)) {
67     case MXL_RV32:
68         id |= KVM_REG_SIZE_U32;
69         break;
70     case MXL_RV64:
71         id |= KVM_REG_SIZE_U64;
72         break;
73     default:
74         g_assert_not_reached();
75     }
76     return id;
77 }
78 
79 static uint64_t kvm_riscv_reg_id_u32(uint64_t type, uint64_t idx)
80 {
81     return KVM_REG_RISCV | KVM_REG_SIZE_U32 | type | idx;
82 }
83 
84 static uint64_t kvm_riscv_reg_id_u64(uint64_t type, uint64_t idx)
85 {
86     return KVM_REG_RISCV | KVM_REG_SIZE_U64 | type | idx;
87 }
88 
89 static uint64_t kvm_encode_reg_size_id(uint64_t id, size_t size_b)
90 {
91     uint64_t size_ctz = __builtin_ctz(size_b);
92 
93     return id | (size_ctz << KVM_REG_SIZE_SHIFT);
94 }
95 
96 static uint64_t kvm_riscv_vector_reg_id(RISCVCPU *cpu,
97                                         uint64_t idx)
98 {
99     uint64_t id;
100     size_t size_b;
101 
102     g_assert(idx < 32);
103 
104     id = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(idx);
105     size_b = cpu->cfg.vlenb;
106 
107     return kvm_encode_reg_size_id(id, size_b);
108 }
109 
/* ID builders for target_ulong-wide core/CSR/config register groups. */
#define RISCV_CORE_REG(env, name) \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, \
                           KVM_REG_RISCV_CORE_REG(name))

#define RISCV_CSR_REG(env, name) \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CSR, \
                           KVM_REG_RISCV_CSR_REG(name))

#define RISCV_CONFIG_REG(env, name) \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG, \
                           KVM_REG_RISCV_CONFIG_REG(name))

/* Timer registers are always 64 bits wide, regardless of MXL. */
#define RISCV_TIMER_REG(name)  kvm_riscv_reg_id_u64(KVM_REG_RISCV_TIMER, \
                 KVM_REG_RISCV_TIMER_REG(name))

/* FP registers: F regs are 32 bits, D regs 64 bits. */
#define RISCV_FP_F_REG(idx)  kvm_riscv_reg_id_u32(KVM_REG_RISCV_FP_F, idx)

#define RISCV_FP_D_REG(idx)  kvm_riscv_reg_id_u64(KVM_REG_RISCV_FP_D, idx)

#define RISCV_VECTOR_CSR_REG(env, name) \
    kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_VECTOR, \
                           KVM_REG_RISCV_VECTOR_CSR_REG(name))

/*
 * Get/set one CSR.  On ioctl failure these expand to an early
 * 'return ret' from the *enclosing* function.
 */
#define KVM_RISCV_GET_CSR(cs, env, csr, reg) \
    do { \
        int _ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, csr), &reg); \
        if (_ret) { \
            return _ret; \
        } \
    } while (0)

#define KVM_RISCV_SET_CSR(cs, env, csr, reg) \
    do { \
        int _ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, csr), &reg); \
        if (_ret) { \
            return _ret; \
        } \
    } while (0)

/* Get/set one timer register; any failure aborts QEMU. */
#define KVM_RISCV_GET_TIMER(cs, name, reg) \
    do { \
        int ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(name), &reg); \
        if (ret) { \
            abort(); \
        } \
    } while (0)

#define KVM_RISCV_SET_TIMER(cs, name, reg) \
    do { \
        int ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(name), &reg); \
        if (ret) { \
            abort(); \
        } \
    } while (0)
164 
/*
 * Binds a user-visible CPU property to the KVM register that backs it.
 */
typedef struct KVMCPUConfig {
    const char *name;        /* property name exposed on the CPU object */
    const char *description;
    target_ulong offset;     /* MISA bit for misa cfgs, cpu->cfg byte offset otherwise */
    uint64_t kvm_reg_id;
    bool user_set;           /* explicitly set by the user on the command line */
    bool supported;          /* the host KVM knows this register */
} KVMCPUConfig;

/* Tie a MISA bit to its KVM ISA_EXT register id. */
#define KVM_MISA_CFG(_bit, _reg_id) \
    {.offset = _bit, .kvm_reg_id = _reg_id}

/* KVM ISA extensions */
static KVMCPUConfig kvm_misa_ext_cfgs[] = {
    KVM_MISA_CFG(RVA, KVM_RISCV_ISA_EXT_A),
    KVM_MISA_CFG(RVC, KVM_RISCV_ISA_EXT_C),
    KVM_MISA_CFG(RVD, KVM_RISCV_ISA_EXT_D),
    KVM_MISA_CFG(RVF, KVM_RISCV_ISA_EXT_F),
    KVM_MISA_CFG(RVH, KVM_RISCV_ISA_EXT_H),
    KVM_MISA_CFG(RVI, KVM_RISCV_ISA_EXT_I),
    KVM_MISA_CFG(RVM, KVM_RISCV_ISA_EXT_M),
    KVM_MISA_CFG(RVV, KVM_RISCV_ISA_EXT_V),
};
188 
189 static void kvm_cpu_get_misa_ext_cfg(Object *obj, Visitor *v,
190                                      const char *name,
191                                      void *opaque, Error **errp)
192 {
193     KVMCPUConfig *misa_ext_cfg = opaque;
194     target_ulong misa_bit = misa_ext_cfg->offset;
195     RISCVCPU *cpu = RISCV_CPU(obj);
196     CPURISCVState *env = &cpu->env;
197     bool value = env->misa_ext_mask & misa_bit;
198 
199     visit_type_bool(v, name, &value, errp);
200 }
201 
202 static void kvm_cpu_set_misa_ext_cfg(Object *obj, Visitor *v,
203                                      const char *name,
204                                      void *opaque, Error **errp)
205 {
206     KVMCPUConfig *misa_ext_cfg = opaque;
207     target_ulong misa_bit = misa_ext_cfg->offset;
208     RISCVCPU *cpu = RISCV_CPU(obj);
209     CPURISCVState *env = &cpu->env;
210     bool value, host_bit;
211 
212     if (!visit_type_bool(v, name, &value, errp)) {
213         return;
214     }
215 
216     host_bit = env->misa_ext_mask & misa_bit;
217 
218     if (value == host_bit) {
219         return;
220     }
221 
222     if (!value) {
223         misa_ext_cfg->user_set = true;
224         return;
225     }
226 
227     /*
228      * Forbid users to enable extensions that aren't
229      * available in the hart.
230      */
231     error_setg(errp, "Enabling MISA bit '%s' is not allowed: it's not "
232                "enabled in the host", misa_ext_cfg->name);
233 }
234 
/*
 * Push user-requested MISA bit changes to the vCPU.  Only disables can
 * reach this point: kvm_cpu_set_misa_ext_cfg() rejects enables of bits
 * the host lacks and flags user_set only for disables.
 */
static void kvm_riscv_update_cpu_misa_ext(RISCVCPU *cpu, CPUState *cs)
{
    CPURISCVState *env = &cpu->env;
    uint64_t id, reg;
    int i, ret;

    for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
        KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
        target_ulong misa_bit = misa_cfg->offset;

        if (!misa_cfg->user_set) {
            continue;
        }

        /* If we're here we're going to disable the MISA bit */
        reg = 0;
        id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
                                    misa_cfg->kvm_reg_id);
        ret = kvm_set_one_reg(cs, id, &reg);
        if (ret != 0) {
            /*
             * We're not checking for -EINVAL because if the bit is about
             * to be disabled, it means that it was already enabled by
             * KVM. We determined that by fetching the 'isa' register
             * during init() time. Any error at this point is worth
             * aborting.
             */
            error_report("Unable to set KVM reg %s, error %d",
                         misa_cfg->name, ret);
            exit(EXIT_FAILURE);
        }
        /* Mirror the change in QEMU's own MISA state. */
        env->misa_ext &= ~misa_bit;
    }
}
269 
/* Tie a named cpu->cfg bool field to a KVM ISA_EXT register id. */
#define KVM_EXT_CFG(_name, _prop, _reg_id) \
    {.name = _name, .offset = CPU_CFG_OFFSET(_prop), \
     .kvm_reg_id = _reg_id}

/* Multi-letter extensions KVM can report and toggle via ISA_EXT regs. */
static KVMCPUConfig kvm_multi_ext_cfgs[] = {
    KVM_EXT_CFG("zicbom", ext_zicbom, KVM_RISCV_ISA_EXT_ZICBOM),
    KVM_EXT_CFG("zicboz", ext_zicboz, KVM_RISCV_ISA_EXT_ZICBOZ),
    KVM_EXT_CFG("zicntr", ext_zicntr, KVM_RISCV_ISA_EXT_ZICNTR),
    KVM_EXT_CFG("zicsr", ext_zicsr, KVM_RISCV_ISA_EXT_ZICSR),
    KVM_EXT_CFG("zifencei", ext_zifencei, KVM_RISCV_ISA_EXT_ZIFENCEI),
    KVM_EXT_CFG("zihintpause", ext_zihintpause, KVM_RISCV_ISA_EXT_ZIHINTPAUSE),
    KVM_EXT_CFG("zihpm", ext_zihpm, KVM_RISCV_ISA_EXT_ZIHPM),
    KVM_EXT_CFG("zba", ext_zba, KVM_RISCV_ISA_EXT_ZBA),
    KVM_EXT_CFG("zbb", ext_zbb, KVM_RISCV_ISA_EXT_ZBB),
    KVM_EXT_CFG("zbs", ext_zbs, KVM_RISCV_ISA_EXT_ZBS),
    KVM_EXT_CFG("ssaia", ext_ssaia, KVM_RISCV_ISA_EXT_SSAIA),
    KVM_EXT_CFG("sstc", ext_sstc, KVM_RISCV_ISA_EXT_SSTC),
    KVM_EXT_CFG("svinval", ext_svinval, KVM_RISCV_ISA_EXT_SVINVAL),
    KVM_EXT_CFG("svnapot", ext_svnapot, KVM_RISCV_ISA_EXT_SVNAPOT),
    KVM_EXT_CFG("svpbmt", ext_svpbmt, KVM_RISCV_ISA_EXT_SVPBMT),
};
291 
292 static void *kvmconfig_get_cfg_addr(RISCVCPU *cpu, KVMCPUConfig *kvmcfg)
293 {
294     return (void *)&cpu->cfg + kvmcfg->offset;
295 }
296 
297 static void kvm_cpu_cfg_set(RISCVCPU *cpu, KVMCPUConfig *multi_ext,
298                             uint32_t val)
299 {
300     bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
301 
302     *ext_enabled = val;
303 }
304 
305 static uint32_t kvm_cpu_cfg_get(RISCVCPU *cpu,
306                                 KVMCPUConfig *multi_ext)
307 {
308     bool *ext_enabled = kvmconfig_get_cfg_addr(cpu, multi_ext);
309 
310     return *ext_enabled;
311 }
312 
313 static void kvm_cpu_get_multi_ext_cfg(Object *obj, Visitor *v,
314                                       const char *name,
315                                       void *opaque, Error **errp)
316 {
317     KVMCPUConfig *multi_ext_cfg = opaque;
318     RISCVCPU *cpu = RISCV_CPU(obj);
319     bool value = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
320 
321     visit_type_bool(v, name, &value, errp);
322 }
323 
324 static void kvm_cpu_set_multi_ext_cfg(Object *obj, Visitor *v,
325                                       const char *name,
326                                       void *opaque, Error **errp)
327 {
328     KVMCPUConfig *multi_ext_cfg = opaque;
329     RISCVCPU *cpu = RISCV_CPU(obj);
330     bool value, host_val;
331 
332     if (!visit_type_bool(v, name, &value, errp)) {
333         return;
334     }
335 
336     host_val = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
337 
338     /*
339      * Ignore if the user is setting the same value
340      * as the host.
341      */
342     if (value == host_val) {
343         return;
344     }
345 
346     if (!multi_ext_cfg->supported) {
347         /*
348          * Error out if the user is trying to enable an
349          * extension that KVM doesn't support. Ignore
350          * option otherwise.
351          */
352         if (value) {
353             error_setg(errp, "KVM does not support disabling extension %s",
354                        multi_ext_cfg->name);
355         }
356 
357         return;
358     }
359 
360     multi_ext_cfg->user_set = true;
361     kvm_cpu_cfg_set(cpu, multi_ext_cfg, value);
362 }
363 
/* Cache-block management (zicbom) block size, read from the host. */
static KVMCPUConfig kvm_cbom_blocksize = {
    .name = "cbom_blocksize",
    .offset = CPU_CFG_OFFSET(cbom_blocksize),
    .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)
};

/* Cache-block zero (zicboz) block size, read from the host. */
static KVMCPUConfig kvm_cboz_blocksize = {
    .name = "cboz_blocksize",
    .offset = CPU_CFG_OFFSET(cboz_blocksize),
    .kvm_reg_id = KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)
};

/* Vector register length in bytes; 'supported' set by read_vlenb(). */
static KVMCPUConfig kvm_v_vlenb = {
    .name = "vlenb",
    .offset = CPU_CFG_OFFSET(vlenb),
    .kvm_reg_id =  KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_VECTOR |
                   KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)
};
382 
/*
 * Apply user-requested multi-letter extension changes to the vCPU via
 * its ISA_EXT registers.  Unsupported extensions were already filtered
 * out by kvm_cpu_set_multi_ext_cfg(), so a failure here is fatal.
 */
static void kvm_riscv_update_cpu_cfg_isa_ext(RISCVCPU *cpu, CPUState *cs)
{
    CPURISCVState *env = &cpu->env;
    uint64_t id, reg;
    int i, ret;

    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
        KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];

        if (!multi_ext_cfg->user_set) {
            continue;
        }

        id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
                                    multi_ext_cfg->kvm_reg_id);
        /* Write 1 to enable the extension, 0 to disable it. */
        reg = kvm_cpu_cfg_get(cpu, multi_ext_cfg);
        ret = kvm_set_one_reg(cs, id, &reg);
        if (ret != 0) {
            error_report("Unable to %s extension %s in KVM, error %d",
                         reg ? "enable" : "disable",
                         multi_ext_cfg->name, ret);
            exit(EXIT_FAILURE);
        }
    }
}
408 
409 static void cpu_get_cfg_unavailable(Object *obj, Visitor *v,
410                                     const char *name,
411                                     void *opaque, Error **errp)
412 {
413     bool value = false;
414 
415     visit_type_bool(v, name, &value, errp);
416 }
417 
418 static void cpu_set_cfg_unavailable(Object *obj, Visitor *v,
419                                     const char *name,
420                                     void *opaque, Error **errp)
421 {
422     const char *propname = opaque;
423     bool value;
424 
425     if (!visit_type_bool(v, name, &value, errp)) {
426         return;
427     }
428 
429     if (value) {
430         error_setg(errp, "'%s' is not available with KVM",
431                    propname);
432     }
433 }
434 
/*
 * Register @prop_name on @obj as an extension KVM cannot provide,
 * unless a property of that name already exists.
 */
static void riscv_cpu_add_kvm_unavail_prop(Object *obj, const char *prop_name)
{
    /* Check if KVM created the property already */
    if (object_property_find(obj, prop_name)) {
        return;
    }

    /*
     * Set the default to disabled for every extension
     * unknown to KVM and error out if the user attempts
     * to enable any of them.
     */
    object_property_add(obj, prop_name, "bool",
                        cpu_get_cfg_unavailable,
                        cpu_set_cfg_unavailable,
                        NULL, (void *)prop_name);
}
452 
453 static void riscv_cpu_add_kvm_unavail_prop_array(Object *obj,
454                                         const RISCVCPUMultiExtConfig *array)
455 {
456     const RISCVCPUMultiExtConfig *prop;
457 
458     g_assert(array);
459 
460     for (prop = array; prop && prop->name; prop++) {
461         riscv_cpu_add_kvm_unavail_prop(obj, prop->name);
462     }
463 }
464 
/*
 * Expose all user-facing CPU properties on a KVM vCPU object:
 *  - satp modes;
 *  - MISA bits known to KVM, validated against the host;
 *  - remaining MISA bits, registered as unavailable;
 *  - multi-letter extensions known to KVM;
 *  - every other QEMU extension and all profiles, as unavailable.
 */
static void kvm_riscv_add_cpu_user_properties(Object *cpu_obj)
{
    int i;

    riscv_add_satp_mode_properties(cpu_obj);

    /* MISA single-letter extensions with KVM backing. */
    for (i = 0; i < ARRAY_SIZE(kvm_misa_ext_cfgs); i++) {
        KVMCPUConfig *misa_cfg = &kvm_misa_ext_cfgs[i];
        int bit = misa_cfg->offset;

        misa_cfg->name = riscv_get_misa_ext_name(bit);
        misa_cfg->description = riscv_get_misa_ext_description(bit);

        object_property_add(cpu_obj, misa_cfg->name, "bool",
                            kvm_cpu_get_misa_ext_cfg,
                            kvm_cpu_set_misa_ext_cfg,
                            NULL, misa_cfg);
        object_property_set_description(cpu_obj, misa_cfg->name,
                                        misa_cfg->description);
    }

    /* MISA bits not covered above (already-added names are skipped). */
    for (i = 0; misa_bits[i] != 0; i++) {
        const char *ext_name = riscv_get_misa_ext_name(misa_bits[i]);
        riscv_cpu_add_kvm_unavail_prop(cpu_obj, ext_name);
    }

    /* Multi-letter extensions KVM can toggle. */
    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
        KVMCPUConfig *multi_cfg = &kvm_multi_ext_cfgs[i];

        object_property_add(cpu_obj, multi_cfg->name, "bool",
                            kvm_cpu_get_multi_ext_cfg,
                            kvm_cpu_set_multi_ext_cfg,
                            NULL, multi_cfg);
    }

    riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_extensions);
    riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_vendor_exts);
    riscv_cpu_add_kvm_unavail_prop_array(cpu_obj, riscv_cpu_experimental_exts);

    /* We don't have the needed KVM support for profiles */
    for (i = 0; riscv_profiles[i] != NULL; i++) {
        riscv_cpu_add_kvm_unavail_prop(cpu_obj, riscv_profiles[i]->name);
    }
}
509 
510 static int kvm_riscv_get_regs_core(CPUState *cs)
511 {
512     int ret = 0;
513     int i;
514     target_ulong reg;
515     CPURISCVState *env = &RISCV_CPU(cs)->env;
516 
517     ret = kvm_get_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
518     if (ret) {
519         return ret;
520     }
521     env->pc = reg;
522 
523     for (i = 1; i < 32; i++) {
524         uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
525         ret = kvm_get_one_reg(cs, id, &reg);
526         if (ret) {
527             return ret;
528         }
529         env->gpr[i] = reg;
530     }
531 
532     return ret;
533 }
534 
535 static int kvm_riscv_put_regs_core(CPUState *cs)
536 {
537     int ret = 0;
538     int i;
539     target_ulong reg;
540     CPURISCVState *env = &RISCV_CPU(cs)->env;
541 
542     reg = env->pc;
543     ret = kvm_set_one_reg(cs, RISCV_CORE_REG(env, regs.pc), &reg);
544     if (ret) {
545         return ret;
546     }
547 
548     for (i = 1; i < 32; i++) {
549         uint64_t id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CORE, i);
550         reg = env->gpr[i];
551         ret = kvm_set_one_reg(cs, id, &reg);
552         if (ret) {
553             return ret;
554         }
555     }
556 
557     return ret;
558 }
559 
/*
 * Read the supervisor CSRs from KVM.  KVM's sstatus/sie/sip views are
 * stored in QEMU's machine-level fields (mstatus/mie/mip).  Each macro
 * returns the ioctl error from this function on failure.
 */
static int kvm_riscv_get_regs_csr(CPUState *cs)
{
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    KVM_RISCV_GET_CSR(cs, env, sstatus, env->mstatus);
    KVM_RISCV_GET_CSR(cs, env, sie, env->mie);
    KVM_RISCV_GET_CSR(cs, env, stvec, env->stvec);
    KVM_RISCV_GET_CSR(cs, env, sscratch, env->sscratch);
    KVM_RISCV_GET_CSR(cs, env, sepc, env->sepc);
    KVM_RISCV_GET_CSR(cs, env, scause, env->scause);
    KVM_RISCV_GET_CSR(cs, env, stval, env->stval);
    KVM_RISCV_GET_CSR(cs, env, sip, env->mip);
    KVM_RISCV_GET_CSR(cs, env, satp, env->satp);

    return 0;
}
576 
/*
 * Write the supervisor CSRs to KVM, mirroring kvm_riscv_get_regs_csr().
 * Each macro returns the ioctl error from this function on failure.
 */
static int kvm_riscv_put_regs_csr(CPUState *cs)
{
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    KVM_RISCV_SET_CSR(cs, env, sstatus, env->mstatus);
    KVM_RISCV_SET_CSR(cs, env, sie, env->mie);
    KVM_RISCV_SET_CSR(cs, env, stvec, env->stvec);
    KVM_RISCV_SET_CSR(cs, env, sscratch, env->sscratch);
    KVM_RISCV_SET_CSR(cs, env, sepc, env->sepc);
    KVM_RISCV_SET_CSR(cs, env, scause, env->scause);
    KVM_RISCV_SET_CSR(cs, env, stval, env->stval);
    KVM_RISCV_SET_CSR(cs, env, sip, env->mip);
    KVM_RISCV_SET_CSR(cs, env, satp, env->satp);

    return 0;
}
593 
/*
 * Read the FP register file from KVM.  With RVD, all 32 registers are
 * fetched as 64-bit D registers; otherwise, with RVF only, as 32-bit
 * F registers.  Returns 0 when neither extension is present.
 */
static int kvm_riscv_get_regs_fp(CPUState *cs)
{
    int ret = 0;
    int i;
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    if (riscv_has_ext(env, RVD)) {
        uint64_t reg;
        for (i = 0; i < 32; i++) {
            ret = kvm_get_one_reg(cs, RISCV_FP_D_REG(i), &reg);
            if (ret) {
                return ret;
            }
            env->fpr[i] = reg;
        }
        return ret;
    }

    if (riscv_has_ext(env, RVF)) {
        uint32_t reg;
        for (i = 0; i < 32; i++) {
            ret = kvm_get_one_reg(cs, RISCV_FP_F_REG(i), &reg);
            if (ret) {
                return ret;
            }
            env->fpr[i] = reg;
        }
        return ret;
    }

    return ret;
}
626 
/*
 * Write the FP register file to KVM, mirroring kvm_riscv_get_regs_fp():
 * D registers with RVD, F registers with RVF only, nothing otherwise.
 */
static int kvm_riscv_put_regs_fp(CPUState *cs)
{
    int ret = 0;
    int i;
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    if (riscv_has_ext(env, RVD)) {
        uint64_t reg;
        for (i = 0; i < 32; i++) {
            reg = env->fpr[i];
            ret = kvm_set_one_reg(cs, RISCV_FP_D_REG(i), &reg);
            if (ret) {
                return ret;
            }
        }
        return ret;
    }

    if (riscv_has_ext(env, RVF)) {
        uint32_t reg;
        for (i = 0; i < 32; i++) {
            /* Truncates to the low 32 bits for single-precision state. */
            reg = env->fpr[i];
            ret = kvm_set_one_reg(cs, RISCV_FP_F_REG(i), &reg);
            if (ret) {
                return ret;
            }
        }
        return ret;
    }

    return ret;
}
659 
/*
 * Snapshot the KVM timer registers into env->kvm_timer_*.  A no-op if
 * the cached values are still valid (kvm_timer_dirty set); the
 * GET_TIMER macro aborts QEMU on ioctl failure.
 */
static void kvm_riscv_get_regs_timer(CPUState *cs)
{
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    if (env->kvm_timer_dirty) {
        return;
    }

    KVM_RISCV_GET_TIMER(cs, time, env->kvm_timer_time);
    KVM_RISCV_GET_TIMER(cs, compare, env->kvm_timer_compare);
    KVM_RISCV_GET_TIMER(cs, state, env->kvm_timer_state);
    KVM_RISCV_GET_TIMER(cs, frequency, env->kvm_timer_frequency);

    env->kvm_timer_dirty = true;
}
675 
/*
 * Write the cached timer state back to KVM and invalidate the cache.
 * A no-op unless kvm_riscv_get_regs_timer() populated it first.
 */
static void kvm_riscv_put_regs_timer(CPUState *cs)
{
    uint64_t reg;
    CPURISCVState *env = &RISCV_CPU(cs)->env;

    if (!env->kvm_timer_dirty) {
        return;
    }

    KVM_RISCV_SET_TIMER(cs, time, env->kvm_timer_time);
    KVM_RISCV_SET_TIMER(cs, compare, env->kvm_timer_compare);

    /*
     * Writing RISCV_TIMER_REG(state) while env->kvm_timer_state == 0
     * makes KVM return an error.  This would be better handled on the
     * KVM side, but work around it here for now.
     * TODO If KVM changes, adapt here.
     */
    if (env->kvm_timer_state) {
        KVM_RISCV_SET_TIMER(cs, state, env->kvm_timer_state);
    }

    /*
     * For now, migration will not work between Hosts with different timer
     * frequency. Therefore, we should check whether they are the same here
     * during the migration.
     */
    if (migration_is_running(migrate_get_current()->state)) {
        KVM_RISCV_GET_TIMER(cs, frequency, reg);
        if (reg != env->kvm_timer_frequency) {
            error_report("Dst Hosts timer frequency != Src Hosts");
        }
    }

    env->kvm_timer_dirty = false;
}
712 
/*
 * Read the vector state from KVM: vstart/vl/vtype always, plus vlenb
 * and the 32 vector registers when the host exposes the vlenb register
 * (kvm_v_vlenb.supported).  Returns the first ioctl error, or 0.
 */
static int kvm_riscv_get_regs_vector(CPUState *cs)
{
    RISCVCPU *cpu = RISCV_CPU(cs);
    CPURISCVState *env = &cpu->env;
    target_ulong reg;
    uint64_t vreg_id;
    int vreg_idx, ret = 0;

    if (!riscv_has_ext(env, RVV)) {
        return 0;
    }

    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
    if (ret) {
        return ret;
    }
    env->vstart = reg;

    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
    if (ret) {
        return ret;
    }
    env->vl = reg;

    ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
    if (ret) {
        return ret;
    }
    env->vtype = reg;

    if (kvm_v_vlenb.supported) {
        ret = kvm_get_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
        if (ret) {
            return ret;
        }
        cpu->cfg.vlenb = reg;

        for (int i = 0; i < 32; i++) {
            /*
             * vreg[] is statically allocated using RV_VLEN_MAX.
             * Use it instead of vlenb to calculate vreg_idx for
             * simplicity.
             */
            vreg_idx = i * RV_VLEN_MAX / 64;
            vreg_id = kvm_riscv_vector_reg_id(cpu, i);

            ret = kvm_get_one_reg(cs, vreg_id, &env->vreg[vreg_idx]);
            if (ret) {
                return ret;
            }
        }
    }

    return 0;
}
768 
769 static int kvm_riscv_put_regs_vector(CPUState *cs)
770 {
771     RISCVCPU *cpu = RISCV_CPU(cs);
772     CPURISCVState *env = &cpu->env;
773     target_ulong reg;
774     uint64_t vreg_id;
775     int vreg_idx, ret = 0;
776 
777     if (!riscv_has_ext(env, RVV)) {
778         return 0;
779     }
780 
781     reg = env->vstart;
782     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vstart), &reg);
783     if (ret) {
784         return ret;
785     }
786 
787     reg = env->vl;
788     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vl), &reg);
789     if (ret) {
790         return ret;
791     }
792 
793     reg = env->vtype;
794     ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vtype), &reg);
795     if (ret) {
796         return ret;
797     }
798 
799     if (kvm_v_vlenb.supported) {
800         reg = cpu->cfg.vlenb;
801         ret = kvm_set_one_reg(cs, RISCV_VECTOR_CSR_REG(env, vlenb), &reg);
802 
803         for (int i = 0; i < 32; i++) {
804             /*
805              * vreg[] is statically allocated using RV_VLEN_MAX.
806              * Use it instead of vlenb to calculate vreg_idx for
807              * simplicity.
808              */
809             vreg_idx = i * RV_VLEN_MAX / 64;
810             vreg_id = kvm_riscv_vector_reg_id(cpu, i);
811 
812             ret = kvm_set_one_reg(cs, vreg_id, &env->vreg[vreg_idx]);
813             if (ret) {
814                 return ret;
815             }
816         }
817     }
818 
819     return ret;
820 }
821 
/* File descriptors of a throwaway vCPU used to probe host capabilities. */
typedef struct KVMScratchCPU {
    int kvmfd;   /* /dev/kvm */
    int vmfd;    /* from KVM_CREATE_VM */
    int cpufd;   /* from KVM_CREATE_VCPU */
} KVMScratchCPU;
827 
828 /*
829  * Heavily inspired by kvm_arm_create_scratch_host_vcpu()
830  * from target/arm/kvm.c.
831  */
/*
 * Open /dev/kvm and create a throwaway VM with one vCPU, filling in
 * @scratch on success.  Returns false (with all fds closed) on any
 * failure.  Callers must dispose of it with
 * kvm_riscv_destroy_scratch_vcpu().
 */
static bool kvm_riscv_create_scratch_vcpu(KVMScratchCPU *scratch)
{
    int kvmfd = -1, vmfd = -1, cpufd = -1;

    kvmfd = qemu_open_old("/dev/kvm", O_RDWR);
    if (kvmfd < 0) {
        goto err;
    }
    /* KVM_CREATE_VM may be interrupted by a signal; retry on EINTR. */
    do {
        vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
    } while (vmfd == -1 && errno == EINTR);
    if (vmfd < 0) {
        goto err;
    }
    cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
    if (cpufd < 0) {
        goto err;
    }

    scratch->kvmfd =  kvmfd;
    scratch->vmfd = vmfd;
    scratch->cpufd = cpufd;

    return true;

 err:
    /* Close whatever was opened before the failure, innermost first. */
    if (cpufd >= 0) {
        close(cpufd);
    }
    if (vmfd >= 0) {
        close(vmfd);
    }
    if (kvmfd >= 0) {
        close(kvmfd);
    }

    return false;
}
870 
/* Tear down a scratch vCPU created by kvm_riscv_create_scratch_vcpu(). */
static void kvm_riscv_destroy_scratch_vcpu(KVMScratchCPU *scratch)
{
    close(scratch->cpufd);
    close(scratch->vmfd);
    close(scratch->kvmfd);
}
877 
/*
 * Read mvendorid/marchid/mimpid from the scratch vCPU straight into
 * cpu->cfg.  Failures are reported but non-fatal: the fields then keep
 * whatever value they already had.
 */
static void kvm_riscv_init_machine_ids(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
{
    CPURISCVState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = RISCV_CONFIG_REG(env, mvendorid);
    reg.addr = (uint64_t)&cpu->cfg.mvendorid;
    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        error_report("Unable to retrieve mvendorid from host, error %d", ret);
    }

    reg.id = RISCV_CONFIG_REG(env, marchid);
    reg.addr = (uint64_t)&cpu->cfg.marchid;
    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        error_report("Unable to retrieve marchid from host, error %d", ret);
    }

    reg.id = RISCV_CONFIG_REG(env, mimpid);
    reg.addr = (uint64_t)&cpu->cfg.mimpid;
    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        error_report("Unable to retrieve mimpid from host, error %d", ret);
    }
}
905 
/*
 * Read the host's MISA bits (the 'isa' config register) into
 * env->misa_ext_mask and mirror them into env->misa_ext.  Fatal on
 * failure, since all subsequent extension handling depends on it.
 */
static void kvm_riscv_init_misa_ext_mask(RISCVCPU *cpu,
                                         KVMScratchCPU *kvmcpu)
{
    CPURISCVState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = RISCV_CONFIG_REG(env, isa);
    reg.addr = (uint64_t)&env->misa_ext_mask;
    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);

    if (ret) {
        error_report("Unable to fetch ISA register from KVM, "
                     "error %d", ret);
        kvm_riscv_destroy_scratch_vcpu(kvmcpu);
        exit(EXIT_FAILURE);
    }

    env->misa_ext = env->misa_ext_mask;
}
926 
/*
 * Read a cache-block size config register (zicbom/zicboz) from the
 * scratch vCPU directly into the cpu->cfg field that @cbomz_cfg
 * describes.  Fatal on failure.
 */
static void kvm_riscv_read_cbomz_blksize(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
                                         KVMCPUConfig *cbomz_cfg)
{
    CPURISCVState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
                                    cbomz_cfg->kvm_reg_id);
    reg.addr = (uint64_t)kvmconfig_get_cfg_addr(cpu, cbomz_cfg);
    ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        error_report("Unable to read KVM reg %s, error %d",
                     cbomz_cfg->name, ret);
        exit(EXIT_FAILURE);
    }
}
944 
/*
 * Probe every known multi-letter extension by reading its ISA_EXT
 * register one at a time.  Fallback path for hosts without
 * KVM_GET_REG_LIST: an EINVAL read marks the extension unsupported,
 * any other error is fatal.
 */
static void kvm_riscv_read_multiext_legacy(RISCVCPU *cpu,
                                           KVMScratchCPU *kvmcpu)
{
    CPURISCVState *env = &cpu->env;
    uint64_t val;
    int i, ret;

    for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
        KVMCPUConfig *multi_ext_cfg = &kvm_multi_ext_cfgs[i];
        struct kvm_one_reg reg;

        reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_ISA_EXT,
                                        multi_ext_cfg->kvm_reg_id);
        reg.addr = (uint64_t)&val;
        ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
        if (ret != 0) {
            if (errno == EINVAL) {
                /* Silently default to 'false' if KVM does not support it. */
                multi_ext_cfg->supported = false;
                val = false;
            } else {
                error_report("Unable to read ISA_EXT KVM register %s: %s",
                             multi_ext_cfg->name, strerror(errno));
                exit(EXIT_FAILURE);
            }
        } else {
            multi_ext_cfg->supported = true;
        }

        /* Mirror the host's enable state into cpu->cfg. */
        kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
    }

    /* Block sizes are only meaningful when the extension exists. */
    if (cpu->cfg.ext_zicbom) {
        kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
    }

    if (cpu->cfg.ext_zicboz) {
        kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
    }
}
985 
/* qsort()/bsearch() comparator for uint64_t values, ascending order. */
static int uint64_cmp(const void *a, const void *b)
{
    uint64_t lhs = *(const uint64_t *)a;
    uint64_t rhs = *(const uint64_t *)b;

    /* Yields -1, 0 or 1 without risking integer overflow. */
    return (lhs > rhs) - (lhs < rhs);
}
1001 
1002 static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu,
1003                                  struct kvm_reg_list *reglist)
1004 {
1005     struct kvm_one_reg reg;
1006     struct kvm_reg_list *reg_search;
1007     uint64_t val;
1008     int ret;
1009 
1010     reg_search = bsearch(&kvm_v_vlenb.kvm_reg_id, reglist->reg, reglist->n,
1011                          sizeof(uint64_t), uint64_cmp);
1012 
1013     if (reg_search) {
1014         reg.id = kvm_v_vlenb.kvm_reg_id;
1015         reg.addr = (uint64_t)&val;
1016 
1017         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
1018         if (ret != 0) {
1019             error_report("Unable to read vlenb register, error code: %s",
1020                          strerrorname_np(errno));
1021             exit(EXIT_FAILURE);
1022         }
1023 
1024         kvm_v_vlenb.supported = true;
1025         cpu->cfg.vlenb = val;
1026     }
1027 }
1028 
1029 static void kvm_riscv_init_multiext_cfg(RISCVCPU *cpu, KVMScratchCPU *kvmcpu)
1030 {
1031     KVMCPUConfig *multi_ext_cfg;
1032     struct kvm_one_reg reg;
1033     struct kvm_reg_list rl_struct;
1034     struct kvm_reg_list *reglist;
1035     uint64_t val, reg_id, *reg_search;
1036     int i, ret;
1037 
1038     rl_struct.n = 0;
1039     ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, &rl_struct);
1040 
1041     /*
1042      * If KVM_GET_REG_LIST isn't supported we'll get errno 22
1043      * (EINVAL). Use read_legacy() in this case.
1044      */
1045     if (errno == EINVAL) {
1046         return kvm_riscv_read_multiext_legacy(cpu, kvmcpu);
1047     } else if (errno != E2BIG) {
1048         /*
1049          * E2BIG is an expected error message for the API since we
1050          * don't know the number of registers. The right amount will
1051          * be written in rl_struct.n.
1052          *
1053          * Error out if we get any other errno.
1054          */
1055         error_report("Error when accessing get-reg-list: %s",
1056                      strerror(errno));
1057         exit(EXIT_FAILURE);
1058     }
1059 
1060     reglist = g_malloc(sizeof(struct kvm_reg_list) +
1061                        rl_struct.n * sizeof(uint64_t));
1062     reglist->n = rl_struct.n;
1063     ret = ioctl(kvmcpu->cpufd, KVM_GET_REG_LIST, reglist);
1064     if (ret) {
1065         error_report("Error when reading KVM_GET_REG_LIST: %s",
1066                      strerror(errno));
1067         exit(EXIT_FAILURE);
1068     }
1069 
1070     /* sort reglist to use bsearch() */
1071     qsort(&reglist->reg, reglist->n, sizeof(uint64_t), uint64_cmp);
1072 
1073     for (i = 0; i < ARRAY_SIZE(kvm_multi_ext_cfgs); i++) {
1074         multi_ext_cfg = &kvm_multi_ext_cfgs[i];
1075         reg_id = kvm_riscv_reg_id_ulong(&cpu->env, KVM_REG_RISCV_ISA_EXT,
1076                                         multi_ext_cfg->kvm_reg_id);
1077         reg_search = bsearch(&reg_id, reglist->reg, reglist->n,
1078                              sizeof(uint64_t), uint64_cmp);
1079         if (!reg_search) {
1080             continue;
1081         }
1082 
1083         reg.id = reg_id;
1084         reg.addr = (uint64_t)&val;
1085         ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, &reg);
1086         if (ret != 0) {
1087             error_report("Unable to read ISA_EXT KVM register %s: %s",
1088                          multi_ext_cfg->name, strerror(errno));
1089             exit(EXIT_FAILURE);
1090         }
1091 
1092         multi_ext_cfg->supported = true;
1093         kvm_cpu_cfg_set(cpu, multi_ext_cfg, val);
1094     }
1095 
1096     if (cpu->cfg.ext_zicbom) {
1097         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cbom_blocksize);
1098     }
1099 
1100     if (cpu->cfg.ext_zicboz) {
1101         kvm_riscv_read_cbomz_blksize(cpu, kvmcpu, &kvm_cboz_blocksize);
1102     }
1103 
1104     if (riscv_has_ext(&cpu->env, RVV)) {
1105         kvm_riscv_read_vlenb(cpu, kvmcpu, reglist);
1106     }
1107 }
1108 
/*
 * Probe host KVM register state (machine IDs, misa extension mask,
 * multi-letter extensions) on a throwaway scratch vcpu and cache the
 * results in the CPU object before the real vcpu exists.  Returns
 * silently without probing if the scratch vcpu cannot be created.
 */
static void riscv_init_kvm_registers(Object *cpu_obj)
{
    RISCVCPU *cpu = RISCV_CPU(cpu_obj);
    KVMScratchCPU kvmcpu;

    if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) {
        return;
    }

    kvm_riscv_init_machine_ids(cpu, &kvmcpu);
    kvm_riscv_init_misa_ext_mask(cpu, &kvmcpu);
    kvm_riscv_init_multiext_cfg(cpu, &kvmcpu);

    kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
}
1124 
/* No extra KVM capabilities are mandatory for RISC-V. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
1128 
1129 int kvm_arch_get_registers(CPUState *cs)
1130 {
1131     int ret = 0;
1132 
1133     ret = kvm_riscv_get_regs_core(cs);
1134     if (ret) {
1135         return ret;
1136     }
1137 
1138     ret = kvm_riscv_get_regs_csr(cs);
1139     if (ret) {
1140         return ret;
1141     }
1142 
1143     ret = kvm_riscv_get_regs_fp(cs);
1144     if (ret) {
1145         return ret;
1146     }
1147 
1148     ret = kvm_riscv_get_regs_vector(cs);
1149     if (ret) {
1150         return ret;
1151     }
1152 
1153     return ret;
1154 }
1155 
1156 int kvm_riscv_sync_mpstate_to_kvm(RISCVCPU *cpu, int state)
1157 {
1158     if (cap_has_mp_state) {
1159         struct kvm_mp_state mp_state = {
1160             .mp_state = state
1161         };
1162 
1163         int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state);
1164         if (ret) {
1165             fprintf(stderr, "%s: failed to sync MP_STATE %d/%s\n",
1166                     __func__, ret, strerror(-ret));
1167             return -1;
1168         }
1169     }
1170 
1171     return 0;
1172 }
1173 
1174 int kvm_arch_put_registers(CPUState *cs, int level)
1175 {
1176     int ret = 0;
1177 
1178     ret = kvm_riscv_put_regs_core(cs);
1179     if (ret) {
1180         return ret;
1181     }
1182 
1183     ret = kvm_riscv_put_regs_csr(cs);
1184     if (ret) {
1185         return ret;
1186     }
1187 
1188     ret = kvm_riscv_put_regs_fp(cs);
1189     if (ret) {
1190         return ret;
1191     }
1192 
1193     ret = kvm_riscv_put_regs_vector(cs);
1194     if (ret) {
1195         return ret;
1196     }
1197 
1198     if (KVM_PUT_RESET_STATE == level) {
1199         RISCVCPU *cpu = RISCV_CPU(cs);
1200         if (cs->cpu_index == 0) {
1201             ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_RUNNABLE);
1202         } else {
1203             ret = kvm_riscv_sync_mpstate_to_kvm(cpu, KVM_MP_STATE_STOPPED);
1204         }
1205         if (ret) {
1206             return ret;
1207         }
1208     }
1209 
1210     return ret;
1211 }
1212 
/* No per-arch cleanup is needed after releasing a virq route. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
1217 
/* MSI routes need no arch-specific fixup on RISC-V. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}
1223 
/* No arch-specific teardown is required when a vcpu is destroyed. */
int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}
1228 
/* The KVM vcpu id is simply the QEMU cpu index. */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}
1233 
1234 static void kvm_riscv_vm_state_change(void *opaque, bool running,
1235                                       RunState state)
1236 {
1237     CPUState *cs = opaque;
1238 
1239     if (running) {
1240         kvm_riscv_put_regs_timer(cs);
1241     } else {
1242         kvm_riscv_get_regs_timer(cs);
1243     }
1244 }
1245 
/* GSI routes are set up in kvm_riscv_aia_create(); nothing to do here. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
1249 
/*
 * Write the machine identity registers (mvendorid, marchid, mimpid)
 * from cpu->cfg into the KVM vcpu's CONFIG registers.  Returns 0 on
 * success or the first failing kvm_set_one_reg() error.
 */
static int kvm_vcpu_set_machine_ids(RISCVCPU *cpu, CPUState *cs)
{
    CPURISCVState *env = &cpu->env;
    target_ulong reg;
    uint64_t id;
    int ret;

    id = RISCV_CONFIG_REG(env, mvendorid);
    /*
     * cfg.mvendorid is an uint32 but a target_ulong will
     * be written. Assign it to a target_ulong var to avoid
     * writing pieces of other cpu->cfg fields in the reg.
     */
    reg = cpu->cfg.mvendorid;
    ret = kvm_set_one_reg(cs, id, &reg);
    if (ret != 0) {
        return ret;
    }

    /* marchid and mimpid are already uint64_t, no staging var needed. */
    id = RISCV_CONFIG_REG(env, marchid);
    ret = kvm_set_one_reg(cs, id, &cpu->cfg.marchid);
    if (ret != 0) {
        return ret;
    }

    id = RISCV_CONFIG_REG(env, mimpid);
    ret = kvm_set_one_reg(cs, id, &cpu->cfg.mimpid);

    return ret;
}
1280 
1281 int kvm_arch_init_vcpu(CPUState *cs)
1282 {
1283     int ret = 0;
1284     RISCVCPU *cpu = RISCV_CPU(cs);
1285 
1286     qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs);
1287 
1288     if (!object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST)) {
1289         ret = kvm_vcpu_set_machine_ids(cpu, cs);
1290         if (ret != 0) {
1291             return ret;
1292         }
1293     }
1294 
1295     kvm_riscv_update_cpu_misa_ext(cpu, cs);
1296     kvm_riscv_update_cpu_cfg_isa_ext(cpu, cs);
1297 
1298     return ret;
1299 }
1300 
/* Never expected to be called on RISC-V; abort if it ever is. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    abort();
}
1305 
/* No arch-specific work after adding an MSI route. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}
1311 
/* RISC-V has a single KVM VM type (0). */
int kvm_arch_get_default_type(MachineState *ms)
{
    return 0;
}
1316 
/* Cache whether the host supports KVM_CAP_MP_STATE (see cap_has_mp_state). */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_has_mp_state = kvm_check_extension(s, KVM_CAP_MP_STATE);
    return 0;
}
1322 
/*
 * Reject split irqchip and report whether the in-kernel AIA can be
 * created, i.e. whether the device control API is available.
 */
int kvm_arch_irqchip_create(KVMState *s)
{
    if (kvm_kernel_irqchip_split()) {
        error_report("-machine kernel_irqchip=split is not supported on RISC-V.");
        exit(1);
    }

    /*
     * We can create the VAIA using the newer device control API.
     */
    return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL);
}
1335 
/* No async events need arch-specific processing on RISC-V. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return 0;
}
1340 
/* Nothing to do before entering the guest. */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
}
1344 
/* No special memory transaction attributes after a guest exit. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
1349 
/* Always stop the VM when KVM reports an emulation error. */
bool kvm_arch_stop_on_emulation_error(CPUState *cs)
{
    return true;
}
1354 
/*
 * Handle a KVM_EXIT_RISCV_SBI exit for the legacy SBI 0.1 console
 * calls; any other SBI extension is logged as unimplemented.
 * Returns 0 on success, -1 for an unhandled extension.
 */
static int kvm_riscv_handle_sbi(CPUState *cs, struct kvm_run *run)
{
    int ret = 0;
    unsigned char ch;
    switch (run->riscv_sbi.extension_id) {
    case SBI_EXT_0_1_CONSOLE_PUTCHAR:
        ch = run->riscv_sbi.args[0];
        /*
         * NOTE(review): the return value of qemu_chr_fe_write() is
         * ignored and serial_hd(0) is assumed non-NULL -- confirm a
         * serial backend always exists when this path is reachable.
         */
        qemu_chr_fe_write(serial_hd(0)->be, &ch, sizeof(ch));
        break;
    case SBI_EXT_0_1_CONSOLE_GETCHAR:
        ret = qemu_chr_fe_read_all(serial_hd(0)->be, &ch, sizeof(ch));
        if (ret == sizeof(ch)) {
            run->riscv_sbi.ret[0] = ch;
        } else {
            /* No character available: report -1 to the guest. */
            run->riscv_sbi.ret[0] = -1;
        }
        ret = 0;
        break;
    default:
        qemu_log_mask(LOG_UNIMP,
                      "%s: un-handled SBI EXIT, specific reasons is %lu\n",
                      __func__, run->riscv_sbi.extension_id);
        ret = -1;
        break;
    }
    return ret;
}
1382 
1383 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1384 {
1385     int ret = 0;
1386     switch (run->exit_reason) {
1387     case KVM_EXIT_RISCV_SBI:
1388         ret = kvm_riscv_handle_sbi(cs, run);
1389         break;
1390     default:
1391         qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
1392                       __func__, run->exit_reason);
1393         ret = -1;
1394         break;
1395     }
1396     return ret;
1397 }
1398 
1399 void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
1400 {
1401     CPURISCVState *env = &cpu->env;
1402     int i;
1403 
1404     if (!kvm_enabled()) {
1405         return;
1406     }
1407     for (i = 0; i < 32; i++) {
1408         env->gpr[i] = 0;
1409     }
1410     env->pc = cpu->env.kernel_addr;
1411     env->gpr[10] = kvm_arch_vcpu_id(CPU(cpu)); /* a0 */
1412     env->gpr[11] = cpu->env.fdt_addr;          /* a1 */
1413     env->satp = 0;
1414     env->mie = 0;
1415     env->stvec = 0;
1416     env->sscratch = 0;
1417     env->sepc = 0;
1418     env->scause = 0;
1419     env->stval = 0;
1420     env->mip = 0;
1421 }
1422 
1423 void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
1424 {
1425     int ret;
1426     unsigned virq = level ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET;
1427 
1428     if (irq != IRQ_S_EXT) {
1429         perror("kvm riscv set irq != IRQ_S_EXT\n");
1430         abort();
1431     }
1432 
1433     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1434     if (ret < 0) {
1435         perror("Set irq failed");
1436         abort();
1437     }
1438 }
1439 
/* KVM RISC-V vcpus can always be reset. */
bool kvm_arch_cpu_check_are_resettable(void)
{
    return true;
}
1444 
/* KVM_DEV_RISCV_AIA_MODE_* selected via the "riscv-aia" accel property. */
static int aia_mode;
1446 
1447 static const char *kvm_aia_mode_str(uint64_t mode)
1448 {
1449     switch (mode) {
1450     case KVM_DEV_RISCV_AIA_MODE_EMUL:
1451         return "emul";
1452     case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
1453         return "hwaccel";
1454     case KVM_DEV_RISCV_AIA_MODE_AUTO:
1455     default:
1456         return "auto";
1457     };
1458 }
1459 
/* Getter for the "riscv-aia" accel property: current mode as a string. */
static char *riscv_get_kvm_aia(Object *obj, Error **errp)
{
    return g_strdup(kvm_aia_mode_str(aia_mode));
}
1464 
1465 static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp)
1466 {
1467     if (!strcmp(val, "emul")) {
1468         aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL;
1469     } else if (!strcmp(val, "hwaccel")) {
1470         aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL;
1471     } else if (!strcmp(val, "auto")) {
1472         aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO;
1473     } else {
1474         error_setg(errp, "Invalid KVM AIA mode");
1475         error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n");
1476     }
1477 }
1478 
/* Register the "riscv-aia" property on the KVM accel class (default "auto"). */
void kvm_arch_accel_class_init(ObjectClass *oc)
{
    object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia,
                                  riscv_set_kvm_aia);
    object_class_property_set_description(oc, "riscv-aia",
                                          "Set KVM AIA mode. Valid values are "
                                          "emul, hwaccel, and auto. Default "
                                          "is auto.");
    object_property_set_default_str(object_class_property_find(oc, "riscv-aia"),
                                    "auto");
}
1490 
1491 void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift,
1492                           uint64_t aia_irq_num, uint64_t aia_msi_num,
1493                           uint64_t aplic_base, uint64_t imsic_base,
1494                           uint64_t guest_num)
1495 {
1496     int ret, i;
1497     int aia_fd = -1;
1498     uint64_t default_aia_mode;
1499     uint64_t socket_count = riscv_socket_count(machine);
1500     uint64_t max_hart_per_socket = 0;
1501     uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr;
1502     uint64_t socket_bits, hart_bits, guest_bits;
1503 
1504     aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false);
1505 
1506     if (aia_fd < 0) {
1507         error_report("Unable to create in-kernel irqchip");
1508         exit(1);
1509     }
1510 
1511     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1512                             KVM_DEV_RISCV_AIA_CONFIG_MODE,
1513                             &default_aia_mode, false, NULL);
1514     if (ret < 0) {
1515         error_report("KVM AIA: failed to get current KVM AIA mode");
1516         exit(1);
1517     }
1518     qemu_log("KVM AIA: default mode is %s\n",
1519              kvm_aia_mode_str(default_aia_mode));
1520 
1521     if (default_aia_mode != aia_mode) {
1522         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1523                                 KVM_DEV_RISCV_AIA_CONFIG_MODE,
1524                                 &aia_mode, true, NULL);
1525         if (ret < 0)
1526             warn_report("KVM AIA: failed to set KVM AIA mode");
1527         else
1528             qemu_log("KVM AIA: set current mode to %s\n",
1529                      kvm_aia_mode_str(aia_mode));
1530     }
1531 
1532     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1533                             KVM_DEV_RISCV_AIA_CONFIG_SRCS,
1534                             &aia_irq_num, true, NULL);
1535     if (ret < 0) {
1536         error_report("KVM AIA: failed to set number of input irq lines");
1537         exit(1);
1538     }
1539 
1540     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1541                             KVM_DEV_RISCV_AIA_CONFIG_IDS,
1542                             &aia_msi_num, true, NULL);
1543     if (ret < 0) {
1544         error_report("KVM AIA: failed to set number of msi");
1545         exit(1);
1546     }
1547 
1548 
1549     if (socket_count > 1) {
1550         socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1;
1551         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1552                                 KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS,
1553                                 &socket_bits, true, NULL);
1554         if (ret < 0) {
1555             error_report("KVM AIA: failed to set group_bits");
1556             exit(1);
1557         }
1558 
1559         ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1560                                 KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT,
1561                                 &group_shift, true, NULL);
1562         if (ret < 0) {
1563             error_report("KVM AIA: failed to set group_shift");
1564             exit(1);
1565         }
1566     }
1567 
1568     guest_bits = guest_num == 0 ? 0 :
1569                  find_last_bit(&guest_num, BITS_PER_LONG) + 1;
1570     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1571                             KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS,
1572                             &guest_bits, true, NULL);
1573     if (ret < 0) {
1574         error_report("KVM AIA: failed to set guest_bits");
1575         exit(1);
1576     }
1577 
1578     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
1579                             KVM_DEV_RISCV_AIA_ADDR_APLIC,
1580                             &aplic_base, true, NULL);
1581     if (ret < 0) {
1582         error_report("KVM AIA: failed to set the base address of APLIC");
1583         exit(1);
1584     }
1585 
1586     for (socket = 0; socket < socket_count; socket++) {
1587         socket_imsic_base = imsic_base + socket * (1U << group_shift);
1588         hart_count = riscv_socket_hart_count(machine, socket);
1589         base_hart = riscv_socket_first_hartid(machine, socket);
1590 
1591         if (max_hart_per_socket < hart_count) {
1592             max_hart_per_socket = hart_count;
1593         }
1594 
1595         for (i = 0; i < hart_count; i++) {
1596             imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits);
1597             ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR,
1598                                     KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart),
1599                                     &imsic_addr, true, NULL);
1600             if (ret < 0) {
1601                 error_report("KVM AIA: failed to set the IMSIC address for hart %d", i);
1602                 exit(1);
1603             }
1604         }
1605     }
1606 
1607     hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1;
1608     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG,
1609                             KVM_DEV_RISCV_AIA_CONFIG_HART_BITS,
1610                             &hart_bits, true, NULL);
1611     if (ret < 0) {
1612         error_report("KVM AIA: failed to set hart_bits");
1613         exit(1);
1614     }
1615 
1616     if (kvm_has_gsi_routing()) {
1617         for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) {
1618             /* KVM AIA only has one APLIC instance */
1619             kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx);
1620         }
1621         kvm_gsi_routing_allowed = true;
1622         kvm_irqchip_commit_routes(kvm_state);
1623     }
1624 
1625     ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL,
1626                             KVM_DEV_RISCV_AIA_CTRL_INIT,
1627                             NULL, true, NULL);
1628     if (ret < 0) {
1629         error_report("KVM AIA: initialized fail");
1630         exit(1);
1631     }
1632 
1633     kvm_msi_via_irqfd_allowed = true;
1634 }
1635 
/*
 * Accel hook for CPU instance init: probe host KVM register state and
 * expose the KVM-backed user-settable CPU properties.
 */
static void kvm_cpu_instance_init(CPUState *cs)
{
    Object *obj = OBJECT(RISCV_CPU(cs));

    riscv_init_kvm_registers(obj);

    kvm_riscv_add_cpu_user_properties(obj);
}
1644 
1645 /*
1646  * We'll get here via the following path:
1647  *
1648  * riscv_cpu_realize()
1649  *   -> cpu_exec_realizefn()
1650  *      -> kvm_cpu_realize() (via accel_cpu_common_realize())
1651  */
1652 static bool kvm_cpu_realize(CPUState *cs, Error **errp)
1653 {
1654     RISCVCPU *cpu = RISCV_CPU(cs);
1655     int ret;
1656 
1657     if (riscv_has_ext(&cpu->env, RVV)) {
1658         ret = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
1659         if (ret) {
1660             error_setg(errp, "Error in prctl PR_RISCV_V_SET_CONTROL, code: %s",
1661                        strerrorname_np(errno));
1662             return false;
1663         }
1664     }
1665 
1666    return true;
1667 }
1668 
1669 void riscv_kvm_cpu_finalize_features(RISCVCPU *cpu, Error **errp)
1670 {
1671     CPURISCVState *env = &cpu->env;
1672     KVMScratchCPU kvmcpu;
1673     struct kvm_one_reg reg;
1674     uint64_t val;
1675     int ret;
1676 
1677     /* short-circuit without spinning the scratch CPU */
1678     if (!cpu->cfg.ext_zicbom && !cpu->cfg.ext_zicboz &&
1679         !riscv_has_ext(env, RVV)) {
1680         return;
1681     }
1682 
1683     if (!kvm_riscv_create_scratch_vcpu(&kvmcpu)) {
1684         error_setg(errp, "Unable to create scratch KVM cpu");
1685         return;
1686     }
1687 
1688     if (cpu->cfg.ext_zicbom &&
1689         riscv_cpu_option_set(kvm_cbom_blocksize.name)) {
1690 
1691         reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
1692                                         kvm_cbom_blocksize.kvm_reg_id);
1693         reg.addr = (uint64_t)&val;
1694         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1695         if (ret != 0) {
1696             error_setg(errp, "Unable to read cbom_blocksize, error %d", errno);
1697             return;
1698         }
1699 
1700         if (cpu->cfg.cbom_blocksize != val) {
1701             error_setg(errp, "Unable to set cbom_blocksize to a different "
1702                        "value than the host (%lu)", val);
1703             return;
1704         }
1705     }
1706 
1707     if (cpu->cfg.ext_zicboz &&
1708         riscv_cpu_option_set(kvm_cboz_blocksize.name)) {
1709 
1710         reg.id = kvm_riscv_reg_id_ulong(env, KVM_REG_RISCV_CONFIG,
1711                                         kvm_cboz_blocksize.kvm_reg_id);
1712         reg.addr = (uint64_t)&val;
1713         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1714         if (ret != 0) {
1715             error_setg(errp, "Unable to read cboz_blocksize, error %d", errno);
1716             return;
1717         }
1718 
1719         if (cpu->cfg.cboz_blocksize != val) {
1720             error_setg(errp, "Unable to set cboz_blocksize to a different "
1721                        "value than the host (%lu)", val);
1722             return;
1723         }
1724     }
1725 
1726     /* Users are setting vlen, not vlenb */
1727     if (riscv_has_ext(env, RVV) && riscv_cpu_option_set("vlen")) {
1728         if (!kvm_v_vlenb.supported) {
1729             error_setg(errp, "Unable to set 'vlenb': register not supported");
1730             return;
1731         }
1732 
1733         reg.id = kvm_v_vlenb.kvm_reg_id;
1734         reg.addr = (uint64_t)&val;
1735         ret = ioctl(kvmcpu.cpufd, KVM_GET_ONE_REG, &reg);
1736         if (ret != 0) {
1737             error_setg(errp, "Unable to read vlenb register, error %d", errno);
1738             return;
1739         }
1740 
1741         if (cpu->cfg.vlenb != val) {
1742             error_setg(errp, "Unable to set 'vlen' to a different "
1743                        "value than the host (%lu)", val * 8);
1744             return;
1745         }
1746     }
1747 
1748     kvm_riscv_destroy_scratch_vcpu(&kvmcpu);
1749 }
1750 
/* Wire the KVM accel-cpu hooks (instance init and realize). */
static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data)
{
    AccelCPUClass *acc = ACCEL_CPU_CLASS(oc);

    acc->cpu_instance_init = kvm_cpu_instance_init;
    acc->cpu_target_realize = kvm_cpu_realize;
}
1758 
/* Abstract QOM type for the KVM accel-cpu class. */
static const TypeInfo kvm_cpu_accel_type_info = {
    .name = ACCEL_CPU_NAME("kvm"),

    .parent = TYPE_ACCEL_CPU,
    .class_init = kvm_cpu_accel_class_init,
    .abstract = true,
};
/* Register the KVM accel-cpu type with QOM at program startup. */
static void kvm_cpu_accel_register_types(void)
{
    type_register_static(&kvm_cpu_accel_type_info);
}
type_init(kvm_cpu_accel_register_types);
1771 
/*
 * Class init for the KVM-only "host" CPU model: the maximum MXL
 * follows the build target (RV32 vs RV64).
 */
static void riscv_host_cpu_class_init(ObjectClass *c, void *data)
{
    RISCVCPUClass *mcc = RISCV_CPU_CLASS(c);

#if defined(TARGET_RISCV32)
    mcc->misa_mxl_max = MXL_RV32;
#elif defined(TARGET_RISCV64)
    mcc->misa_mxl_max = MXL_RV64;
#endif
}
1782 
/* QOM registration of the KVM-only "host" CPU model. */
static const TypeInfo riscv_kvm_cpu_type_infos[] = {
    {
        .name = TYPE_RISCV_CPU_HOST,
        .parent = TYPE_RISCV_CPU,
        .class_init = riscv_host_cpu_class_init,
    }
};

DEFINE_TYPES(riscv_kvm_cpu_type_infos)
1792