xref: /qemu/target/ppc/kvm.c (revision 00382fa8)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 static int cap_ppc_nested_kvm_hv;
95 
96 static uint32_t debug_inst_opcode;
97 
98 /* XXX We have a race condition where we actually have a level triggered
99  *     interrupt, but the infrastructure can't expose that yet, so the guest
100  *     takes but ignores it, goes to sleep and never gets notified that there's
101  *     still an interrupt pending.
102  *
103  *     As a quick workaround, let's just wake up again 20 ms after we injected
104  *     an interrupt. That way we can assure that we're always reinjecting
105  *     interrupts in case the guest swallowed them.
106  */
107 static QEMUTimer *idle_timer;
108 
109 static void kvm_kick_cpu(void *opaque)
110 {
111     PowerPCCPU *cpu = opaque;
112 
113     qemu_cpu_kick(CPU(cpu));
114 }
115 
116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
117  * should only be used for fallback tests - generally we should use
118  * explicit capabilities for the features we want, rather than
119  * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
121 {
122     /* Assume KVM-PR if the GET_PVINFO capability is available */
123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 }
125 
126 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
127 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 
129 int kvm_arch_init(MachineState *ms, KVMState *s)
130 {
131     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
132     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
133     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
134     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
135     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144     /* Note: we don't set cap_papr here, because this capability is
145      * only activated after this by kvmppc_set_papr() */
146     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
153     kvmppc_get_cpu_characteristics(s);
154     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
155     /*
156      * Note: setting it to false because there is not such capability
157      * in KVM at this moment.
158      *
159      * TODO: call kvm_vm_check_extension() with the right capability
160      * after the kernel starts implementing it.*/
161     cap_ppc_pvr_compat = false;
162 
163     if (!cap_interrupt_level) {
164         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
165                         "VM to stall at times!\n");
166     }
167 
168     kvm_ppc_register_host_cpu_type(ms);
169 
170     return 0;
171 }
172 
173 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 {
175     return 0;
176 }
177 
178 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 {
180     CPUPPCState *cenv = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_sregs sregs;
183     int ret;
184 
185     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
186         /* What we're really trying to say is "if we're on BookE, we use
187            the native PVR for now". This is the only sane way to check
188            it though, so we potentially confuse users that they can run
189            BookE guests on BookS. Let's hope nobody dares enough :) */
190         return 0;
191     } else {
192         if (!cap_segstate) {
193             fprintf(stderr, "kvm error: missing PVR setting capability\n");
194             return -ENOSYS;
195         }
196     }
197 
198     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
199     if (ret) {
200         return ret;
201     }
202 
203     sregs.pvr = cenv->spr[SPR_PVR];
204     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 }
206 
207 /* Set up a shared TLB array with KVM */
208 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 {
210     CPUPPCState *env = &cpu->env;
211     CPUState *cs = CPU(cpu);
212     struct kvm_book3e_206_tlb_params params = {};
213     struct kvm_config_tlb cfg = {};
214     unsigned int entries = 0;
215     int ret, i;
216 
217     if (!kvm_enabled() ||
218         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
219         return 0;
220     }
221 
222     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 
224     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
225         params.tlb_sizes[i] = booke206_tlb_size(env, i);
226         params.tlb_ways[i] = booke206_tlb_ways(env, i);
227         entries += params.tlb_sizes[i];
228     }
229 
230     assert(entries == env->nb_tlb);
231     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 
233     env->tlb_dirty = true;
234 
235     cfg.array = (uintptr_t)env->tlb.tlbm;
236     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
237     cfg.params = (uintptr_t)&params;
238     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 
240     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
241     if (ret < 0) {
242         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
243                 __func__, strerror(-ret));
244         return ret;
245     }
246 
247     env->kvm_sw_tlb = true;
248     return 0;
249 }
250 
251 
252 #if defined(TARGET_PPC64)
253 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
254 {
255     int ret;
256 
257     assert(kvm_state != NULL);
258 
259     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
260         error_setg(errp, "KVM doesn't expose the MMU features it supports");
261         error_append_hint(errp, "Consider switching to a newer KVM\n");
262         return;
263     }
264 
265     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
266     if (ret == 0) {
267         return;
268     }
269 
270     error_setg_errno(errp, -ret,
271                      "KVM failed to provide the MMU features it supports");
272 }
273 
274 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
275 {
276     KVMState *s = KVM_STATE(current_machine->accelerator);
277     struct ppc_radix_page_info *radix_page_info;
278     struct kvm_ppc_rmmu_info rmmu_info;
279     int i;
280 
281     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
282         return NULL;
283     }
284     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
285         return NULL;
286     }
287     radix_page_info = g_malloc0(sizeof(*radix_page_info));
288     radix_page_info->count = 0;
289     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
290         if (rmmu_info.ap_encodings[i]) {
291             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
292             radix_page_info->count++;
293         }
294     }
295     return radix_page_info;
296 }
297 
298 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
299                                      bool radix, bool gtse,
300                                      uint64_t proc_tbl)
301 {
302     CPUState *cs = CPU(cpu);
303     int ret;
304     uint64_t flags = 0;
305     struct kvm_ppc_mmuv3_cfg cfg = {
306         .process_table = proc_tbl,
307     };
308 
309     if (radix) {
310         flags |= KVM_PPC_MMUV3_RADIX;
311     }
312     if (gtse) {
313         flags |= KVM_PPC_MMUV3_GTSE;
314     }
315     cfg.flags = flags;
316     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
317     switch (ret) {
318     case 0:
319         return H_SUCCESS;
320     case -EINVAL:
321         return H_PARAMETER;
322     case -ENODEV:
323         return H_NOT_AVAILABLE;
324     default:
325         return H_HARDWARE;
326     }
327 }
328 
329 bool kvmppc_hpt_needs_host_contiguous_pages(void)
330 {
331     static struct kvm_ppc_smmu_info smmu_info;
332 
333     if (!kvm_enabled()) {
334         return false;
335     }
336 
337     kvm_get_smmu_info(&smmu_info, &error_fatal);
338     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
339 }
340 
341 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
342 {
343     struct kvm_ppc_smmu_info smmu_info;
344     int iq, ik, jq, jk;
345     Error *local_err = NULL;
346 
347     /* For now, we only have anything to check on hash64 MMUs */
348     if (!cpu->hash64_opts || !kvm_enabled()) {
349         return;
350     }
351 
352     kvm_get_smmu_info(&smmu_info, &local_err);
353     if (local_err) {
354         error_propagate(errp, local_err);
355         return;
356     }
357 
358     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
359         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
360         error_setg(errp,
361                    "KVM does not support 1TiB segments which guest expects");
362         return;
363     }
364 
365     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
366         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
367                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
368         return;
369     }
370 
371     /*
372      * Verify that every pagesize supported by the cpu model is
373      * supported by KVM with the same encodings
374      */
375     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
376         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
377         struct kvm_ppc_one_seg_page_size *ksps;
378 
379         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
380             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
381                 break;
382             }
383         }
384         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
385             error_setg(errp, "KVM doesn't support for base page shift %u",
386                        qsps->page_shift);
387             return;
388         }
389 
390         ksps = &smmu_info.sps[ik];
391         if (ksps->slb_enc != qsps->slb_enc) {
392             error_setg(errp,
393 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
394                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
395             return;
396         }
397 
398         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
399             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
400                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
401                     break;
402                 }
403             }
404 
405             if (jk >= ARRAY_SIZE(ksps->enc)) {
406                 error_setg(errp, "KVM doesn't support page shift %u/%u",
407                            qsps->enc[jq].page_shift, qsps->page_shift);
408                 return;
409             }
410             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
411                 error_setg(errp,
412 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
413                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
414                            qsps->page_shift, qsps->enc[jq].pte_enc);
415                 return;
416             }
417         }
418     }
419 
420     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
421         /* Mostly what guest pagesizes we can use are related to the
422          * host pages used to map guest RAM, which is handled in the
423          * platform code. Cache-Inhibited largepages (64k) however are
424          * used for I/O, so if they're mapped to the host at all it
425          * will be a normal mapping, not a special hugepage one used
426          * for RAM. */
427         if (getpagesize() < 0x10000) {
428             error_setg(errp,
429                        "KVM can't supply 64kiB CI pages, which guest expects");
430         }
431     }
432 }
#endif /* defined(TARGET_PPC64) */
434 
435 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
436 {
437     return POWERPC_CPU(cpu)->vcpu_id;
438 }
439 
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
444 #define MAX_HW_BKPTS 4
445 
446 static struct HWBreakpoint {
447     target_ulong addr;
448     int type;
449 } hw_debug_points[MAX_HW_BKPTS];
450 
451 static CPUWatchpoint hw_watchpoint;
452 
/* By default, no breakpoints or watchpoints are supported */
454 static int max_hw_breakpoint;
455 static int max_hw_watchpoint;
456 static int nb_hw_breakpoint;
457 static int nb_hw_watchpoint;
458 
459 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
460 {
461     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
462         max_hw_breakpoint = 2;
463         max_hw_watchpoint = 2;
464     }
465 
466     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
467         fprintf(stderr, "Error initializing h/w breakpoints\n");
468         return;
469     }
470 }
471 
472 int kvm_arch_init_vcpu(CPUState *cs)
473 {
474     PowerPCCPU *cpu = POWERPC_CPU(cs);
475     CPUPPCState *cenv = &cpu->env;
476     int ret;
477 
478     /* Synchronize sregs with kvm */
479     ret = kvm_arch_sync_sregs(cpu);
480     if (ret) {
481         if (ret == -EINVAL) {
482             error_report("Register sync failed... If you're using kvm-hv.ko,"
483                          " only \"-cpu host\" is possible");
484         }
485         return ret;
486     }
487 
488     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
489 
490     switch (cenv->mmu_model) {
491     case POWERPC_MMU_BOOKE206:
492         /* This target supports access to KVM's guest TLB */
493         ret = kvm_booke206_tlb_init(cpu);
494         break;
495     case POWERPC_MMU_2_07:
496         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
497             /* KVM-HV has transactional memory on POWER8 also without the
498              * KVM_CAP_PPC_HTM extension, so enable it here instead as
499              * long as it's availble to userspace on the host. */
500             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
501                 cap_htm = true;
502             }
503         }
504         break;
505     default:
506         break;
507     }
508 
509     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
510     kvmppc_hw_debug_points_init(cenv);
511 
512     return ret;
513 }
514 
515 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
516 {
517     CPUPPCState *env = &cpu->env;
518     CPUState *cs = CPU(cpu);
519     struct kvm_dirty_tlb dirty_tlb;
520     unsigned char *bitmap;
521     int ret;
522 
523     if (!env->kvm_sw_tlb) {
524         return;
525     }
526 
527     bitmap = g_malloc((env->nb_tlb + 7) / 8);
528     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
529 
530     dirty_tlb.bitmap = (uintptr_t)bitmap;
531     dirty_tlb.num_dirty = env->nb_tlb;
532 
533     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
534     if (ret) {
535         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
536                 __func__, strerror(-ret));
537     }
538 
539     g_free(bitmap);
540 }
541 
542 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
543 {
544     PowerPCCPU *cpu = POWERPC_CPU(cs);
545     CPUPPCState *env = &cpu->env;
546     union {
547         uint32_t u32;
548         uint64_t u64;
549     } val;
550     struct kvm_one_reg reg = {
551         .id = id,
552         .addr = (uintptr_t) &val,
553     };
554     int ret;
555 
556     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
557     if (ret != 0) {
558         trace_kvm_failed_spr_get(spr, strerror(errno));
559     } else {
560         switch (id & KVM_REG_SIZE_MASK) {
561         case KVM_REG_SIZE_U32:
562             env->spr[spr] = val.u32;
563             break;
564 
565         case KVM_REG_SIZE_U64:
566             env->spr[spr] = val.u64;
567             break;
568 
569         default:
570             /* Don't handle this size yet */
571             abort();
572         }
573     }
574 }
575 
576 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
577 {
578     PowerPCCPU *cpu = POWERPC_CPU(cs);
579     CPUPPCState *env = &cpu->env;
580     union {
581         uint32_t u32;
582         uint64_t u64;
583     } val;
584     struct kvm_one_reg reg = {
585         .id = id,
586         .addr = (uintptr_t) &val,
587     };
588     int ret;
589 
590     switch (id & KVM_REG_SIZE_MASK) {
591     case KVM_REG_SIZE_U32:
592         val.u32 = env->spr[spr];
593         break;
594 
595     case KVM_REG_SIZE_U64:
596         val.u64 = env->spr[spr];
597         break;
598 
599     default:
600         /* Don't handle this size yet */
601         abort();
602     }
603 
604     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
605     if (ret != 0) {
606         trace_kvm_failed_spr_set(spr, strerror(errno));
607     }
608 }
609 
610 static int kvm_put_fp(CPUState *cs)
611 {
612     PowerPCCPU *cpu = POWERPC_CPU(cs);
613     CPUPPCState *env = &cpu->env;
614     struct kvm_one_reg reg;
615     int i;
616     int ret;
617 
618     if (env->insns_flags & PPC_FLOAT) {
619         uint64_t fpscr = env->fpscr;
620         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
621 
622         reg.id = KVM_REG_PPC_FPSCR;
623         reg.addr = (uintptr_t)&fpscr;
624         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
625         if (ret < 0) {
626             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
627             return ret;
628         }
629 
630         for (i = 0; i < 32; i++) {
631             uint64_t vsr[2];
632 
633 #ifdef HOST_WORDS_BIGENDIAN
634             vsr[0] = float64_val(env->fpr[i]);
635             vsr[1] = env->vsr[i];
636 #else
637             vsr[0] = env->vsr[i];
638             vsr[1] = float64_val(env->fpr[i]);
639 #endif
640             reg.addr = (uintptr_t) &vsr;
641             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
642 
643             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
644             if (ret < 0) {
645                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
646                         i, strerror(errno));
647                 return ret;
648             }
649         }
650     }
651 
652     if (env->insns_flags & PPC_ALTIVEC) {
653         reg.id = KVM_REG_PPC_VSCR;
654         reg.addr = (uintptr_t)&env->vscr;
655         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
656         if (ret < 0) {
657             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
658             return ret;
659         }
660 
661         for (i = 0; i < 32; i++) {
662             reg.id = KVM_REG_PPC_VR(i);
663             reg.addr = (uintptr_t)&env->avr[i];
664             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
665             if (ret < 0) {
666                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
667                 return ret;
668             }
669         }
670     }
671 
672     return 0;
673 }
674 
675 static int kvm_get_fp(CPUState *cs)
676 {
677     PowerPCCPU *cpu = POWERPC_CPU(cs);
678     CPUPPCState *env = &cpu->env;
679     struct kvm_one_reg reg;
680     int i;
681     int ret;
682 
683     if (env->insns_flags & PPC_FLOAT) {
684         uint64_t fpscr;
685         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
686 
687         reg.id = KVM_REG_PPC_FPSCR;
688         reg.addr = (uintptr_t)&fpscr;
689         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
690         if (ret < 0) {
691             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
692             return ret;
693         } else {
694             env->fpscr = fpscr;
695         }
696 
697         for (i = 0; i < 32; i++) {
698             uint64_t vsr[2];
699 
700             reg.addr = (uintptr_t) &vsr;
701             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
702 
703             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
704             if (ret < 0) {
705                 DPRINTF("Unable to get %s%d from KVM: %s\n",
706                         vsx ? "VSR" : "FPR", i, strerror(errno));
707                 return ret;
708             } else {
709 #ifdef HOST_WORDS_BIGENDIAN
710                 env->fpr[i] = vsr[0];
711                 if (vsx) {
712                     env->vsr[i] = vsr[1];
713                 }
714 #else
715                 env->fpr[i] = vsr[1];
716                 if (vsx) {
717                     env->vsr[i] = vsr[0];
718                 }
719 #endif
720             }
721         }
722     }
723 
724     if (env->insns_flags & PPC_ALTIVEC) {
725         reg.id = KVM_REG_PPC_VSCR;
726         reg.addr = (uintptr_t)&env->vscr;
727         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
728         if (ret < 0) {
729             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
730             return ret;
731         }
732 
733         for (i = 0; i < 32; i++) {
734             reg.id = KVM_REG_PPC_VR(i);
735             reg.addr = (uintptr_t)&env->avr[i];
736             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
737             if (ret < 0) {
738                 DPRINTF("Unable to get VR%d from KVM: %s\n",
739                         i, strerror(errno));
740                 return ret;
741             }
742         }
743     }
744 
745     return 0;
746 }
747 
748 #if defined(TARGET_PPC64)
749 static int kvm_get_vpa(CPUState *cs)
750 {
751     PowerPCCPU *cpu = POWERPC_CPU(cs);
752     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
753     struct kvm_one_reg reg;
754     int ret;
755 
756     reg.id = KVM_REG_PPC_VPA_ADDR;
757     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
758     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
759     if (ret < 0) {
760         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
761         return ret;
762     }
763 
764     assert((uintptr_t)&spapr_cpu->slb_shadow_size
765            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
766     reg.id = KVM_REG_PPC_VPA_SLB;
767     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
768     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
769     if (ret < 0) {
770         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
771                 strerror(errno));
772         return ret;
773     }
774 
775     assert((uintptr_t)&spapr_cpu->dtl_size
776            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
777     reg.id = KVM_REG_PPC_VPA_DTL;
778     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
779     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
780     if (ret < 0) {
781         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
782                 strerror(errno));
783         return ret;
784     }
785 
786     return 0;
787 }
788 
789 static int kvm_put_vpa(CPUState *cs)
790 {
791     PowerPCCPU *cpu = POWERPC_CPU(cs);
792     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
793     struct kvm_one_reg reg;
794     int ret;
795 
796     /* SLB shadow or DTL can't be registered unless a master VPA is
797      * registered.  That means when restoring state, if a VPA *is*
798      * registered, we need to set that up first.  If not, we need to
799      * deregister the others before deregistering the master VPA */
800     assert(spapr_cpu->vpa_addr
801            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
802 
803     if (spapr_cpu->vpa_addr) {
804         reg.id = KVM_REG_PPC_VPA_ADDR;
805         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
806         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
807         if (ret < 0) {
808             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
809             return ret;
810         }
811     }
812 
813     assert((uintptr_t)&spapr_cpu->slb_shadow_size
814            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
815     reg.id = KVM_REG_PPC_VPA_SLB;
816     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
817     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
818     if (ret < 0) {
819         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
820         return ret;
821     }
822 
823     assert((uintptr_t)&spapr_cpu->dtl_size
824            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
825     reg.id = KVM_REG_PPC_VPA_DTL;
826     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
827     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
828     if (ret < 0) {
829         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
830                 strerror(errno));
831         return ret;
832     }
833 
834     if (!spapr_cpu->vpa_addr) {
835         reg.id = KVM_REG_PPC_VPA_ADDR;
836         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
837         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
838         if (ret < 0) {
839             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
840             return ret;
841         }
842     }
843 
844     return 0;
845 }
846 #endif /* TARGET_PPC64 */
847 
848 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
849 {
850     CPUPPCState *env = &cpu->env;
851     struct kvm_sregs sregs;
852     int i;
853 
854     sregs.pvr = env->spr[SPR_PVR];
855 
856     if (cpu->vhyp) {
857         PPCVirtualHypervisorClass *vhc =
858             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
859         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
860     } else {
861         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
862     }
863 
864     /* Sync SLB */
865 #ifdef TARGET_PPC64
866     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
867         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
868         if (env->slb[i].esid & SLB_ESID_V) {
869             sregs.u.s.ppc64.slb[i].slbe |= i;
870         }
871         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
872     }
873 #endif
874 
875     /* Sync SRs */
876     for (i = 0; i < 16; i++) {
877         sregs.u.s.ppc32.sr[i] = env->sr[i];
878     }
879 
880     /* Sync BATs */
881     for (i = 0; i < 8; i++) {
882         /* Beware. We have to swap upper and lower bits here */
883         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
884             | env->DBAT[1][i];
885         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
886             | env->IBAT[1][i];
887     }
888 
889     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
890 }
891 
892 int kvm_arch_put_registers(CPUState *cs, int level)
893 {
894     PowerPCCPU *cpu = POWERPC_CPU(cs);
895     CPUPPCState *env = &cpu->env;
896     struct kvm_regs regs;
897     int ret;
898     int i;
899 
900     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
901     if (ret < 0) {
902         return ret;
903     }
904 
905     regs.ctr = env->ctr;
906     regs.lr  = env->lr;
907     regs.xer = cpu_read_xer(env);
908     regs.msr = env->msr;
909     regs.pc = env->nip;
910 
911     regs.srr0 = env->spr[SPR_SRR0];
912     regs.srr1 = env->spr[SPR_SRR1];
913 
914     regs.sprg0 = env->spr[SPR_SPRG0];
915     regs.sprg1 = env->spr[SPR_SPRG1];
916     regs.sprg2 = env->spr[SPR_SPRG2];
917     regs.sprg3 = env->spr[SPR_SPRG3];
918     regs.sprg4 = env->spr[SPR_SPRG4];
919     regs.sprg5 = env->spr[SPR_SPRG5];
920     regs.sprg6 = env->spr[SPR_SPRG6];
921     regs.sprg7 = env->spr[SPR_SPRG7];
922 
923     regs.pid = env->spr[SPR_BOOKE_PID];
924 
925     for (i = 0;i < 32; i++)
926         regs.gpr[i] = env->gpr[i];
927 
928     regs.cr = 0;
929     for (i = 0; i < 8; i++) {
930         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
931     }
932 
933     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
934     if (ret < 0)
935         return ret;
936 
937     kvm_put_fp(cs);
938 
939     if (env->tlb_dirty) {
940         kvm_sw_tlb_put(cpu);
941         env->tlb_dirty = false;
942     }
943 
944     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
945         ret = kvmppc_put_books_sregs(cpu);
946         if (ret < 0) {
947             return ret;
948         }
949     }
950 
951     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
952         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
953     }
954 
955     if (cap_one_reg) {
956         int i;
957 
958         /* We deliberately ignore errors here, for kernels which have
959          * the ONE_REG calls, but don't support the specific
960          * registers, there's a reasonable chance things will still
961          * work, at least until we try to migrate. */
962         for (i = 0; i < 1024; i++) {
963             uint64_t id = env->spr_cb[i].one_reg_id;
964 
965             if (id != 0) {
966                 kvm_put_one_spr(cs, id, i);
967             }
968         }
969 
970 #ifdef TARGET_PPC64
971         if (msr_ts) {
972             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
973                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
974             }
975             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
976                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
977             }
978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
980             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
981             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
982             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
983             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
984             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
985             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
986             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
988         }
989 
990         if (cap_papr) {
991             if (kvm_put_vpa(cs) < 0) {
992                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
993             }
994         }
995 
996         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
997 #endif /* TARGET_PPC64 */
998     }
999 
1000     return ret;
1001 }
1002 
1003 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1004 {
1005      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1006 }
1007 
1008 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1009 {
1010     CPUPPCState *env = &cpu->env;
1011     struct kvm_sregs sregs;
1012     int ret;
1013 
1014     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1015     if (ret < 0) {
1016         return ret;
1017     }
1018 
1019     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1020         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1021         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1022         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1023         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1024         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1025         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1026         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1027         env->spr[SPR_DECR] = sregs.u.e.dec;
1028         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1029         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1030         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1031     }
1032 
1033     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1034         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1035         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1036         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1037         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1038         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1039     }
1040 
1041     if (sregs.u.e.features & KVM_SREGS_E_64) {
1042         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1043     }
1044 
1045     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1046         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1047     }
1048 
1049     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1050         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1051         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1052         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1053         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1054         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1055         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1056         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1057         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1058         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1059         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1060         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1061         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1062         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1063         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1064         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1065         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1066         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1067         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1068         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1069         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1070         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1071         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1072         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1073         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1074         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1075         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1076         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1077         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1078         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1079         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1080         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1081         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1082 
1083         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1084             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1085             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1086             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1087             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1088             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1089             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1090         }
1091 
1092         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1093             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1094             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1095         }
1096 
1097         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1098             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1099             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1100             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1101             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1102         }
1103     }
1104 
1105     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1106         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1107         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1108         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1109         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1110         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1111         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1112         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1113         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1114         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1115         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1116     }
1117 
1118     if (sregs.u.e.features & KVM_SREGS_EXP) {
1119         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1120     }
1121 
1122     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1123         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1124         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1125     }
1126 
1127     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1128         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1129         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1130         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1131 
1132         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1133             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1134             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1135         }
1136     }
1137 
1138     return 0;
1139 }
1140 
1141 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1142 {
1143     CPUPPCState *env = &cpu->env;
1144     struct kvm_sregs sregs;
1145     int ret;
1146     int i;
1147 
1148     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1149     if (ret < 0) {
1150         return ret;
1151     }
1152 
1153     if (!cpu->vhyp) {
1154         ppc_store_sdr1(env, sregs.u.s.sdr1);
1155     }
1156 
1157     /* Sync SLB */
1158 #ifdef TARGET_PPC64
1159     /*
1160      * The packed SLB array we get from KVM_GET_SREGS only contains
1161      * information about valid entries. So we flush our internal copy
1162      * to get rid of stale ones, then put all valid SLB entries back
1163      * in.
1164      */
1165     memset(env->slb, 0, sizeof(env->slb));
1166     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1167         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1168         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1169         /*
1170          * Only restore valid entries
1171          */
1172         if (rb & SLB_ESID_V) {
1173             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1174         }
1175     }
1176 #endif
1177 
1178     /* Sync SRs */
1179     for (i = 0; i < 16; i++) {
1180         env->sr[i] = sregs.u.s.ppc32.sr[i];
1181     }
1182 
1183     /* Sync BATs */
1184     for (i = 0; i < 8; i++) {
1185         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1186         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1187         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1188         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1189     }
1190 
1191     return 0;
1192 }
1193 
1194 int kvm_arch_get_registers(CPUState *cs)
1195 {
1196     PowerPCCPU *cpu = POWERPC_CPU(cs);
1197     CPUPPCState *env = &cpu->env;
1198     struct kvm_regs regs;
1199     uint32_t cr;
1200     int i, ret;
1201 
1202     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1203     if (ret < 0)
1204         return ret;
1205 
1206     cr = regs.cr;
1207     for (i = 7; i >= 0; i--) {
1208         env->crf[i] = cr & 15;
1209         cr >>= 4;
1210     }
1211 
1212     env->ctr = regs.ctr;
1213     env->lr = regs.lr;
1214     cpu_write_xer(env, regs.xer);
1215     env->msr = regs.msr;
1216     env->nip = regs.pc;
1217 
1218     env->spr[SPR_SRR0] = regs.srr0;
1219     env->spr[SPR_SRR1] = regs.srr1;
1220 
1221     env->spr[SPR_SPRG0] = regs.sprg0;
1222     env->spr[SPR_SPRG1] = regs.sprg1;
1223     env->spr[SPR_SPRG2] = regs.sprg2;
1224     env->spr[SPR_SPRG3] = regs.sprg3;
1225     env->spr[SPR_SPRG4] = regs.sprg4;
1226     env->spr[SPR_SPRG5] = regs.sprg5;
1227     env->spr[SPR_SPRG6] = regs.sprg6;
1228     env->spr[SPR_SPRG7] = regs.sprg7;
1229 
1230     env->spr[SPR_BOOKE_PID] = regs.pid;
1231 
1232     for (i = 0;i < 32; i++)
1233         env->gpr[i] = regs.gpr[i];
1234 
1235     kvm_get_fp(cs);
1236 
1237     if (cap_booke_sregs) {
1238         ret = kvmppc_get_booke_sregs(cpu);
1239         if (ret < 0) {
1240             return ret;
1241         }
1242     }
1243 
1244     if (cap_segstate) {
1245         ret = kvmppc_get_books_sregs(cpu);
1246         if (ret < 0) {
1247             return ret;
1248         }
1249     }
1250 
1251     if (cap_hior) {
1252         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1253     }
1254 
1255     if (cap_one_reg) {
1256         int i;
1257 
1258         /* We deliberately ignore errors here, for kernels which have
1259          * the ONE_REG calls, but don't support the specific
1260          * registers, there's a reasonable chance things will still
1261          * work, at least until we try to migrate. */
1262         for (i = 0; i < 1024; i++) {
1263             uint64_t id = env->spr_cb[i].one_reg_id;
1264 
1265             if (id != 0) {
1266                 kvm_get_one_spr(cs, id, i);
1267             }
1268         }
1269 
1270 #ifdef TARGET_PPC64
1271         if (msr_ts) {
1272             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1273                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1274             }
1275             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1276                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1277             }
1278             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1279             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1280             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1281             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1282             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1283             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1284             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1285             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1286             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1287             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1288         }
1289 
1290         if (cap_papr) {
1291             if (kvm_get_vpa(cs) < 0) {
1292                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1293             }
1294         }
1295 
1296         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1297 #endif
1298     }
1299 
1300     return 0;
1301 }
1302 
1303 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1304 {
1305     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1306 
1307     if (irq != PPC_INTERRUPT_EXT) {
1308         return 0;
1309     }
1310 
1311     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1312         return 0;
1313     }
1314 
1315     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1316 
1317     return 0;
1318 }
1319 
/* Input pin carrying the external interrupt: differs between 64-bit and
 * 32-bit targets. */
#ifdef TARGET_PPC64
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1325 
1326 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1327 {
1328     PowerPCCPU *cpu = POWERPC_CPU(cs);
1329     CPUPPCState *env = &cpu->env;
1330     int r;
1331     unsigned irq;
1332 
1333     qemu_mutex_lock_iothread();
1334 
1335     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337     if (!cap_interrupt_level &&
1338         run->ready_for_interrupt_injection &&
1339         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1341     {
1342         /* For now KVM disregards the 'irq' argument. However, in the
1343          * future KVM could cache it in-kernel to avoid a heavyweight exit
1344          * when reading the UIC.
1345          */
1346         irq = KVM_INTERRUPT_SET;
1347 
1348         DPRINTF("injected interrupt %d\n", irq);
1349         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350         if (r < 0) {
1351             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1352         }
1353 
1354         /* Always wake up soon in case the interrupt was level based */
1355         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356                        (NANOSECONDS_PER_SECOND / 50));
1357     }
1358 
1359     /* We don't know if there are more interrupts pending after this. However,
1360      * the guest will return to userspace in the course of handling this one
1361      * anyways, so we will get a chance to deliver the rest. */
1362 
1363     qemu_mutex_unlock_iothread();
1364 }
1365 
1366 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1367 {
1368     return MEMTXATTRS_UNSPECIFIED;
1369 }
1370 
1371 int kvm_arch_process_async_events(CPUState *cs)
1372 {
1373     return cs->halted;
1374 }
1375 
1376 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1377 {
1378     CPUState *cs = CPU(cpu);
1379     CPUPPCState *env = &cpu->env;
1380 
1381     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382         cs->halted = 1;
1383         cs->exception_index = EXCP_HLT;
1384     }
1385 
1386     return 0;
1387 }
1388 
1389 /* map dcr access to existing qemu dcr emulation */
1390 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1391 {
1392     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1393         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1394 
1395     return 0;
1396 }
1397 
1398 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1399 {
1400     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1401         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1402 
1403     return 0;
1404 }
1405 
1406 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1407 {
1408     /* Mixed endian case is not handled */
1409     uint32_t sc = debug_inst_opcode;
1410 
1411     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412                             sizeof(sc), 0) ||
1413         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414         return -EINVAL;
1415     }
1416 
1417     return 0;
1418 }
1419 
1420 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1421 {
1422     uint32_t sc;
1423 
1424     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425         sc != debug_inst_opcode ||
1426         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427                             sizeof(sc), 1)) {
1428         return -EINVAL;
1429     }
1430 
1431     return 0;
1432 }
1433 
1434 static int find_hw_breakpoint(target_ulong addr, int type)
1435 {
1436     int n;
1437 
1438     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439            <= ARRAY_SIZE(hw_debug_points));
1440 
1441     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442         if (hw_debug_points[n].addr == addr &&
1443              hw_debug_points[n].type == type) {
1444             return n;
1445         }
1446     }
1447 
1448     return -1;
1449 }
1450 
1451 static int find_hw_watchpoint(target_ulong addr, int *flag)
1452 {
1453     int n;
1454 
1455     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456     if (n >= 0) {
1457         *flag = BP_MEM_ACCESS;
1458         return n;
1459     }
1460 
1461     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462     if (n >= 0) {
1463         *flag = BP_MEM_WRITE;
1464         return n;
1465     }
1466 
1467     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468     if (n >= 0) {
1469         *flag = BP_MEM_READ;
1470         return n;
1471     }
1472 
1473     return -1;
1474 }
1475 
1476 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477                                   target_ulong len, int type)
1478 {
1479     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480         return -ENOBUFS;
1481     }
1482 
1483     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1485 
1486     switch (type) {
1487     case GDB_BREAKPOINT_HW:
1488         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489             return -ENOBUFS;
1490         }
1491 
1492         if (find_hw_breakpoint(addr, type) >= 0) {
1493             return -EEXIST;
1494         }
1495 
1496         nb_hw_breakpoint++;
1497         break;
1498 
1499     case GDB_WATCHPOINT_WRITE:
1500     case GDB_WATCHPOINT_READ:
1501     case GDB_WATCHPOINT_ACCESS:
1502         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503             return -ENOBUFS;
1504         }
1505 
1506         if (find_hw_breakpoint(addr, type) >= 0) {
1507             return -EEXIST;
1508         }
1509 
1510         nb_hw_watchpoint++;
1511         break;
1512 
1513     default:
1514         return -ENOSYS;
1515     }
1516 
1517     return 0;
1518 }
1519 
1520 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521                                   target_ulong len, int type)
1522 {
1523     int n;
1524 
1525     n = find_hw_breakpoint(addr, type);
1526     if (n < 0) {
1527         return -ENOENT;
1528     }
1529 
1530     switch (type) {
1531     case GDB_BREAKPOINT_HW:
1532         nb_hw_breakpoint--;
1533         break;
1534 
1535     case GDB_WATCHPOINT_WRITE:
1536     case GDB_WATCHPOINT_READ:
1537     case GDB_WATCHPOINT_ACCESS:
1538         nb_hw_watchpoint--;
1539         break;
1540 
1541     default:
1542         return -ENOSYS;
1543     }
1544     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1545 
1546     return 0;
1547 }
1548 
1549 void kvm_arch_remove_all_hw_breakpoints(void)
1550 {
1551     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1552 }
1553 
1554 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1555 {
1556     int n;
1557 
1558     /* Software Breakpoint updates */
1559     if (kvm_sw_breakpoints_active(cs)) {
1560         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1561     }
1562 
1563     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564            <= ARRAY_SIZE(hw_debug_points));
1565     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1566 
1567     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571             switch (hw_debug_points[n].type) {
1572             case GDB_BREAKPOINT_HW:
1573                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574                 break;
1575             case GDB_WATCHPOINT_WRITE:
1576                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577                 break;
1578             case GDB_WATCHPOINT_READ:
1579                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580                 break;
1581             case GDB_WATCHPOINT_ACCESS:
1582                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583                                         KVMPPC_DEBUG_WATCH_READ;
1584                 break;
1585             default:
1586                 cpu_abort(cs, "Unsupported breakpoint type\n");
1587             }
1588             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1589         }
1590     }
1591 }
1592 
1593 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1594 {
1595     CPUState *cs = CPU(cpu);
1596     CPUPPCState *env = &cpu->env;
1597     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598     int handle = 0;
1599     int n;
1600     int flag = 0;
1601 
1602     if (cs->singlestep_enabled) {
1603         handle = 1;
1604     } else if (arch_info->status) {
1605         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608                 if (n >= 0) {
1609                     handle = 1;
1610                 }
1611             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1613                 n = find_hw_watchpoint(arch_info->address,  &flag);
1614                 if (n >= 0) {
1615                     handle = 1;
1616                     cs->watchpoint_hit = &hw_watchpoint;
1617                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618                     hw_watchpoint.flags = flag;
1619                 }
1620             }
1621         }
1622     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623         handle = 1;
1624     } else {
1625         /* QEMU is not able to handle debug exception, so inject
1626          * program exception to guest;
1627          * Yes program exception NOT debug exception !!
1628          * When QEMU is using debug resources then debug exception must
1629          * be always set. To achieve this we set MSR_DE and also set
1630          * MSRP_DEP so guest cannot change MSR_DE.
1631          * When emulating debug resource for guest we want guest
1632          * to control MSR_DE (enable/disable debug interrupt on need).
1633          * Supporting both configurations are NOT possible.
1634          * So the result is that we cannot share debug resources
1635          * between QEMU and Guest on BOOKE architecture.
1636          * In the current design QEMU gets the priority over guest,
1637          * this means that if QEMU is using debug resources then guest
1638          * cannot use them;
1639          * For software breakpoint QEMU uses a privileged instruction;
1640          * So there cannot be any reason that we are here for guest
1641          * set debug exception, only possibility is guest executed a
1642          * privileged / illegal instruction and that's why we are
1643          * injecting a program interrupt.
1644          */
1645 
1646         cpu_synchronize_state(cs);
1647         /* env->nip is PC, so increment this by 4 to use
1648          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1649          */
1650         env->nip += 4;
1651         cs->exception_index = POWERPC_EXCP_PROGRAM;
1652         env->error_code = POWERPC_EXCP_INVAL;
1653         ppc_cpu_do_interrupt(cs);
1654     }
1655 
1656     return handle;
1657 }
1658 
1659 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1660 {
1661     PowerPCCPU *cpu = POWERPC_CPU(cs);
1662     CPUPPCState *env = &cpu->env;
1663     int ret;
1664 
1665     qemu_mutex_lock_iothread();
1666 
1667     switch (run->exit_reason) {
1668     case KVM_EXIT_DCR:
1669         if (run->dcr.is_write) {
1670             DPRINTF("handle dcr write\n");
1671             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672         } else {
1673             DPRINTF("handle dcr read\n");
1674             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1675         }
1676         break;
1677     case KVM_EXIT_HLT:
1678         DPRINTF("handle halt\n");
1679         ret = kvmppc_handle_halt(cpu);
1680         break;
1681 #if defined(TARGET_PPC64)
1682     case KVM_EXIT_PAPR_HCALL:
1683         DPRINTF("handle PAPR hypercall\n");
1684         run->papr_hcall.ret = spapr_hypercall(cpu,
1685                                               run->papr_hcall.nr,
1686                                               run->papr_hcall.args);
1687         ret = 0;
1688         break;
1689 #endif
1690     case KVM_EXIT_EPR:
1691         DPRINTF("handle epr\n");
1692         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693         ret = 0;
1694         break;
1695     case KVM_EXIT_WATCHDOG:
1696         DPRINTF("handle watchdog expiry\n");
1697         watchdog_perform_action();
1698         ret = 0;
1699         break;
1700 
1701     case KVM_EXIT_DEBUG:
1702         DPRINTF("handle debug exception\n");
1703         if (kvm_handle_debug(cpu, run)) {
1704             ret = EXCP_DEBUG;
1705             break;
1706         }
1707         /* re-enter, this exception was guest-internal */
1708         ret = 0;
1709         break;
1710 
1711     default:
1712         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713         ret = -1;
1714         break;
1715     }
1716 
1717     qemu_mutex_unlock_iothread();
1718     return ret;
1719 }
1720 
1721 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1722 {
1723     CPUState *cs = CPU(cpu);
1724     uint32_t bits = tsr_bits;
1725     struct kvm_one_reg reg = {
1726         .id = KVM_REG_PPC_OR_TSR,
1727         .addr = (uintptr_t) &bits,
1728     };
1729 
1730     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1731 }
1732 
1733 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1734 {
1735 
1736     CPUState *cs = CPU(cpu);
1737     uint32_t bits = tsr_bits;
1738     struct kvm_one_reg reg = {
1739         .id = KVM_REG_PPC_CLEAR_TSR,
1740         .addr = (uintptr_t) &bits,
1741     };
1742 
1743     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1744 }
1745 
1746 int kvmppc_set_tcr(PowerPCCPU *cpu)
1747 {
1748     CPUState *cs = CPU(cpu);
1749     CPUPPCState *env = &cpu->env;
1750     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1751 
1752     struct kvm_one_reg reg = {
1753         .id = KVM_REG_PPC_TCR,
1754         .addr = (uintptr_t) &tcr,
1755     };
1756 
1757     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1758 }
1759 
1760 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1761 {
1762     CPUState *cs = CPU(cpu);
1763     int ret;
1764 
1765     if (!kvm_enabled()) {
1766         return -1;
1767     }
1768 
1769     if (!cap_ppc_watchdog) {
1770         printf("warning: KVM does not support watchdog");
1771         return -1;
1772     }
1773 
1774     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775     if (ret < 0) {
1776         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777                 __func__, strerror(-ret));
1778         return ret;
1779     }
1780 
1781     return ret;
1782 }
1783 
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line) into @value, a buffer of @len bytes, using
 * pstrcpy().
 *
 * Returns 0 if such a line was found, -1 otherwise (including when the
 * file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* An empty string also ends the scan */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
1811 
1812 uint32_t kvmppc_get_tbfreq(void)
1813 {
1814     char line[512];
1815     char *ns;
1816     uint32_t retval = NANOSECONDS_PER_SECOND;
1817 
1818     if (read_cpuinfo("timebase", line, sizeof(line))) {
1819         return retval;
1820     }
1821 
1822     if (!(ns = strchr(line, ':'))) {
1823         return retval;
1824     }
1825 
1826     ns++;
1827 
1828     return atoi(ns);
1829 }
1830 
/* Read the host's /proc/device-tree/system-id through
 * g_file_get_contents(), which hands the contents back via @value.
 * Returns that call's result. */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1836 
/* Read the host's /proc/device-tree/model through g_file_get_contents(),
 * which hands the contents back via @value.  Returns that call's result. */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1841 
1842 /* Try to find a device tree node for a CPU with clock-frequency property */
1843 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1844 {
1845     struct dirent *dirp;
1846     DIR *dp;
1847 
1848     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850         return -1;
1851     }
1852 
1853     buf[0] = '\0';
1854     while ((dirp = readdir(dp)) != NULL) {
1855         FILE *f;
1856         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857                  dirp->d_name);
1858         f = fopen(buf, "r");
1859         if (f) {
1860             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861             fclose(f);
1862             break;
1863         }
1864         buf[0] = '\0';
1865     }
1866     closedir(dp);
1867     if (buf[0] == '\0') {
1868         printf("Unknown host!\n");
1869         return -1;
1870     }
1871 
1872     return 0;
1873 }
1874 
/*
 * Read a big-endian integer property from the device tree file
 * @filename.
 *
 * A 4-byte file is decoded as a 32-bit value and an 8-byte file as a
 * 64-bit value.  Returns (uint64_t)-1 if the file cannot be opened and 0
 * if the number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    int nread;
    FILE *f = fopen(filename, "rb");

    if (!f) {
        return -1;
    }

    nread = fread(&u, 1, sizeof(u), f);
    fclose(f);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(u.v64);
    }

    return 0;
}
1901 
1902 /* Read a CPU node property from the host device tree that's a single
1903  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1904  * (can't find or open the property, or doesn't understand the
1905  * format) */
1906 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1907 {
1908     char buf[PATH_MAX], *tmp;
1909     uint64_t val;
1910 
1911     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912         return -1;
1913     }
1914 
1915     tmp = g_strdup_printf("%s/%s", buf, propname);
1916     val = kvmppc_read_int_dt(tmp);
1917     g_free(tmp);
1918 
1919     return val;
1920 }
1921 
/*
 * Return the "clock-frequency" property of the host CPU node, exactly as
 * reported by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    const uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1926 
1927 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1928  {
1929      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930      CPUState *cs = CPU(cpu);
1931 
1932     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934         return 0;
1935     }
1936 
1937     return 1;
1938 }
1939 
1940 int kvmppc_get_hasidle(CPUPPCState *env)
1941 {
1942     struct kvm_ppc_pvinfo pvinfo;
1943 
1944     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946         return 1;
1947     }
1948 
1949     return 0;
1950 }
1951 
1952 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1953 {
1954     uint32_t *hc = (uint32_t*)buf;
1955     struct kvm_ppc_pvinfo pvinfo;
1956 
1957     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958         memcpy(buf, pvinfo.hcall, buf_len);
1959         return 0;
1960     }
1961 
1962     /*
1963      * Fallback to always fail hypercalls regardless of endianness:
1964      *
1965      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966      *     li r3, -1
1967      *     b .+8       (becomes nop in wrong endian)
1968      *     bswap32(li r3, -1)
1969      */
1970 
1971     hc[0] = cpu_to_be32(0x08000048);
1972     hc[1] = cpu_to_be32(0x3860ffff);
1973     hc[2] = cpu_to_be32(0x48000008);
1974     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1975 
1976     return 1;
1977 }
1978 
1979 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1980 {
1981     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1982 }
1983 
1984 void kvmppc_enable_logical_ci_hcalls(void)
1985 {
1986     /*
1987      * FIXME: it would be nice if we could detect the cases where
1988      * we're using a device which requires the in kernel
1989      * implementation of these hcalls, but the kernel lacks them and
1990      * produce a warning.
1991      */
1992     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1994 }
1995 
1996 void kvmppc_enable_set_mode_hcall(void)
1997 {
1998     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1999 }
2000 
2001 void kvmppc_enable_clear_ref_mod_hcalls(void)
2002 {
2003     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2005 }
2006 
2007 void kvmppc_set_papr(PowerPCCPU *cpu)
2008 {
2009     CPUState *cs = CPU(cpu);
2010     int ret;
2011 
2012     if (!kvm_enabled()) {
2013         return;
2014     }
2015 
2016     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017     if (ret) {
2018         error_report("This vCPU type or KVM version does not support PAPR");
2019         exit(1);
2020     }
2021 
2022     /* Update the capability flag so we sync the right information
2023      * with kvm */
2024     cap_papr = 1;
2025 }
2026 
2027 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2028 {
2029     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2030 }
2031 
2032 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2033 {
2034     CPUState *cs = CPU(cpu);
2035     int ret;
2036 
2037     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038     if (ret && mpic_proxy) {
2039         error_report("This KVM version does not support EPR");
2040         exit(1);
2041     }
2042 }
2043 
2044 int kvmppc_smt_threads(void)
2045 {
2046     return cap_ppc_smt ? cap_ppc_smt : 1;
2047 }
2048 
2049 int kvmppc_set_smt_threads(int smt)
2050 {
2051     int ret;
2052 
2053     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054     if (!ret) {
2055         cap_ppc_smt = smt;
2056     }
2057     return ret;
2058 }
2059 
2060 void kvmppc_hint_smt_possible(Error **errp)
2061 {
2062     int i;
2063     GString *g;
2064     char *s;
2065 
2066     assert(kvm_enabled());
2067     if (cap_ppc_smt_possible) {
2068         g = g_string_new("Available VSMT modes:");
2069         for (i = 63; i >= 0; i--) {
2070             if ((1UL << i) & cap_ppc_smt_possible) {
2071                 g_string_append_printf(g, " %lu", (1UL << i));
2072             }
2073         }
2074         s = g_string_free(g, false);
2075         error_append_hint(errp, "%s.\n", s);
2076         g_free(s);
2077     } else {
2078         error_append_hint(errp,
2079                           "This KVM seems to be too old to support VSMT.\n");
2080     }
2081 }
2082 
2083 
#ifdef TARGET_PPC64
/*
 * Clamp @current_size to 2^(best_page_shift + hash_shift - 7), where
 * best_page_shift is the largest page shift reported in the SMMU info
 * whose page size does not exceed qemu_getrampagesize() (0 when none
 * qualifies).  Failure to obtain the SMMU info is reported through
 * &error_fatal.
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize;
    long best_page_shift = 0;
    int i;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(&info, &error_fatal);
    rampagesize = qemu_getrampagesize();

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        /* Skip unused slots */
        if (!sps->page_shift) {
            continue;
        }

        /* Not an improvement over what we already have */
        if (!(sps->page_shift > best_page_shift)) {
            continue;
        }

        if ((1UL << sps->page_shift) <= rampagesize) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
2114 
2115 bool kvmppc_spapr_use_multitce(void)
2116 {
2117     return cap_spapr_multitce;
2118 }
2119 
2120 int kvmppc_spapr_enable_inkernel_multitce(void)
2121 {
2122     int ret;
2123 
2124     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125                             H_PUT_TCE_INDIRECT, 1);
2126     if (!ret) {
2127         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128                                 H_STUFF_TCE, 1);
2129     }
2130 
2131     return ret;
2132 }
2133 
2134 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2135                               uint64_t bus_offset, uint32_t nb_table,
2136                               int *pfd, bool need_vfio)
2137 {
2138     long len;
2139     int fd;
2140     void *table;
2141 
2142     /* Must set fd to -1 so we don't try to munmap when called for
2143      * destroying the table, which the upper layers -will- do
2144      */
2145     *pfd = -1;
2146     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2147         return NULL;
2148     }
2149 
2150     if (cap_spapr_tce_64) {
2151         struct kvm_create_spapr_tce_64 args = {
2152             .liobn = liobn,
2153             .page_shift = page_shift,
2154             .offset = bus_offset >> page_shift,
2155             .size = nb_table,
2156             .flags = 0
2157         };
2158         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2159         if (fd < 0) {
2160             fprintf(stderr,
2161                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2162                     liobn);
2163             return NULL;
2164         }
2165     } else if (cap_spapr_tce) {
2166         uint64_t window_size = (uint64_t) nb_table << page_shift;
2167         struct kvm_create_spapr_tce args = {
2168             .liobn = liobn,
2169             .window_size = window_size,
2170         };
2171         if ((window_size != args.window_size) || bus_offset) {
2172             return NULL;
2173         }
2174         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2175         if (fd < 0) {
2176             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2177                     liobn);
2178             return NULL;
2179         }
2180     } else {
2181         return NULL;
2182     }
2183 
2184     len = nb_table * sizeof(uint64_t);
2185     /* FIXME: round this up to page size */
2186 
2187     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2188     if (table == MAP_FAILED) {
2189         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2190                 liobn);
2191         close(fd);
2192         return NULL;
2193     }
2194 
2195     *pfd = fd;
2196     return table;
2197 }
2198 
/*
 * Unmap a TCE table of @nb_table 64-bit entries at @table and close its
 * file descriptor @fd.
 *
 * Returns -1 without doing anything when @fd is negative, 0 otherwise.
 * Failures of munmap() or close() are reported on stderr but do not
 * change the return value.  The descriptor is closed even when the unmap
 * fails, so a failed unmap leaks only the mapping and not the fd as well.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2217 
2218 int kvmppc_reset_htab(int shift_hint)
2219 {
2220     uint32_t shift = shift_hint;
2221 
2222     if (!kvm_enabled()) {
2223         /* Full emulation, tell caller to allocate htab itself */
2224         return 0;
2225     }
2226     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2227         int ret;
2228         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2229         if (ret == -ENOTTY) {
2230             /* At least some versions of PR KVM advertise the
2231              * capability, but don't implement the ioctl().  Oops.
2232              * Return 0 so that we allocate the htab in qemu, as is
2233              * correct for PR. */
2234             return 0;
2235         } else if (ret < 0) {
2236             return ret;
2237         }
2238         return shift;
2239     }
2240 
2241     /* We have a kernel that predates the htab reset calls.  For PR
2242      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2243      * this era, it has allocated a 16MB fixed size hash table already. */
2244     if (kvmppc_is_pr(kvm_state)) {
2245         /* PR - tell caller to allocate htab */
2246         return 0;
2247     } else {
2248         /* HV - assume 16MB kernel allocated htab */
2249         return 24;
2250     }
2251 }
2252 
2253 static inline uint32_t mfpvr(void)
2254 {
2255     uint32_t pvr;
2256 
2257     asm ("mfpvr %0"
2258          : "=r"(pvr));
2259     return pvr;
2260 }
2261 
/*
 * Set (@on true) or clear (@on false) the bits given by @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    const uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2270 
2271 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272 {
2273     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2274     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2275     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2276 
2277     /* Now fix up the class with information we can query from the host */
2278     pcc->pvr = mfpvr();
2279 
2280     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2281                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2282     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2283                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2284     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2285                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2286 
2287     if (dcache_size != -1) {
2288         pcc->l1_dcache_size = dcache_size;
2289     }
2290 
2291     if (icache_size != -1) {
2292         pcc->l1_icache_size = icache_size;
2293     }
2294 
2295 #if defined(TARGET_PPC64)
2296     pcc->radix_page_info = kvm_get_radix_page_info();
2297 
2298     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2299         /*
2300          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2301          * compliant.  More importantly, advertising ISA 3.00
2302          * architected mode may prevent guests from activating
2303          * necessary DD1 workarounds.
2304          */
2305         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2306                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2307     }
2308 #endif /* defined(TARGET_PPC64) */
2309 }
2310 
2311 bool kvmppc_has_cap_epr(void)
2312 {
2313     return cap_epr;
2314 }
2315 
2316 bool kvmppc_has_cap_fixup_hcalls(void)
2317 {
2318     return cap_fixup_hcalls;
2319 }
2320 
2321 bool kvmppc_has_cap_htm(void)
2322 {
2323     return cap_htm;
2324 }
2325 
2326 bool kvmppc_has_cap_mmu_radix(void)
2327 {
2328     return cap_mmu_radix;
2329 }
2330 
2331 bool kvmppc_has_cap_mmu_hash_v3(void)
2332 {
2333     return cap_mmu_hash_v3;
2334 }
2335 
/*
 * On 64-bit targets, returns true when the host PVR masked with
 * CPU_POWERPC_POWER_SERVER_MASK equals one of the POWER8E, POWER8NVL or
 * POWER8 base PVRs.  Always false on other targets.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    const uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8_BASE) {
        return true;
    }
#endif /* TARGET_PPC64 */
    return false;
}
2349 
2350 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2351 {
2352     bool l1d_thread_priv_req = !kvmppc_power8_host();
2353 
2354     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2355         return 2;
2356     } else if ((!l1d_thread_priv_req ||
2357                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2358                (c.character & c.character_mask
2359                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2360         return 1;
2361     }
2362 
2363     return 0;
2364 }
2365 
2366 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2367 {
2368     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2369         return 2;
2370     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2371         return 1;
2372     }
2373 
2374     return 0;
2375 }
2376 
2377 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2378 {
2379     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2380         return  SPAPR_CAP_FIXED_CCD;
2381     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2382         return SPAPR_CAP_FIXED_IBS;
2383     }
2384 
2385     return 0;
2386 }
2387 
2388 static void kvmppc_get_cpu_characteristics(KVMState *s)
2389 {
2390     struct kvm_ppc_cpu_char c;
2391     int ret;
2392 
2393     /* Assume broken */
2394     cap_ppc_safe_cache = 0;
2395     cap_ppc_safe_bounds_check = 0;
2396     cap_ppc_safe_indirect_branch = 0;
2397 
2398     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2399     if (!ret) {
2400         return;
2401     }
2402     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2403     if (ret < 0) {
2404         return;
2405     }
2406 
2407     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2408     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2409     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2410 }
2411 
2412 int kvmppc_get_cap_safe_cache(void)
2413 {
2414     return cap_ppc_safe_cache;
2415 }
2416 
2417 int kvmppc_get_cap_safe_bounds_check(void)
2418 {
2419     return cap_ppc_safe_bounds_check;
2420 }
2421 
2422 int kvmppc_get_cap_safe_indirect_branch(void)
2423 {
2424     return cap_ppc_safe_indirect_branch;
2425 }
2426 
2427 bool kvmppc_has_cap_nested_kvm_hv(void)
2428 {
2429     return !!cap_ppc_nested_kvm_hv;
2430 }
2431 
2432 int kvmppc_set_cap_nested_kvm_hv(int enable)
2433 {
2434     return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2435 }
2436 
2437 bool kvmppc_has_cap_spapr_vfio(void)
2438 {
2439     return cap_spapr_vfio;
2440 }
2441 
2442 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2443 {
2444     uint32_t host_pvr = mfpvr();
2445     PowerPCCPUClass *pvr_pcc;
2446 
2447     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2448     if (pvr_pcc == NULL) {
2449         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2450     }
2451 
2452     return pvr_pcc;
2453 }
2454 
2455 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2456 {
2457     TypeInfo type_info = {
2458         .name = TYPE_HOST_POWERPC_CPU,
2459         .class_init = kvmppc_host_cpu_class_init,
2460     };
2461     MachineClass *mc = MACHINE_GET_CLASS(ms);
2462     PowerPCCPUClass *pvr_pcc;
2463     ObjectClass *oc;
2464     DeviceClass *dc;
2465     int i;
2466 
2467     pvr_pcc = kvm_ppc_get_host_cpu_class();
2468     if (pvr_pcc == NULL) {
2469         return -1;
2470     }
2471     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2472     type_register(&type_info);
2473     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2474         /* override TCG default cpu type with 'host' cpu model */
2475         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2476     }
2477 
2478     oc = object_class_by_name(type_info.name);
2479     g_assert(oc);
2480 
2481     /*
2482      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2483      * we want "POWER8" to be a "family" alias that points to the current
2484      * host CPU type, too)
2485      */
2486     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2487     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2488         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2489             char *suffix;
2490 
2491             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2492             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2493             if (suffix) {
2494                 *suffix = 0;
2495             }
2496             break;
2497         }
2498     }
2499 
2500     return 0;
2501 }
2502 
2503 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2504 {
2505     struct kvm_rtas_token_args args = {
2506         .token = token,
2507     };
2508 
2509     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2510         return -ENOENT;
2511     }
2512 
2513     strncpy(args.name, function, sizeof(args.name));
2514 
2515     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2516 }
2517 
2518 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2519 {
2520     struct kvm_get_htab_fd s = {
2521         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2522         .start_index = index,
2523     };
2524     int ret;
2525 
2526     if (!cap_htab_fd) {
2527         error_setg(errp, "KVM version doesn't support %s the HPT",
2528                    write ? "writing" : "reading");
2529         return -ENOTSUP;
2530     }
2531 
2532     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2533     if (ret < 0) {
2534         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2535                    write ? "writing" : "reading", write ? "to" : "from",
2536                    strerror(errno));
2537         return -errno;
2538     }
2539 
2540     return ret;
2541 }
2542 
2543 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2544 {
2545     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2546     uint8_t buf[bufsize];
2547     ssize_t rc;
2548 
2549     do {
2550         rc = read(fd, buf, bufsize);
2551         if (rc < 0) {
2552             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2553                     strerror(errno));
2554             return rc;
2555         } else if (rc) {
2556             uint8_t *buffer = buf;
2557             ssize_t n = rc;
2558             while (n) {
2559                 struct kvm_get_htab_header *head =
2560                     (struct kvm_get_htab_header *) buffer;
2561                 size_t chunksize = sizeof(*head) +
2562                      HASH_PTE_SIZE_64 * head->n_valid;
2563 
2564                 qemu_put_be32(f, head->index);
2565                 qemu_put_be16(f, head->n_valid);
2566                 qemu_put_be16(f, head->n_invalid);
2567                 qemu_put_buffer(f, (void *)(head + 1),
2568                                 HASH_PTE_SIZE_64 * head->n_valid);
2569 
2570                 buffer += chunksize;
2571                 n -= chunksize;
2572             }
2573         }
2574     } while ((rc != 0)
2575              && ((max_ns < 0)
2576                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2577 
2578     return (rc == 0) ? 1 : 0;
2579 }
2580 
2581 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2582                            uint16_t n_valid, uint16_t n_invalid)
2583 {
2584     struct kvm_get_htab_header *buf;
2585     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2586     ssize_t rc;
2587 
2588     buf = alloca(chunksize);
2589     buf->index = index;
2590     buf->n_valid = n_valid;
2591     buf->n_invalid = n_invalid;
2592 
2593     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2594 
2595     rc = write(fd, buf, chunksize);
2596     if (rc < 0) {
2597         fprintf(stderr, "Error writing KVM hash table: %s\n",
2598                 strerror(errno));
2599         return rc;
2600     }
2601     if (rc != chunksize) {
2602         /* We should never get a short write on a single chunk */
2603         fprintf(stderr, "Short write, restoring KVM hash table\n");
2604         return -1;
2605     }
2606     return 0;
2607 }
2608 
2609 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2610 {
2611     return true;
2612 }
2613 
2614 void kvm_arch_init_irq_routing(KVMState *s)
2615 {
2616 }
2617 
/*
 * Read @n hash PTEs starting at index @ptex from KVM into @hptes.
 *
 * A read descriptor for the hash table is opened at @ptex, consumed in
 * pieces and closed again.  Each piece consists of chunks: a header
 * followed by n_valid PTEs, which are copied into @hptes; the n_invalid
 * entries a header announces are zero-filled.  Both counts are clipped so
 * that no more than @n entries are written in total.
 *
 * Read errors and unexpected chunk indexes are reported via hw_error().
 */
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    /* Errors go to &error_abort, so fd is not checked here */
    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    /* i counts the entries of hptes[] filled in so far */
    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        /* Read at most one group's worth of PTEs per read() call */
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        /*
         * NOTE(review): a return value of 0 is not treated specially; the
         * outer loop would then simply retry - confirm this cannot
         * happen while i < n.
         */
        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        /* Walk the chunk headers contained in the rc bytes just read */
        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            /* Chunks are expected to arrive strictly in index order */
            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            /* Never copy more valid entries than the caller asked for */
            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            /* Invalid entries carry no data: represent them as zeroes */
            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            /* Advance past this header and its (unclipped) valid PTEs */
            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}
2664 
2665 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2666 {
2667     int fd, rc;
2668     struct {
2669         struct kvm_get_htab_header hdr;
2670         uint64_t pte0;
2671         uint64_t pte1;
2672     } buf;
2673 
2674     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2675 
2676     buf.hdr.n_valid = 1;
2677     buf.hdr.n_invalid = 0;
2678     buf.hdr.index = ptex;
2679     buf.pte0 = cpu_to_be64(pte0);
2680     buf.pte1 = cpu_to_be64(pte1);
2681 
2682     rc = write(fd, &buf, sizeof(buf));
2683     if (rc != sizeof(buf)) {
2684         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2685     }
2686     close(fd);
2687 }
2688 
2689 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2690                              uint64_t address, uint32_t data, PCIDevice *dev)
2691 {
2692     return 0;
2693 }
2694 
2695 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2696                                 int vector, PCIDevice *dev)
2697 {
2698     return 0;
2699 }
2700 
/* Nothing to do: always returns 0 and ignores @virq. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2705 
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16 = data & 0xffff;

    return low16;
}
2710 
2711 int kvmppc_enable_hwrng(void)
2712 {
2713     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2714         return -1;
2715     }
2716 
2717     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2718 }
2719 
2720 void kvmppc_check_papr_resize_hpt(Error **errp)
2721 {
2722     if (!kvm_enabled()) {
2723         return; /* No KVM, we're good */
2724     }
2725 
2726     if (cap_resize_hpt) {
2727         return; /* Kernel has explicit support, we're good */
2728     }
2729 
2730     /* Otherwise fallback on looking for PR KVM */
2731     if (kvmppc_is_pr(kvm_state)) {
2732         return;
2733     }
2734 
2735     error_setg(errp,
2736                "Hash page table resizing not available with this KVM version");
2737 }
2738 
2739 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2740 {
2741     CPUState *cs = CPU(cpu);
2742     struct kvm_ppc_resize_hpt rhpt = {
2743         .flags = flags,
2744         .shift = shift,
2745     };
2746 
2747     if (!cap_resize_hpt) {
2748         return -ENOSYS;
2749     }
2750 
2751     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2752 }
2753 
2754 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2755 {
2756     CPUState *cs = CPU(cpu);
2757     struct kvm_ppc_resize_hpt rhpt = {
2758         .flags = flags,
2759         .shift = shift,
2760     };
2761 
2762     if (!cap_resize_hpt) {
2763         return -ENOSYS;
2764     }
2765 
2766     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2767 }
2768 
2769 /*
2770  * This is a helper function to detect a post migration scenario
2771  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2772  * the guest kernel can't handle a PVR value other than the actual host
2773  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2774  *
2775  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2776  * (so, we're HV), return true. The workaround itself is done in
2777  * cpu_post_load.
2778  *
2779  * The order here is important: we'll only check for KVM PR as a
2780  * fallback if the guest kernel can't handle the situation itself.
2781  * We need to avoid as much as possible querying the running KVM type
2782  * in QEMU level.
2783  */
2784 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2785 {
2786     CPUState *cs = CPU(cpu);
2787 
2788     if (!kvm_enabled()) {
2789         return false;
2790     }
2791 
2792     if (cap_ppc_pvr_compat) {
2793         return false;
2794     }
2795 
2796     return !kvmppc_is_pr(cs->kvm_state);
2797 }
2798 
2799 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2800 {
2801     CPUState *cs = CPU(cpu);
2802 
2803     if (kvm_enabled()) {
2804         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2805     }
2806 }
2807