xref: /qemu/target/ppc/kvm.c (revision dc03272d)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
36 
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
52 
53 //#define DEBUG_KVM
54 
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60     do { } while (0)
61 #endif
62 
63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
64 
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66     KVM_CAP_LAST_INFO
67 };
68 
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm;             /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
94 
95 static uint32_t debug_inst_opcode;
96 
97 /* XXX We have a race condition where we actually have a level triggered
98  *     interrupt, but the infrastructure can't expose that yet, so the guest
99  *     takes the interrupt but ignores it, goes to sleep and never gets notified that there's
100  *     still an interrupt pending.
101  *
102  *     As a quick workaround, let's just wake up again 20 ms after we injected
103  *     an interrupt. That way we can ensure that we're always reinjecting
104  *     interrupts in case the guest swallowed them.
105  */
106 static QEMUTimer *idle_timer;
107 
108 static void kvm_kick_cpu(void *opaque)
109 {
110     PowerPCCPU *cpu = opaque;
111 
112     qemu_cpu_kick(CPU(cpu));
113 }
114 
115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
116  * should only be used for fallback tests - generally we should use
117  * explicit capabilities for the features we want, rather than
118  * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
120 {
121     /* Assume KVM-PR if the GET_PVINFO capability is available */
122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
123 }
124 
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
127 
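/*
 * Probe the KVM capabilities we rely on and cache them in the cap_*
 * globals above; called once when the KVM accelerator is initialized.
 */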
128 int kvm_arch_init(MachineState *ms, KVMState *s)
129 {
130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143     /* Note: we don't set cap_papr here, because this capability is
144      * only activated later, by kvmppc_set_papr() */
145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152     kvmppc_get_cpu_characteristics(s);
153     /*
154      * Note: setting it to false because there is no such capability
155      * in KVM at this moment.
156      *
157      * TODO: call kvm_vm_check_extension() with the right capability
158      * after the kernel starts implementing it. */
159     cap_ppc_pvr_compat = false;
160 
161     if (!cap_interrupt_level) {
162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163                         "VM to stall at times!\n");
164     }
165 
166     kvm_ppc_register_host_cpu_type(ms);
167 
168     return 0;
169 }
170 
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
172 {
173     return 0;
174 }
175 
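/*
 * Tell KVM the guest's PVR via KVM_SET_SREGS.  BookE guests simply keep
 * the native (host) PVR; Book3S needs the SEGSTATE capability for this.
 */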
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
177 {
178     CPUPPCState *cenv = &cpu->env;
179     CPUState *cs = CPU(cpu);
180     struct kvm_sregs sregs;
181     int ret;
182 
183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184         /* What we're really trying to say is "if we're on BookE, we use
185            the native PVR for now". This is the only sane way to check
186            it though, so we may mislead users into thinking they can run
187            BookE guests on BookS. Let's hope nobody dares enough :) */
188         return 0;
189     } else {
190         if (!cap_segstate) {
191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
192             return -ENOSYS;
193         }
194     }
195 
196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197     if (ret) {
198         return ret;
199     }
200 
201     sregs.pvr = cenv->spr[SPR_PVR];
202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
203 }
204 
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
207 {
208     CPUPPCState *env = &cpu->env;
209     CPUState *cs = CPU(cpu);
210     struct kvm_book3e_206_tlb_params params = {};
211     struct kvm_config_tlb cfg = {};
212     unsigned int entries = 0;
213     int ret, i;
214 
215     if (!kvm_enabled() ||
216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217         return 0;
218     }
219 
220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
221 
222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
225         entries += params.tlb_sizes[i];
226     }
227 
228     assert(entries == env->nb_tlb);
229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
230 
231     env->tlb_dirty = true;
232 
233     cfg.array = (uintptr_t)env->tlb.tlbm;
234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235     cfg.params = (uintptr_t)&params;
236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
237 
238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239     if (ret < 0) {
240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241                 __func__, strerror(-ret));
242         return ret;
243     }
244 
245     env->kvm_sw_tlb = true;
246     return 0;
247 }
248 
249 
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252                                        struct kvm_ppc_smmu_info *info)
253 {
254     CPUPPCState *env = &cpu->env;
255     CPUState *cs = CPU(cpu);
256 
257     memset(info, 0, sizeof(*info));
258 
259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
260      * need to "guess" what the supported page sizes are.
261      *
262      * For that to work we make a few assumptions:
263      *
264      * - Check whether we are running "PR" KVM which only supports 4K
265      *   and 16M pages, but supports them regardless of the backing
266      *   store characteristics. We also don't support 1T segments.
267      *
268      *   This is safe because if HV KVM ever supports that capability or PR
269      *   KVM grows support for more page/segment sizes, those versions
270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271      *   will not hit this fallback.
272      *
273      * - Else we are running HV KVM. This means we only support page
274      *   sizes that fit in the backing store. Additionally we only
275      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
276      *   P7 encodings for the SLB and hash table. Here too, we assume
277      *   support for any newer processor will mean a kernel that
278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279      *   this fallback.
280      */
281     if (kvmppc_is_pr(cs->kvm_state)) {
282         /* No flags */
283         info->flags = 0;
284         info->slb_size = 64;
285 
286         /* Standard 4k base page size segment */
287         info->sps[0].page_shift = 12;
288         info->sps[0].slb_enc = 0;
289         info->sps[0].enc[0].page_shift = 12;
290         info->sps[0].enc[0].pte_enc = 0;
291 
292         /* Standard 16M large page size segment */
293         info->sps[1].page_shift = 24;
294         info->sps[1].slb_enc = SLB_VSID_L;
295         info->sps[1].enc[0].page_shift = 24;
296         info->sps[1].enc[0].pte_enc = 0;
297     } else {
298         int i = 0;
299 
300         /* HV KVM has backing store size restrictions */
301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
302 
303         if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304             info->flags |= KVM_PPC_1T_SEGMENTS;
305         }
306 
307         if (env->mmu_model == POWERPC_MMU_2_06 ||
308             env->mmu_model == POWERPC_MMU_2_07) {
309             info->slb_size = 32;
310         } else {
311             info->slb_size = 64;
312         }
313 
314         /* Standard 4k base page size segment */
315         info->sps[i].page_shift = 12;
316         info->sps[i].slb_enc = 0;
317         info->sps[i].enc[0].page_shift = 12;
318         info->sps[i].enc[0].pte_enc = 0;
319         i++;
320 
321         /* 64K on MMU 2.06 and later */
322         if (env->mmu_model == POWERPC_MMU_2_06 ||
323             env->mmu_model == POWERPC_MMU_2_07) {
324             info->sps[i].page_shift = 16;
325             info->sps[i].slb_enc = 0x110;
326             info->sps[i].enc[0].page_shift = 16;
327             info->sps[i].enc[0].pte_enc = 1;
328             i++;
329         }
330 
331         /* Standard 16M large page size segment */
332         info->sps[i].page_shift = 24;
333         info->sps[i].slb_enc = SLB_VSID_L;
334         info->sps[i].enc[0].page_shift = 24;
335         info->sps[i].enc[0].pte_enc = 0;
336     }
337 }
338 
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
340 {
341     CPUState *cs = CPU(cpu);
342     int ret;
343 
344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346         if (ret == 0) {
347             return;
348         }
349     }
350 
351     kvm_get_fallback_smmu_info(cpu, info);
352 }
353 
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
355 {
356     KVMState *s = KVM_STATE(current_machine->accelerator);
357     struct ppc_radix_page_info *radix_page_info;
358     struct kvm_ppc_rmmu_info rmmu_info;
359     int i;
360 
361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362         return NULL;
363     }
364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365         return NULL;
366     }
367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
368     radix_page_info->count = 0;
369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370         if (rmmu_info.ap_encodings[i]) {
371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372             radix_page_info->count++;
373         }
374     }
375     return radix_page_info;
376 }
377 
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379                                      bool radix, bool gtse,
380                                      uint64_t proc_tbl)
381 {
382     CPUState *cs = CPU(cpu);
383     int ret;
384     uint64_t flags = 0;
385     struct kvm_ppc_mmuv3_cfg cfg = {
386         .process_table = proc_tbl,
387     };
388 
389     if (radix) {
390         flags |= KVM_PPC_MMUV3_RADIX;
391     }
392     if (gtse) {
393         flags |= KVM_PPC_MMUV3_GTSE;
394     }
395     cfg.flags = flags;
396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397     switch (ret) {
398     case 0:
399         return H_SUCCESS;
400     case -EINVAL:
401         return H_PARAMETER;
402     case -ENODEV:
403         return H_NOT_AVAILABLE;
404     default:
405         return H_HARDWARE;
406     }
407 }
408 
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
410 {
411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412         return true;
413     }
414 
415     return (1ul << shift) <= rampgsize;
416 }
417 
418 static long max_cpu_page_size;
419 
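/*
 * Restrict the segment/page sizes in cpu->hash64_opts to those that both
 * the KVM SMMU info and the backing RAM page size can support.
 */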
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
421 {
422     static struct kvm_ppc_smmu_info smmu_info;
423     static bool has_smmu_info;
424     CPUPPCState *env = &cpu->env;
425     int iq, ik, jq, jk;
426 
427     /* We only handle page sizes for 64-bit server guests for now */
428     if (!(env->mmu_model & POWERPC_MMU_64)) {
429         return;
430     }
431 
432     /* Collect MMU info from kernel if not already */
433     if (!has_smmu_info) {
434         kvm_get_smmu_info(cpu, &smmu_info);
435         has_smmu_info = true;
436     }
437 
438     if (!max_cpu_page_size) {
439         max_cpu_page_size = qemu_getrampagesize();
440     }
441 
442     /* Convert to QEMU form */
443     memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
444 
445     /* If we have HV KVM, we need to forbid CI large pages if our
446      * host page size is smaller than 64K.
447      */
448     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
449         if (getpagesize() >= 0x10000) {
450             cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
451         } else {
452             cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
453         }
454     }
455 
456     /*
457      * XXX This loop should be an entry-wide AND of the capabilities that
458      *     the selected CPU has with the capabilities that KVM supports.
459      */
460     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
462         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
463 
464         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
465                                  ksps->page_shift)) {
466             continue;
467         }
468         qsps->page_shift = ksps->page_shift;
469         qsps->slb_enc = ksps->slb_enc;
470         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
472                                      ksps->enc[jk].page_shift)) {
473                 continue;
474             }
475             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478                 break;
479             }
480         }
481         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482             break;
483         }
484     }
485     cpu->hash64_opts->slb_size = smmu_info.slb_size;
486     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487         cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
488     }
489 }
490 
491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
492 {
493     Object *mem_obj = object_resolve_path(obj_path, NULL);
494     long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
495 
496     return pagesize >= max_cpu_page_size;
497 }
498 
499 #else /* defined (TARGET_PPC64) */
500 
501 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
502 {
503 }
504 
505 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
506 {
507     return true;
508 }
509 
510 #endif /* !defined (TARGET_PPC64) */
511 
512 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
513 {
514     return POWERPC_CPU(cpu)->vcpu_id;
515 }
516 
517 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
518  * book3s supports only 1 watchpoint, so an array size
519  * of 4 is sufficient for now.
520  */
521 #define MAX_HW_BKPTS 4
522 
523 static struct HWBreakpoint {
524     target_ulong addr;
525     int type;
526 } hw_debug_points[MAX_HW_BKPTS];
527 
528 static CPUWatchpoint hw_watchpoint;
529 
530 /* By default no hardware breakpoints or watchpoints are supported */
531 static int max_hw_breakpoint;
532 static int max_hw_watchpoint;
533 static int nb_hw_breakpoint;
534 static int nb_hw_watchpoint;
535 
536 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
537 {
538     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
539         max_hw_breakpoint = 2;
540         max_hw_watchpoint = 2;
541     }
542 
543     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
544         fprintf(stderr, "Error initializing h/w breakpoints\n");
545         return;
546     }
547 }
548 
549 int kvm_arch_init_vcpu(CPUState *cs)
550 {
551     PowerPCCPU *cpu = POWERPC_CPU(cs);
552     CPUPPCState *cenv = &cpu->env;
553     int ret;
554 
555     /* Gather server mmu info from KVM and update the CPU state */
556     kvm_fixup_page_sizes(cpu);
557 
558     /* Synchronize sregs with kvm */
559     ret = kvm_arch_sync_sregs(cpu);
560     if (ret) {
561         if (ret == -EINVAL) {
562             error_report("Register sync failed... If you're using kvm-hv.ko,"
563                          " only \"-cpu host\" is possible");
564         }
565         return ret;
566     }
567 
568     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
569 
570     switch (cenv->mmu_model) {
571     case POWERPC_MMU_BOOKE206:
572         /* This target supports access to KVM's guest TLB */
573         ret = kvm_booke206_tlb_init(cpu);
574         break;
575     case POWERPC_MMU_2_07:
576         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
577             /* KVM-HV has transactional memory on POWER8 even without the
578              * KVM_CAP_PPC_HTM extension, so enable it here instead as
579              * long as it's available to userspace on the host. */
580             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
581                 cap_htm = true;
582             }
583         }
584         break;
585     default:
586         break;
587     }
588 
589     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
590     kvmppc_hw_debug_points_init(cenv);
591 
592     return ret;
593 }
594 
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
596 {
597     CPUPPCState *env = &cpu->env;
598     CPUState *cs = CPU(cpu);
599     struct kvm_dirty_tlb dirty_tlb;
600     unsigned char *bitmap;
601     int ret;
602 
603     if (!env->kvm_sw_tlb) {
604         return;
605     }
606 
607     bitmap = g_malloc((env->nb_tlb + 7) / 8);
608     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
609 
610     dirty_tlb.bitmap = (uintptr_t)bitmap;
611     dirty_tlb.num_dirty = env->nb_tlb;
612 
613     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614     if (ret) {
615         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616                 __func__, strerror(-ret));
617     }
618 
619     g_free(bitmap);
620 }
621 
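/*
 * Read one SPR from KVM through the ONE_REG interface into env->spr[];
 * the transfer width (32 or 64 bit) is encoded in the ONE_REG id.
 */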
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
623 {
624     PowerPCCPU *cpu = POWERPC_CPU(cs);
625     CPUPPCState *env = &cpu->env;
626     union {
627         uint32_t u32;
628         uint64_t u64;
629     } val;
630     struct kvm_one_reg reg = {
631         .id = id,
632         .addr = (uintptr_t) &val,
633     };
634     int ret;
635 
636     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637     if (ret != 0) {
638         trace_kvm_failed_spr_get(spr, strerror(errno));
639     } else {
640         switch (id & KVM_REG_SIZE_MASK) {
641         case KVM_REG_SIZE_U32:
642             env->spr[spr] = val.u32;
643             break;
644 
645         case KVM_REG_SIZE_U64:
646             env->spr[spr] = val.u64;
647             break;
648 
649         default:
650             /* Don't handle this size yet */
651             abort();
652         }
653     }
654 }
655 
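/* Write one SPR from env->spr[] to KVM through the ONE_REG interface. */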
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
657 {
658     PowerPCCPU *cpu = POWERPC_CPU(cs);
659     CPUPPCState *env = &cpu->env;
660     union {
661         uint32_t u32;
662         uint64_t u64;
663     } val;
664     struct kvm_one_reg reg = {
665         .id = id,
666         .addr = (uintptr_t) &val,
667     };
668     int ret;
669 
670     switch (id & KVM_REG_SIZE_MASK) {
671     case KVM_REG_SIZE_U32:
672         val.u32 = env->spr[spr];
673         break;
674 
675     case KVM_REG_SIZE_U64:
676         val.u64 = env->spr[spr];
677         break;
678 
679     default:
680         /* Don't handle this size yet */
681         abort();
682     }
683 
684     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685     if (ret != 0) {
686         trace_kvm_failed_spr_set(spr, strerror(errno));
687     }
688 }
689 
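/*
 * Push FPSCR, the FPR/VSX state and the Altivec registers to KVM.  The
 * FP/VSX registers are staged in a two-doubleword buffer whose order
 * depends on host endianness (see the HOST_WORDS_BIGENDIAN block below).
 */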
690 static int kvm_put_fp(CPUState *cs)
691 {
692     PowerPCCPU *cpu = POWERPC_CPU(cs);
693     CPUPPCState *env = &cpu->env;
694     struct kvm_one_reg reg;
695     int i;
696     int ret;
697 
698     if (env->insns_flags & PPC_FLOAT) {
699         uint64_t fpscr = env->fpscr;
700         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
701 
702         reg.id = KVM_REG_PPC_FPSCR;
703         reg.addr = (uintptr_t)&fpscr;
704         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705         if (ret < 0) {
706             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
707             return ret;
708         }
709 
710         for (i = 0; i < 32; i++) {
711             uint64_t vsr[2];
712 
713 #ifdef HOST_WORDS_BIGENDIAN
714             vsr[0] = float64_val(env->fpr[i]);
715             vsr[1] = env->vsr[i];
716 #else
717             vsr[0] = env->vsr[i];
718             vsr[1] = float64_val(env->fpr[i]);
719 #endif
720             reg.addr = (uintptr_t) &vsr;
721             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
722 
723             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724             if (ret < 0) {
725                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
726                         i, strerror(errno));
727                 return ret;
728             }
729         }
730     }
731 
732     if (env->insns_flags & PPC_ALTIVEC) {
733         reg.id = KVM_REG_PPC_VSCR;
734         reg.addr = (uintptr_t)&env->vscr;
735         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736         if (ret < 0) {
737             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
738             return ret;
739         }
740 
741         for (i = 0; i < 32; i++) {
742             reg.id = KVM_REG_PPC_VR(i);
743             reg.addr = (uintptr_t)&env->avr[i];
744             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745             if (ret < 0) {
746                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
747                 return ret;
748             }
749         }
750     }
751 
752     return 0;
753 }
754 
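/* Fetch FPSCR, the FPR/VSX state and the Altivec registers back from KVM. */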
755 static int kvm_get_fp(CPUState *cs)
756 {
757     PowerPCCPU *cpu = POWERPC_CPU(cs);
758     CPUPPCState *env = &cpu->env;
759     struct kvm_one_reg reg;
760     int i;
761     int ret;
762 
763     if (env->insns_flags & PPC_FLOAT) {
764         uint64_t fpscr;
765         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
766 
767         reg.id = KVM_REG_PPC_FPSCR;
768         reg.addr = (uintptr_t)&fpscr;
769         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770         if (ret < 0) {
771             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
772             return ret;
773         } else {
774             env->fpscr = fpscr;
775         }
776 
777         for (i = 0; i < 32; i++) {
778             uint64_t vsr[2];
779 
780             reg.addr = (uintptr_t) &vsr;
781             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
782 
783             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784             if (ret < 0) {
785                 DPRINTF("Unable to get %s%d from KVM: %s\n",
786                         vsx ? "VSR" : "FPR", i, strerror(errno));
787                 return ret;
788             } else {
789 #ifdef HOST_WORDS_BIGENDIAN
790                 env->fpr[i] = vsr[0];
791                 if (vsx) {
792                     env->vsr[i] = vsr[1];
793                 }
794 #else
795                 env->fpr[i] = vsr[1];
796                 if (vsx) {
797                     env->vsr[i] = vsr[0];
798                 }
799 #endif
800             }
801         }
802     }
803 
804     if (env->insns_flags & PPC_ALTIVEC) {
805         reg.id = KVM_REG_PPC_VSCR;
806         reg.addr = (uintptr_t)&env->vscr;
807         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808         if (ret < 0) {
809             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
810             return ret;
811         }
812 
813         for (i = 0; i < 32; i++) {
814             reg.id = KVM_REG_PPC_VR(i);
815             reg.addr = (uintptr_t)&env->avr[i];
816             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817             if (ret < 0) {
818                 DPRINTF("Unable to get VR%d from KVM: %s\n",
819                         i, strerror(errno));
820                 return ret;
821             }
822         }
823     }
824 
825     return 0;
826 }
827 
828 #if defined(TARGET_PPC64)
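/*
 * Read back the guest addresses of the PAPR per-vCPU areas (VPA, SLB
 * shadow buffer and dispatch trace log) that were registered with the
 * hypervisor.
 */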
829 static int kvm_get_vpa(CPUState *cs)
830 {
831     PowerPCCPU *cpu = POWERPC_CPU(cs);
832     CPUPPCState *env = &cpu->env;
833     struct kvm_one_reg reg;
834     int ret;
835 
836     reg.id = KVM_REG_PPC_VPA_ADDR;
837     reg.addr = (uintptr_t)&env->vpa_addr;
838     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839     if (ret < 0) {
840         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
841         return ret;
842     }
843 
844     assert((uintptr_t)&env->slb_shadow_size
845            == ((uintptr_t)&env->slb_shadow_addr + 8));
846     reg.id = KVM_REG_PPC_VPA_SLB;
847     reg.addr = (uintptr_t)&env->slb_shadow_addr;
848     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849     if (ret < 0) {
850         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851                 strerror(errno));
852         return ret;
853     }
854 
855     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
856     reg.id = KVM_REG_PPC_VPA_DTL;
857     reg.addr = (uintptr_t)&env->dtl_addr;
858     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
859     if (ret < 0) {
860         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
861                 strerror(errno));
862         return ret;
863     }
864 
865     return 0;
866 }
867 
868 static int kvm_put_vpa(CPUState *cs)
869 {
870     PowerPCCPU *cpu = POWERPC_CPU(cs);
871     CPUPPCState *env = &cpu->env;
872     struct kvm_one_reg reg;
873     int ret;
874 
875     /* SLB shadow or DTL can't be registered unless a master VPA is
876      * registered.  That means when restoring state, if a VPA *is*
877      * registered, we need to set that up first.  If not, we need to
878      * deregister the others before deregistering the master VPA */
879     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
880 
881     if (env->vpa_addr) {
882         reg.id = KVM_REG_PPC_VPA_ADDR;
883         reg.addr = (uintptr_t)&env->vpa_addr;
884         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
885         if (ret < 0) {
886             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
887             return ret;
888         }
889     }
890 
891     assert((uintptr_t)&env->slb_shadow_size
892            == ((uintptr_t)&env->slb_shadow_addr + 8));
893     reg.id = KVM_REG_PPC_VPA_SLB;
894     reg.addr = (uintptr_t)&env->slb_shadow_addr;
895     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896     if (ret < 0) {
897         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
898         return ret;
899     }
900 
901     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
902     reg.id = KVM_REG_PPC_VPA_DTL;
903     reg.addr = (uintptr_t)&env->dtl_addr;
904     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905     if (ret < 0) {
906         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
907                 strerror(errno));
908         return ret;
909     }
910 
911     if (!env->vpa_addr) {
912         reg.id = KVM_REG_PPC_VPA_ADDR;
913         reg.addr = (uintptr_t)&env->vpa_addr;
914         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915         if (ret < 0) {
916             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917             return ret;
918         }
919     }
920 
921     return 0;
922 }
923 #endif /* TARGET_PPC64 */
924 
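/* Push the Book3S sregs (SDR1, SLB, segment registers and BATs) to KVM. */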
925 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
926 {
927     CPUPPCState *env = &cpu->env;
928     struct kvm_sregs sregs;
929     int i;
930 
931     sregs.pvr = env->spr[SPR_PVR];
932 
933     if (cpu->vhyp) {
934         PPCVirtualHypervisorClass *vhc =
935             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
936         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
937     } else {
938         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
939     }
940 
941     /* Sync SLB */
942 #ifdef TARGET_PPC64
943     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
944         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
945         if (env->slb[i].esid & SLB_ESID_V) {
946             sregs.u.s.ppc64.slb[i].slbe |= i;
947         }
948         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
949     }
950 #endif
951 
952     /* Sync SRs */
953     for (i = 0; i < 16; i++) {
954         sregs.u.s.ppc32.sr[i] = env->sr[i];
955     }
956 
957     /* Sync BATs */
958     for (i = 0; i < 8; i++) {
959         /* Beware. We have to swap the upper and lower 32-bit halves here */
960         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
961             | env->DBAT[1][i];
962         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
963             | env->IBAT[1][i];
964     }
965 
966     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
967 }
968 
969 int kvm_arch_put_registers(CPUState *cs, int level)
970 {
971     PowerPCCPU *cpu = POWERPC_CPU(cs);
972     CPUPPCState *env = &cpu->env;
973     struct kvm_regs regs;
974     int ret;
975     int i;
976 
977     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
978     if (ret < 0) {
979         return ret;
980     }
981 
982     regs.ctr = env->ctr;
983     regs.lr  = env->lr;
984     regs.xer = cpu_read_xer(env);
985     regs.msr = env->msr;
986     regs.pc = env->nip;
987 
988     regs.srr0 = env->spr[SPR_SRR0];
989     regs.srr1 = env->spr[SPR_SRR1];
990 
991     regs.sprg0 = env->spr[SPR_SPRG0];
992     regs.sprg1 = env->spr[SPR_SPRG1];
993     regs.sprg2 = env->spr[SPR_SPRG2];
994     regs.sprg3 = env->spr[SPR_SPRG3];
995     regs.sprg4 = env->spr[SPR_SPRG4];
996     regs.sprg5 = env->spr[SPR_SPRG5];
997     regs.sprg6 = env->spr[SPR_SPRG6];
998     regs.sprg7 = env->spr[SPR_SPRG7];
999 
1000     regs.pid = env->spr[SPR_BOOKE_PID];
1001 
1002     for (i = 0; i < 32; i++)
1003         regs.gpr[i] = env->gpr[i];
1004 
1005     regs.cr = 0;
1006     for (i = 0; i < 8; i++) {
1007         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1008     }
1009 
1010     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1011     if (ret < 0)
1012         return ret;
1013 
1014     kvm_put_fp(cs);
1015 
1016     if (env->tlb_dirty) {
1017         kvm_sw_tlb_put(cpu);
1018         env->tlb_dirty = false;
1019     }
1020 
1021     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1022         ret = kvmppc_put_books_sregs(cpu);
1023         if (ret < 0) {
1024             return ret;
1025         }
1026     }
1027 
1028     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1029         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1030     }
1031 
1032     if (cap_one_reg) {
1033         int i;
1034 
1035         /* We deliberately ignore errors here: for kernels which have
1036          * the ONE_REG calls but don't support the specific
1037          * registers, there's a reasonable chance things will still
1038          * work, at least until we try to migrate. */
1039         for (i = 0; i < 1024; i++) {
1040             uint64_t id = env->spr_cb[i].one_reg_id;
1041 
1042             if (id != 0) {
1043                 kvm_put_one_spr(cs, id, i);
1044             }
1045         }
1046 
1047 #ifdef TARGET_PPC64
1048         if (msr_ts) {
1049             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1050                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1051             }
1052             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1053                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1054             }
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1065         }
1066 
1067         if (cap_papr) {
1068             if (kvm_put_vpa(cs) < 0) {
1069                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1070             }
1071         }
1072 
1073         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1074 #endif /* TARGET_PPC64 */
1075     }
1076 
1077     return ret;
1078 }
1079 
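/* Recompute a BookE exception vector as IVORxx + IVPR after an IVOR update. */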
1080 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1081 {
1082      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1083 }
1084 
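/*
 * Fetch the BookE sregs from KVM and scatter the fields it reports into
 * env->spr[], updating the derived exception vectors as we go.
 */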
1085 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1086 {
1087     CPUPPCState *env = &cpu->env;
1088     struct kvm_sregs sregs;
1089     int ret;
1090 
1091     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1092     if (ret < 0) {
1093         return ret;
1094     }
1095 
1096     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1097         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1098         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1099         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1100         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1101         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1102         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1103         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1104         env->spr[SPR_DECR] = sregs.u.e.dec;
1105         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1106         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1107         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1108     }
1109 
1110     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1111         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1112         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1113         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1114         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1115         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1116     }
1117 
1118     if (sregs.u.e.features & KVM_SREGS_E_64) {
1119         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1120     }
1121 
1122     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1123         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1124     }
1125 
1126     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1127         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1128         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1129         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1130         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1131         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1132         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1133         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1134         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1135         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1136         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1137         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1138         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1139         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1140         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1141         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1142         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1143         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1144         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1145         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1146         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1147         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1148         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1149         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1150         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1151         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1152         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1153         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1154         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1155         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1156         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1157         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1158         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1159 
1160         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1161             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1162             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1163             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1164             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1165             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1166             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1167         }
1168 
1169         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1170             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1171             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1172         }
1173 
1174         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1175             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1176             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1177             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1178             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1179         }
1180     }
1181 
1182     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1183         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1184         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1185         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1186         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1187         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1188         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1189         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1190         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1191         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1192         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1193     }
1194 
1195     if (sregs.u.e.features & KVM_SREGS_EXP) {
1196         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1197     }
1198 
1199     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1200         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1201         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1202     }
1203 
1204     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1205         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1206         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1207         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1208 
1209         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1210             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1211             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1212         }
1213     }
1214 
1215     return 0;
1216 }
1217 
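/*
 * Fetch the Book3S sregs from KVM: SDR1, the valid SLB entries, the
 * segment registers and the BATs.
 */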
1218 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1219 {
1220     CPUPPCState *env = &cpu->env;
1221     struct kvm_sregs sregs;
1222     int ret;
1223     int i;
1224 
1225     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1226     if (ret < 0) {
1227         return ret;
1228     }
1229 
1230     if (!cpu->vhyp) {
1231         ppc_store_sdr1(env, sregs.u.s.sdr1);
1232     }
1233 
1234     /* Sync SLB */
1235 #ifdef TARGET_PPC64
1236     /*
1237      * The packed SLB array we get from KVM_GET_SREGS only contains
1238      * information about valid entries. So we flush our internal copy
1239      * to get rid of stale ones, then put all valid SLB entries back
1240      * in.
1241      */
1242     memset(env->slb, 0, sizeof(env->slb));
1243     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1244         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1245         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1246         /*
1247          * Only restore valid entries
1248          */
1249         if (rb & SLB_ESID_V) {
1250             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1251         }
1252     }
1253 #endif
1254 
1255     /* Sync SRs */
1256     for (i = 0; i < 16; i++) {
1257         env->sr[i] = sregs.u.s.ppc32.sr[i];
1258     }
1259 
1260     /* Sync BATs */
1261     for (i = 0; i < 8; i++) {
1262         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1263         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1264         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1265         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1266     }
1267 
1268     return 0;
1269 }
1270 
1271 int kvm_arch_get_registers(CPUState *cs)
1272 {
1273     PowerPCCPU *cpu = POWERPC_CPU(cs);
1274     CPUPPCState *env = &cpu->env;
1275     struct kvm_regs regs;
1276     uint32_t cr;
1277     int i, ret;
1278 
1279     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1280     if (ret < 0)
1281         return ret;
1282 
1283     cr = regs.cr;
1284     for (i = 7; i >= 0; i--) {
1285         env->crf[i] = cr & 15;
1286         cr >>= 4;
1287     }
1288 
1289     env->ctr = regs.ctr;
1290     env->lr = regs.lr;
1291     cpu_write_xer(env, regs.xer);
1292     env->msr = regs.msr;
1293     env->nip = regs.pc;
1294 
1295     env->spr[SPR_SRR0] = regs.srr0;
1296     env->spr[SPR_SRR1] = regs.srr1;
1297 
1298     env->spr[SPR_SPRG0] = regs.sprg0;
1299     env->spr[SPR_SPRG1] = regs.sprg1;
1300     env->spr[SPR_SPRG2] = regs.sprg2;
1301     env->spr[SPR_SPRG3] = regs.sprg3;
1302     env->spr[SPR_SPRG4] = regs.sprg4;
1303     env->spr[SPR_SPRG5] = regs.sprg5;
1304     env->spr[SPR_SPRG6] = regs.sprg6;
1305     env->spr[SPR_SPRG7] = regs.sprg7;
1306 
1307     env->spr[SPR_BOOKE_PID] = regs.pid;
1308 
1309     for (i = 0; i < 32; i++)
1310         env->gpr[i] = regs.gpr[i];
1311 
1312     kvm_get_fp(cs);
1313 
1314     if (cap_booke_sregs) {
1315         ret = kvmppc_get_booke_sregs(cpu);
1316         if (ret < 0) {
1317             return ret;
1318         }
1319     }
1320 
1321     if (cap_segstate) {
1322         ret = kvmppc_get_books_sregs(cpu);
1323         if (ret < 0) {
1324             return ret;
1325         }
1326     }
1327 
1328     if (cap_hior) {
1329         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1330     }
1331 
1332     if (cap_one_reg) {
1333         int i;
1334 
1335         /* We deliberately ignore errors here: for kernels which have
1336          * the ONE_REG calls but don't support the specific
1337          * registers, there's a reasonable chance things will still
1338          * work, at least until we try to migrate. */
1339         for (i = 0; i < 1024; i++) {
1340             uint64_t id = env->spr_cb[i].one_reg_id;
1341 
1342             if (id != 0) {
1343                 kvm_get_one_spr(cs, id, i);
1344             }
1345         }
1346 
1347 #ifdef TARGET_PPC64
1348         if (msr_ts) {
1349             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1350                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1351             }
1352             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1353                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1354             }
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1365         }
1366 
1367         if (cap_papr) {
1368             if (kvm_get_vpa(cs) < 0) {
1369                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1370             }
1371         }
1372 
1373         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1374 #endif
1375     }
1376 
1377     return 0;
1378 }
1379 
1380 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1381 {
1382     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1383 
1384     if (irq != PPC_INTERRUPT_EXT) {
1385         return 0;
1386     }
1387 
1388     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1389         return 0;
1390     }
1391 
1392     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1393 
1394     return 0;
1395 }
1396 
1397 #if defined(TARGET_PPCEMB)
1398 #define PPC_INPUT_INT PPC40x_INPUT_INT
1399 #elif defined(TARGET_PPC64)
1400 #define PPC_INPUT_INT PPC970_INPUT_INT
1401 #else
1402 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1403 #endif
1404 
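/*
 * Runs just before entering the guest.  Without level-triggered IRQ support
 * in KVM we inject a pending external interrupt here and re-arm idle_timer
 * so a swallowed level interrupt gets re-delivered (see the comment next to
 * the idle_timer declaration above).
 */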
1405 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1406 {
1407     PowerPCCPU *cpu = POWERPC_CPU(cs);
1408     CPUPPCState *env = &cpu->env;
1409     int r;
1410     unsigned irq;
1411 
1412     qemu_mutex_lock_iothread();
1413 
1414     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1415      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1416     if (!cap_interrupt_level &&
1417         run->ready_for_interrupt_injection &&
1418         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1419         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1420     {
1421         /* For now KVM disregards the 'irq' argument. However, in the
1422          * future KVM could cache it in-kernel to avoid a heavyweight exit
1423          * when reading the UIC.
1424          */
1425         irq = KVM_INTERRUPT_SET;
1426 
1427         DPRINTF("injected interrupt %d\n", irq);
1428         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1429         if (r < 0) {
1430             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1431         }
1432 
1433         /* Always wake up soon in case the interrupt was level based */
1434         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1435                        (NANOSECONDS_PER_SECOND / 50));
1436     }
1437 
1438     /* We don't know if there are more interrupts pending after this. However,
1439      * the guest will return to userspace in the course of handling this one
1440      * anyway, so we will get a chance to deliver the rest. */
1441 
1442     qemu_mutex_unlock_iothread();
1443 }
1444 
1445 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1446 {
1447     return MEMTXATTRS_UNSPECIFIED;
1448 }
1449 
1450 int kvm_arch_process_async_events(CPUState *cs)
1451 {
1452     return cs->halted;
1453 }
1454 
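/*
 * Idle the vCPU on a halt exit, but only if external interrupts are
 * enabled (MSR_EE) and no hard interrupt is already pending.
 */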
1455 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1456 {
1457     CPUState *cs = CPU(cpu);
1458     CPUPPCState *env = &cpu->env;
1459 
1460     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1461         cs->halted = 1;
1462         cs->exception_index = EXCP_HLT;
1463     }
1464 
1465     return 0;
1466 }
1467 
1468 /* Map DCR accesses to the existing QEMU DCR emulation */
1469 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1470 {
1471     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1472         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1473 
1474     return 0;
1475 }
1476 
1477 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1478 {
1479     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1480         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1481 
1482     return 0;
1483 }
1484 
1485 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1486 {
1487     /* Mixed endian case is not handled */
1488     uint32_t sc = debug_inst_opcode;
1489 
1490     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1491                             sizeof(sc), 0) ||
1492         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1493         return -EINVAL;
1494     }
1495 
1496     return 0;
1497 }
1498 
1499 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1500 {
1501     uint32_t sc;
1502 
1503     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1504         sc != debug_inst_opcode ||
1505         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1506                             sizeof(sc), 1)) {
1507         return -EINVAL;
1508     }
1509 
1510     return 0;
1511 }
1512 
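/*
 * Look up a hardware debug point by address and GDB type; returns its index
 * in hw_debug_points[], or -1 if there is no match.
 */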
1513 static int find_hw_breakpoint(target_ulong addr, int type)
1514 {
1515     int n;
1516 
1517     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1518            <= ARRAY_SIZE(hw_debug_points));
1519 
1520     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1521         if (hw_debug_points[n].addr == addr &&
1522              hw_debug_points[n].type == type) {
1523             return n;
1524         }
1525     }
1526 
1527     return -1;
1528 }
1529 
1530 static int find_hw_watchpoint(target_ulong addr, int *flag)
1531 {
1532     int n;
1533 
1534     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1535     if (n >= 0) {
1536         *flag = BP_MEM_ACCESS;
1537         return n;
1538     }
1539 
1540     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1541     if (n >= 0) {
1542         *flag = BP_MEM_WRITE;
1543         return n;
1544     }
1545 
1546     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1547     if (n >= 0) {
1548         *flag = BP_MEM_READ;
1549         return n;
1550     }
1551 
1552     return -1;
1553 }
1554 
1555 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1559         return -ENOBUFS;
1560     }
1561 
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1563     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1564 
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1568             return -ENOBUFS;
1569         }
1570 
1571         if (find_hw_breakpoint(addr, type) >= 0) {
1572             return -EEXIST;
1573         }
1574 
1575         nb_hw_breakpoint++;
1576         break;
1577 
1578     case GDB_WATCHPOINT_WRITE:
1579     case GDB_WATCHPOINT_READ:
1580     case GDB_WATCHPOINT_ACCESS:
1581         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1582             return -ENOBUFS;
1583         }
1584 
1585         if (find_hw_breakpoint(addr, type) >= 0) {
1586             return -EEXIST;
1587         }
1588 
1589         nb_hw_watchpoint++;
1590         break;
1591 
1592     default:
1593         return -ENOSYS;
1594     }
1595 
1596     return 0;
1597 }
1598 
1599 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1600                                   target_ulong len, int type)
1601 {
1602     int n;
1603 
1604     n = find_hw_breakpoint(addr, type);
1605     if (n < 0) {
1606         return -ENOENT;
1607     }
1608 
1609     switch (type) {
1610     case GDB_BREAKPOINT_HW:
1611         nb_hw_breakpoint--;
1612         break;
1613 
1614     case GDB_WATCHPOINT_WRITE:
1615     case GDB_WATCHPOINT_READ:
1616     case GDB_WATCHPOINT_ACCESS:
1617         nb_hw_watchpoint--;
1618         break;
1619 
1620     default:
1621         return -ENOSYS;
1622     }
1623     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1624 
1625     return 0;
1626 }
1627 
1628 void kvm_arch_remove_all_hw_breakpoints(void)
1629 {
1630     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1631 }
1632 
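/*
 * Translate QEMU's software/hardware breakpoint and watchpoint bookkeeping
 * into the kvm_guest_debug control block that generic code hands to the
 * KVM_SET_GUEST_DEBUG ioctl.
 */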
1633 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1634 {
1635     int n;
1636 
1637     /* Software Breakpoint updates */
1638     if (kvm_sw_breakpoints_active(cs)) {
1639         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1640     }
1641 
1642     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1643            <= ARRAY_SIZE(hw_debug_points));
1644     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1645 
1646     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1647         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1648         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1649         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1650             switch (hw_debug_points[n].type) {
1651             case GDB_BREAKPOINT_HW:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1653                 break;
1654             case GDB_WATCHPOINT_WRITE:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1656                 break;
1657             case GDB_WATCHPOINT_READ:
1658                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1659                 break;
1660             case GDB_WATCHPOINT_ACCESS:
1661                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1662                                         KVMPPC_DEBUG_WATCH_READ;
1663                 break;
1664             default:
1665                 cpu_abort(cs, "Unsupported breakpoint type\n");
1666             }
1667             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1668         }
1669     }
1670 }
1671 
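/* Decide whether a KVM_EXIT_DEBUG exit belongs to QEMU (single-step,
 * or one of our hardware/software breakpoints) and should stop the
 * vCPU, or whether it must be reflected to the guest as a program
 * interrupt; see the comment in the final else branch below. */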
1672 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1673 {
1674     CPUState *cs = CPU(cpu);
1675     CPUPPCState *env = &cpu->env;
1676     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1677     int handle = 0;
1678     int n;
1679     int flag = 0;
1680 
1681     if (cs->singlestep_enabled) {
1682         handle = 1;
1683     } else if (arch_info->status) {
1684         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1685             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1686                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1687                 if (n >= 0) {
1688                     handle = 1;
1689                 }
1690             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1691                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1692                 n = find_hw_watchpoint(arch_info->address,  &flag);
1693                 if (n >= 0) {
1694                     handle = 1;
1695                     cs->watchpoint_hit = &hw_watchpoint;
1696                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1697                     hw_watchpoint.flags = flag;
1698                 }
1699             }
1700         }
1701     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1702         handle = 1;
1703     } else {
1704         /* QEMU is unable to handle this debug exception, so inject
1705          * a program exception into the guest instead.
1706          * Yes, a program exception, NOT a debug exception!
1707          * When QEMU owns the debug resources, the debug exception must
1708          * always be enabled. To achieve this we set MSR_DE and also set
1709          * MSRP_DEP so the guest cannot change MSR_DE.
1710          * When emulating debug resources for the guest, we instead want
1711          * the guest to control MSR_DE (enable/disable debug interrupts
1712          * as needed). Supporting both configurations at once is not
1713          * possible, so debug resources cannot be shared between QEMU
1714          * and the guest on the BookE architecture.
1715          * In the current design QEMU gets priority over the guest:
1716          * if QEMU is using the debug resources then the guest cannot
1717          * use them.
1718          * For software breakpoints QEMU uses a privileged instruction,
1719          * so there is no way we can get here because the guest raised a
1720          * debug exception; the only possibility is that the guest
1721          * executed a privileged / illegal instruction, and that is why
1722          * we inject a program interrupt.
1723          */
1724 
1725         cpu_synchronize_state(cs);
1726         /* env->nip is the PC, so advance it by 4 before using
1727          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1728          */
1729         env->nip += 4;
1730         cs->exception_index = POWERPC_EXCP_PROGRAM;
1731         env->error_code = POWERPC_EXCP_INVAL;
1732         ppc_cpu_do_interrupt(cs);
1733     }
1734 
1735     return handle;
1736 }
1737 
1738 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1739 {
1740     PowerPCCPU *cpu = POWERPC_CPU(cs);
1741     CPUPPCState *env = &cpu->env;
1742     int ret;
1743 
1744     qemu_mutex_lock_iothread();
1745 
1746     switch (run->exit_reason) {
1747     case KVM_EXIT_DCR:
1748         if (run->dcr.is_write) {
1749             DPRINTF("handle dcr write\n");
1750             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1751         } else {
1752             DPRINTF("handle dcr read\n");
1753             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1754         }
1755         break;
1756     case KVM_EXIT_HLT:
1757         DPRINTF("handle halt\n");
1758         ret = kvmppc_handle_halt(cpu);
1759         break;
1760 #if defined(TARGET_PPC64)
1761     case KVM_EXIT_PAPR_HCALL:
1762         DPRINTF("handle PAPR hypercall\n");
1763         run->papr_hcall.ret = spapr_hypercall(cpu,
1764                                               run->papr_hcall.nr,
1765                                               run->papr_hcall.args);
1766         ret = 0;
1767         break;
1768 #endif
1769     case KVM_EXIT_EPR:
1770         DPRINTF("handle epr\n");
1771         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1772         ret = 0;
1773         break;
1774     case KVM_EXIT_WATCHDOG:
1775         DPRINTF("handle watchdog expiry\n");
1776         watchdog_perform_action();
1777         ret = 0;
1778         break;
1779 
1780     case KVM_EXIT_DEBUG:
1781         DPRINTF("handle debug exception\n");
1782         if (kvm_handle_debug(cpu, run)) {
1783             ret = EXCP_DEBUG;
1784             break;
1785         }
1786         /* re-enter, this exception was guest-internal */
1787         ret = 0;
1788         break;
1789 
1790     default:
1791         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1792         ret = -1;
1793         break;
1794     }
1795 
1796     qemu_mutex_unlock_iothread();
1797     return ret;
1798 }
1799 
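/* BookE timer helpers: thin wrappers around KVM's one-reg interface to
 * OR bits into the guest Timer Status Register, clear TSR bits, and
 * push env->spr[SPR_BOOKE_TCR] into the in-kernel Timer Control
 * Register.  A minimal usage sketch (the watchdog bit mask below is a
 * placeholder for illustration only, not a definition from this file):
 *
 *     uint32_t wdog_bits = ...;              // BookE TSR watchdog bits
 *     kvmppc_set_tcr(cpu);                   // sync TCR into KVM
 *     kvmppc_or_tsr_bits(cpu, wdog_bits);    // assert the bits
 *     kvmppc_clear_tsr_bits(cpu, wdog_bits); // clear them again
 */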
1800 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1801 {
1802     CPUState *cs = CPU(cpu);
1803     uint32_t bits = tsr_bits;
1804     struct kvm_one_reg reg = {
1805         .id = KVM_REG_PPC_OR_TSR,
1806         .addr = (uintptr_t) &bits,
1807     };
1808 
1809     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1810 }
1811 
1812 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1813 {
1814 
1815     CPUState *cs = CPU(cpu);
1816     uint32_t bits = tsr_bits;
1817     struct kvm_one_reg reg = {
1818         .id = KVM_REG_PPC_CLEAR_TSR,
1819         .addr = (uintptr_t) &bits,
1820     };
1821 
1822     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 }
1824 
1825 int kvmppc_set_tcr(PowerPCCPU *cpu)
1826 {
1827     CPUState *cs = CPU(cpu);
1828     CPUPPCState *env = &cpu->env;
1829     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1830 
1831     struct kvm_one_reg reg = {
1832         .id = KVM_REG_PPC_TCR,
1833         .addr = (uintptr_t) &tcr,
1834     };
1835 
1836     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1837 }
1838 
1839 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1840 {
1841     CPUState *cs = CPU(cpu);
1842     int ret;
1843 
1844     if (!kvm_enabled()) {
1845         return -1;
1846     }
1847 
1848     if (!cap_ppc_watchdog) {
1849         fprintf(stderr, "warning: KVM does not support watchdog\n");
1850         return -1;
1851     }
1852 
1853     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1854     if (ret < 0) {
1855         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1856                 __func__, strerror(-ret));
1857         return ret;
1858     }
1859 
1860     return ret;
1861 }
1862 
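/* Copy the first /proc/cpuinfo line starting with 'field' into 'value'
 * (at most 'len' bytes).  Returns 0 on success, -1 if the file cannot
 * be opened or the field is not present. */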
1863 static int read_cpuinfo(const char *field, char *value, int len)
1864 {
1865     FILE *f;
1866     int ret = -1;
1867     int field_len = strlen(field);
1868     char line[512];
1869 
1870     f = fopen("/proc/cpuinfo", "r");
1871     if (!f) {
1872         return -1;
1873     }
1874 
1875     do {
1876         if (!fgets(line, sizeof(line), f)) {
1877             break;
1878         }
1879         if (!strncmp(line, field, field_len)) {
1880             pstrcpy(value, len, line);
1881             ret = 0;
1882             break;
1883         }
1884     } while (*line);
1885 
1886     fclose(f);
1887 
1888     return ret;
1889 }
1890 
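/* Parse the timebase frequency from the "timebase" line of
 * /proc/cpuinfo, which on PPC hosts typically looks like
 * "timebase : 512000000".  Falls back to NANOSECONDS_PER_SECOND if the
 * line cannot be found or parsed. */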
1891 uint32_t kvmppc_get_tbfreq(void)
1892 {
1893     char line[512];
1894     char *ns;
1895     uint32_t retval = NANOSECONDS_PER_SECOND;
1896 
1897     if (read_cpuinfo("timebase", line, sizeof(line))) {
1898         return retval;
1899     }
1900 
1901     if (!(ns = strchr(line, ':'))) {
1902         return retval;
1903     }
1904 
1905     ns++;
1906 
1907     return atoi(ns);
1908 }
1909 
1910 bool kvmppc_get_host_serial(char **value)
1911 {
1912     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1913                                NULL);
1914 }
1915 
1916 bool kvmppc_get_host_model(char **value)
1917 {
1918     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1919 }
1920 
1921 /* Try to find a device tree node for a CPU with clock-frequency property */
1922 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1923 {
1924     struct dirent *dirp;
1925     DIR *dp;
1926 
1927     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1928         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1929         return -1;
1930     }
1931 
1932     buf[0] = '\0';
1933     while ((dirp = readdir(dp)) != NULL) {
1934         FILE *f;
1935         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1936                  dirp->d_name);
1937         f = fopen(buf, "r");
1938         if (f) {
1939             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1940             fclose(f);
1941             break;
1942         }
1943         buf[0] = '\0';
1944     }
1945     closedir(dp);
1946     if (buf[0] == '\0') {
1947         printf("Unknown host!\n");
1948         return -1;
1949     }
1950 
1951     return 0;
1952 }
1953 
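/* Read a host device tree property file that holds a single big-endian
 * integer.  Returns the value in host byte order, 0 if the property is
 * neither 4 nor 8 bytes long, and (uint64_t)-1 if the file cannot be
 * opened. */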
1954 static uint64_t kvmppc_read_int_dt(const char *filename)
1955 {
1956     union {
1957         uint32_t v32;
1958         uint64_t v64;
1959     } u;
1960     FILE *f;
1961     int len;
1962 
1963     f = fopen(filename, "rb");
1964     if (!f) {
1965         return -1;
1966     }
1967 
1968     len = fread(&u, 1, sizeof(u), f);
1969     fclose(f);
1970     switch (len) {
1971     case 4:
1972         /* property is a 32-bit quantity */
1973         return be32_to_cpu(u.v32);
1974     case 8:
1975         return be64_to_cpu(u.v64);
1976     }
1977 
1978     return 0;
1979 }
1980 
1981 /* Read a CPU node property from the host device tree that's a single
1982  * integer (32-bit or 64-bit).  Returns (uint64_t)-1 if the CPU node or
1983  * the property cannot be found/opened, and 0 if the property format
1984  * is not understood. */
1985 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1986 {
1987     char buf[PATH_MAX], *tmp;
1988     uint64_t val;
1989 
1990     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1991         return -1;
1992     }
1993 
1994     tmp = g_strdup_printf("%s/%s", buf, propname);
1995     val = kvmppc_read_int_dt(tmp);
1996     g_free(tmp);
1997 
1998     return val;
1999 }
2000 
2001 uint64_t kvmppc_get_clockfreq(void)
2002 {
2003     return kvmppc_read_int_cpu_dt("clock-frequency");
2004 }
2005 
2006 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2007 {
2008     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2009     CPUState *cs = CPU(cpu);
2010 
2011     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2012         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2013         return 0;
2014     }
2015 
2016     return 1;
2017 }
2018 
2019 int kvmppc_get_hasidle(CPUPPCState *env)
2020 {
2021     struct kvm_ppc_pvinfo pvinfo;
2022 
2023     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2024         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2025         return 1;
2026     }
2027 
2028     return 0;
2029 }
2030 
2031 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2032 {
2033     uint32_t *hc = (uint32_t *)buf;
2034     struct kvm_ppc_pvinfo pvinfo;
2035 
2036     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2037         memcpy(buf, pvinfo.hcall, buf_len);
2038         return 0;
2039     }
2040 
2041     /*
2042      * Fallback to always fail hypercalls regardless of endianness:
2043      *
2044      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2045      *     li r3, -1
2046      *     b .+8       (becomes nop in wrong endian)
2047      *     bswap32(li r3, -1)
2048      */
2049 
2050     hc[0] = cpu_to_be32(0x08000048);
2051     hc[1] = cpu_to_be32(0x3860ffff);
2052     hc[2] = cpu_to_be32(0x48000008);
2053     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2054 
2055     return 1;
2056 }
2057 
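/* Ask KVM to handle a single sPAPR hypercall in the kernel by enabling
 * it through KVM_CAP_PPC_ENABLE_HCALL. */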
2058 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2059 {
2060     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2061 }
2062 
2063 void kvmppc_enable_logical_ci_hcalls(void)
2064 {
2065     /*
2066      * FIXME: it would be nice if we could detect the cases where
2067      * we're using a device which requires the in kernel
2068      * implementation of these hcalls, but the kernel lacks them and
2069      * produce a warning.
2070      */
2071     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2072     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2073 }
2074 
2075 void kvmppc_enable_set_mode_hcall(void)
2076 {
2077     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2078 }
2079 
2080 void kvmppc_enable_clear_ref_mod_hcalls(void)
2081 {
2082     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2083     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2084 }
2085 
2086 void kvmppc_set_papr(PowerPCCPU *cpu)
2087 {
2088     CPUState *cs = CPU(cpu);
2089     int ret;
2090 
2091     if (!kvm_enabled()) {
2092         return;
2093     }
2094 
2095     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096     if (ret) {
2097         error_report("This vCPU type or KVM version does not support PAPR");
2098         exit(1);
2099     }
2100 
2101     /* Update the capability flag so we sync the right information
2102      * with kvm */
2103     cap_papr = 1;
2104 }
2105 
2106 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2107 {
2108     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2109 }
2110 
2111 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112 {
2113     CPUState *cs = CPU(cpu);
2114     int ret;
2115 
2116     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117     if (ret && mpic_proxy) {
2118         error_report("This KVM version does not support EPR");
2119         exit(1);
2120     }
2121 }
2122 
2123 int kvmppc_smt_threads(void)
2124 {
2125     return cap_ppc_smt ? cap_ppc_smt : 1;
2126 }
2127 
2128 int kvmppc_set_smt_threads(int smt)
2129 {
2130     int ret;
2131 
2132     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2133     if (!ret) {
2134         cap_ppc_smt = smt;
2135     }
2136     return ret;
2137 }
2138 
2139 void kvmppc_hint_smt_possible(Error **errp)
2140 {
2141     int i;
2142     GString *g;
2143     char *s;
2144 
2145     assert(kvm_enabled());
2146     if (cap_ppc_smt_possible) {
2147         g = g_string_new("Available VSMT modes:");
2148         for (i = 63; i >= 0; i--) {
2149             if ((1UL << i) & cap_ppc_smt_possible) {
2150                 g_string_append_printf(g, " %lu", (1UL << i));
2151             }
2152         }
2153         s = g_string_free(g, false);
2154         error_append_hint(errp, "%s.\n", s);
2155         g_free(s);
2156     } else {
2157         error_append_hint(errp,
2158                           "This KVM seems to be too old to support VSMT.\n");
2159     }
2160 }
2161 
2162 
2163 #ifdef TARGET_PPC64
2164 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2165 {
2166     struct kvm_ppc_smmu_info info;
2167     long rampagesize, best_page_shift;
2168     int i;
2169 
2170     /* Find the largest hardware supported page size that's less than
2171      * or equal to the (logical) backing page size of guest RAM */
2172     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2173     rampagesize = qemu_getrampagesize();
2174     best_page_shift = 0;
2175 
2176     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2177         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2178 
2179         if (!sps->page_shift) {
2180             continue;
2181         }
2182 
2183         if ((sps->page_shift > best_page_shift)
2184             && ((1UL << sps->page_shift) <= rampagesize)) {
2185             best_page_shift = sps->page_shift;
2186         }
2187     }
2188 
2189     return MIN(current_size,
2190                1ULL << (best_page_shift + hash_shift - 7));
2191 }
2192 #endif
2193 
2194 bool kvmppc_spapr_use_multitce(void)
2195 {
2196     return cap_spapr_multitce;
2197 }
2198 
2199 int kvmppc_spapr_enable_inkernel_multitce(void)
2200 {
2201     int ret;
2202 
2203     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2204                             H_PUT_TCE_INDIRECT, 1);
2205     if (!ret) {
2206         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                                 H_STUFF_TCE, 1);
2208     }
2209 
2210     return ret;
2211 }
2212 
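/* Create an in-kernel TCE (IOMMU) table for the given LIOBN, preferring
 * the 64-bit ioctl when KVM supports it, and mmap() it into QEMU.
 * Returns the mapped table and stores the fd in *pfd, or returns NULL
 * (with *pfd set to -1) when the table must be allocated in userspace
 * instead. */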
2213 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2214                               uint64_t bus_offset, uint32_t nb_table,
2215                               int *pfd, bool need_vfio)
2216 {
2217     long len;
2218     int fd;
2219     void *table;
2220 
2221     /* Must set fd to -1 so we don't try to munmap when called for
2222      * destroying the table, which the upper layers -will- do
2223      */
2224     *pfd = -1;
2225     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2226         return NULL;
2227     }
2228 
2229     if (cap_spapr_tce_64) {
2230         struct kvm_create_spapr_tce_64 args = {
2231             .liobn = liobn,
2232             .page_shift = page_shift,
2233             .offset = bus_offset >> page_shift,
2234             .size = nb_table,
2235             .flags = 0
2236         };
2237         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2238         if (fd < 0) {
2239             fprintf(stderr,
2240                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2241                     liobn);
2242             return NULL;
2243         }
2244     } else if (cap_spapr_tce) {
2245         uint64_t window_size = (uint64_t) nb_table << page_shift;
2246         struct kvm_create_spapr_tce args = {
2247             .liobn = liobn,
2248             .window_size = window_size,
2249         };
2250         if ((window_size != args.window_size) || bus_offset) {
2251             return NULL;
2252         }
2253         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2254         if (fd < 0) {
2255             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2256                     liobn);
2257             return NULL;
2258         }
2259     } else {
2260         return NULL;
2261     }
2262 
2263     len = nb_table * sizeof(uint64_t);
2264     /* FIXME: round this up to page size */
2265     table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2266     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2267     if (table == MAP_FAILED) {
2268         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2269                 liobn);
2270         close(fd);
2271         return NULL;
2272     }
2273 
2274     *pfd = fd;
2275     return table;
2276 }
2277 
2278 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2279 {
2280     long len;
2281 
2282     if (fd < 0) {
2283         return -1;
2284     }
2285 
2286     len = nb_table * sizeof(uint64_t);
2287     if ((munmap(table, len) < 0) ||
2288         (close(fd) < 0)) {
2289         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2290                 strerror(errno));
2291         /* Leak the table */
2292     }
2293 
2294     return 0;
2295 }
2296 
2297 int kvmppc_reset_htab(int shift_hint)
2298 {
2299     uint32_t shift = shift_hint;
2300 
2301     if (!kvm_enabled()) {
2302         /* Full emulation, tell caller to allocate htab itself */
2303         return 0;
2304     }
2305     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2306         int ret;
2307         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2308         if (ret == -ENOTTY) {
2309             /* At least some versions of PR KVM advertise the
2310              * capability, but don't implement the ioctl().  Oops.
2311              * Return 0 so that we allocate the htab in qemu, as is
2312              * correct for PR. */
2313             return 0;
2314         } else if (ret < 0) {
2315             return ret;
2316         }
2317         return shift;
2318     }
2319 
2320     /* We have a kernel that predates the htab reset calls.  For PR
2321      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2322      * this era, it has allocated a 16MB fixed size hash table already. */
2323     if (kvmppc_is_pr(kvm_state)) {
2324         /* PR - tell caller to allocate htab */
2325         return 0;
2326     } else {
2327         /* HV - assume 16MB kernel allocated htab */
2328         return 24;
2329     }
2330 }
2331 
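/* Read the host Processor Version Register directly with the mfpvr
 * instruction. */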
2332 static inline uint32_t mfpvr(void)
2333 {
2334     uint32_t pvr;
2335 
2336     asm ("mfpvr %0"
2337          : "=r"(pvr));
2338     return pvr;
2339 }
2340 
2341 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2342 {
2343     if (on) {
2344         *word |= flags;
2345     } else {
2346         *word &= ~flags;
2347     }
2348 }
2349 
2350 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2351 {
2352     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2353     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2354     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2355 
2356     /* Now fix up the class with information we can query from the host */
2357     pcc->pvr = mfpvr();
2358 
2359     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2360                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2361     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2362                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2363     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2364                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2365 
2366     if (dcache_size != -1) {
2367         pcc->l1_dcache_size = dcache_size;
2368     }
2369 
2370     if (icache_size != -1) {
2371         pcc->l1_icache_size = icache_size;
2372     }
2373 
2374 #if defined(TARGET_PPC64)
2375     pcc->radix_page_info = kvm_get_radix_page_info();
2376 
2377     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2378         /*
2379          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2380          * compliant.  More importantly, advertising ISA 3.00
2381          * architected mode may prevent guests from activating
2382          * necessary DD1 workarounds.
2383          */
2384         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2385                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2386     }
2387 #endif /* defined(TARGET_PPC64) */
2388 }
2389 
2390 bool kvmppc_has_cap_epr(void)
2391 {
2392     return cap_epr;
2393 }
2394 
2395 bool kvmppc_has_cap_fixup_hcalls(void)
2396 {
2397     return cap_fixup_hcalls;
2398 }
2399 
2400 bool kvmppc_has_cap_htm(void)
2401 {
2402     return cap_htm;
2403 }
2404 
2405 bool kvmppc_has_cap_mmu_radix(void)
2406 {
2407     return cap_mmu_radix;
2408 }
2409 
2410 bool kvmppc_has_cap_mmu_hash_v3(void)
2411 {
2412     return cap_mmu_hash_v3;
2413 }
2414 
2415 static void kvmppc_get_cpu_characteristics(KVMState *s)
2416 {
2417     struct kvm_ppc_cpu_char c;
2418     int ret;
2419 
2420     /* Assume broken */
2421     cap_ppc_safe_cache = 0;
2422     cap_ppc_safe_bounds_check = 0;
2423     cap_ppc_safe_indirect_branch = 0;
2424 
2425     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2426     if (!ret) {
2427         return;
2428     }
2429     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2430     if (ret < 0) {
2431         return;
2432     }
2433     /* Parse and set cap_ppc_safe_cache */
2434     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2435         cap_ppc_safe_cache = 2;
2436     } else if ((c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2437                (c.character & c.character_mask
2438                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2439         cap_ppc_safe_cache = 1;
2440     }
2441     /* Parse and set cap_ppc_safe_bounds_check */
2442     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2443         cap_ppc_safe_bounds_check = 2;
2444     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2445         cap_ppc_safe_bounds_check = 1;
2446     }
2447     /* Parse and set cap_ppc_safe_indirect_branch */
2448     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2449         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_CCD;
2450     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2451         cap_ppc_safe_indirect_branch = SPAPR_CAP_FIXED_IBS;
2452     }
2453 }
2454 
2455 int kvmppc_get_cap_safe_cache(void)
2456 {
2457     return cap_ppc_safe_cache;
2458 }
2459 
2460 int kvmppc_get_cap_safe_bounds_check(void)
2461 {
2462     return cap_ppc_safe_bounds_check;
2463 }
2464 
2465 int kvmppc_get_cap_safe_indirect_branch(void)
2466 {
2467     return cap_ppc_safe_indirect_branch;
2468 }
2469 
2470 bool kvmppc_has_cap_spapr_vfio(void)
2471 {
2472     return cap_spapr_vfio;
2473 }
2474 
2475 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2476 {
2477     uint32_t host_pvr = mfpvr();
2478     PowerPCCPUClass *pvr_pcc;
2479 
2480     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2481     if (pvr_pcc == NULL) {
2482         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2483     }
2484 
2485     return pvr_pcc;
2486 }
2487 
2488 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2489 {
2490     TypeInfo type_info = {
2491         .name = TYPE_HOST_POWERPC_CPU,
2492         .class_init = kvmppc_host_cpu_class_init,
2493     };
2494     MachineClass *mc = MACHINE_GET_CLASS(ms);
2495     PowerPCCPUClass *pvr_pcc;
2496     ObjectClass *oc;
2497     DeviceClass *dc;
2498     int i;
2499 
2500     pvr_pcc = kvm_ppc_get_host_cpu_class();
2501     if (pvr_pcc == NULL) {
2502         return -1;
2503     }
2504     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2505     type_register(&type_info);
2506     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2507         /* override TCG default cpu type with 'host' cpu model */
2508         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2509     }
2510 
2511     oc = object_class_by_name(type_info.name);
2512     g_assert(oc);
2513 
2514     /*
2515      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2516      * we want "POWER8" to be a "family" alias that points to the current
2517      * host CPU type, too)
2518      */
2519     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2520     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2521         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2522             char *suffix;
2523 
2524             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2525             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2526             if (suffix) {
2527                 *suffix = 0;
2528             }
2529             break;
2530         }
2531     }
2532 
2533     return 0;
2534 }
2535 
2536 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2537 {
2538     struct kvm_rtas_token_args args = {
2539         .token = token,
2540     };
2541 
2542     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2543         return -ENOENT;
2544     }
2545 
2546     strncpy(args.name, function, sizeof(args.name));
2547 
2548     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2549 }
2550 
2551 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2552 {
2553     struct kvm_get_htab_fd s = {
2554         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2555         .start_index = index,
2556     };
2557     int ret;
2558 
2559     if (!cap_htab_fd) {
2560         error_setg(errp, "KVM version doesn't support %s the HPT",
2561                    write ? "writing" : "reading");
2562         return -ENOTSUP;
2563     }
2564 
2565     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2566     if (ret < 0) {
2567         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2568                    write ? "writing" : "reading", write ? "to" : "from",
2569                    strerror(errno));
2570         return -errno;
2571     }
2572 
2573     return ret;
2574 }
2575 
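/* Drain the kernel's HTAB fd into the migration stream.  Each chunk is
 * written as a big-endian header (index, n_valid, n_invalid) followed
 * by n_valid * HASH_PTE_SIZE_64 bytes of PTE data.  Returns 1 once the
 * kernel reports the end of the table, 0 if max_ns expired first, or a
 * negative value on read errors. */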
2576 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2577 {
2578     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2579     uint8_t buf[bufsize];
2580     ssize_t rc;
2581 
2582     do {
2583         rc = read(fd, buf, bufsize);
2584         if (rc < 0) {
2585             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2586                     strerror(errno));
2587             return rc;
2588         } else if (rc) {
2589             uint8_t *buffer = buf;
2590             ssize_t n = rc;
2591             while (n) {
2592                 struct kvm_get_htab_header *head =
2593                     (struct kvm_get_htab_header *) buffer;
2594                 size_t chunksize = sizeof(*head) +
2595                      HASH_PTE_SIZE_64 * head->n_valid;
2596 
2597                 qemu_put_be32(f, head->index);
2598                 qemu_put_be16(f, head->n_valid);
2599                 qemu_put_be16(f, head->n_invalid);
2600                 qemu_put_buffer(f, (void *)(head + 1),
2601                                 HASH_PTE_SIZE_64 * head->n_valid);
2602 
2603                 buffer += chunksize;
2604                 n -= chunksize;
2605             }
2606         }
2607     } while ((rc != 0)
2608              && ((max_ns < 0)
2609                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2610 
2611     return (rc == 0) ? 1 : 0;
2612 }
2613 
2614 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2615                            uint16_t n_valid, uint16_t n_invalid)
2616 {
2617     struct kvm_get_htab_header *buf;
2618     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2619     ssize_t rc;
2620 
2621     buf = alloca(chunksize);
2622     buf->index = index;
2623     buf->n_valid = n_valid;
2624     buf->n_invalid = n_invalid;
2625 
2626     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2627 
2628     rc = write(fd, buf, chunksize);
2629     if (rc < 0) {
2630         fprintf(stderr, "Error writing KVM hash table: %s\n",
2631                 strerror(errno));
2632         return rc;
2633     }
2634     if (rc != chunksize) {
2635         /* We should never get a short write on a single chunk */
2636         fprintf(stderr, "Short write, restoring KVM hash table\n");
2637         return -1;
2638     }
2639     return 0;
2640 }
2641 
2642 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2643 {
2644     return true;
2645 }
2646 
2647 void kvm_arch_init_irq_routing(KVMState *s)
2648 {
2649 }
2650 
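/* Read 'n' hash PTEs starting at index 'ptex' through the HTAB fd,
 * copying valid entries into 'hptes' and zero-filling the ranges the
 * kernel reports as invalid. */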
2651 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2652 {
2653     int fd, rc;
2654     int i;
2655 
2656     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2657 
2658     i = 0;
2659     while (i < n) {
2660         struct kvm_get_htab_header *hdr;
2661         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2662         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2663 
2664         rc = read(fd, buf, sizeof(buf));
2665         if (rc < 0) {
2666             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2667         }
2668 
2669         hdr = (struct kvm_get_htab_header *)buf;
2670         while ((i < n) && ((char *)hdr < (buf + rc))) {
2671             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2672 
2673             if (hdr->index != (ptex + i)) {
2674                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2675                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2676             }
2677 
2678             if (n - i < valid) {
2679                 valid = n - i;
2680             }
2681             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2682             i += valid;
2683 
2684             if ((n - i) < invalid) {
2685                 invalid = n - i;
2686             }
2687             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2688             i += invalid;
2689 
2690             hdr = (struct kvm_get_htab_header *)
2691                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2692         }
2693     }
2694 
2695     close(fd);
2696 }
2697 
2698 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2699 {
2700     int fd, rc;
2701     struct {
2702         struct kvm_get_htab_header hdr;
2703         uint64_t pte0;
2704         uint64_t pte1;
2705     } buf;
2706 
2707     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2708 
2709     buf.hdr.n_valid = 1;
2710     buf.hdr.n_invalid = 0;
2711     buf.hdr.index = ptex;
2712     buf.pte0 = cpu_to_be64(pte0);
2713     buf.pte1 = cpu_to_be64(pte1);
2714 
2715     rc = write(fd, &buf, sizeof(buf));
2716     if (rc != sizeof(buf)) {
2717         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2718     }
2719     close(fd);
2720 }
2721 
2722 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2723                              uint64_t address, uint32_t data, PCIDevice *dev)
2724 {
2725     return 0;
2726 }
2727 
2728 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2729                                 int vector, PCIDevice *dev)
2730 {
2731     return 0;
2732 }
2733 
2734 int kvm_arch_release_virq_post(int virq)
2735 {
2736     return 0;
2737 }
2738 
2739 int kvm_arch_msi_data_to_gsi(uint32_t data)
2740 {
2741     return data & 0xffff;
2742 }
2743 
2744 int kvmppc_enable_hwrng(void)
2745 {
2746     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2747         return -1;
2748     }
2749 
2750     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2751 }
2752 
2753 void kvmppc_check_papr_resize_hpt(Error **errp)
2754 {
2755     if (!kvm_enabled()) {
2756         return; /* No KVM, we're good */
2757     }
2758 
2759     if (cap_resize_hpt) {
2760         return; /* Kernel has explicit support, we're good */
2761     }
2762 
2763     /* Otherwise fallback on looking for PR KVM */
2764     if (kvmppc_is_pr(kvm_state)) {
2765         return;
2766     }
2767 
2768     error_setg(errp,
2769                "Hash page table resizing not available with this KVM version");
2770 }
2771 
2772 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2773 {
2774     CPUState *cs = CPU(cpu);
2775     struct kvm_ppc_resize_hpt rhpt = {
2776         .flags = flags,
2777         .shift = shift,
2778     };
2779 
2780     if (!cap_resize_hpt) {
2781         return -ENOSYS;
2782     }
2783 
2784     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2785 }
2786 
2787 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2788 {
2789     CPUState *cs = CPU(cpu);
2790     struct kvm_ppc_resize_hpt rhpt = {
2791         .flags = flags,
2792         .shift = shift,
2793     };
2794 
2795     if (!cap_resize_hpt) {
2796         return -ENOSYS;
2797     }
2798 
2799     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2800 }
2801 
2802 /*
2803  * This is a helper function to detect a post migration scenario
2804  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2805  * the guest kernel can't handle a PVR value other than the actual host
2806  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2807  *
2808  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2809  * (so, we're HV), return true. The workaround itself is done in
2810  * cpu_post_load.
2811  *
2812  * The order here is important: we'll only check for KVM PR as a
2813  * fallback if the guest kernel can't handle the situation itself.
2814  * We need to avoid as much as possible querying the running KVM type
2815  * in QEMU level.
2816  */
2817 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2818 {
2819     CPUState *cs = CPU(cpu);
2820 
2821     if (!kvm_enabled()) {
2822         return false;
2823     }
2824 
2825     if (cap_ppc_pvr_compat) {
2826         return false;
2827     }
2828 
2829     return !kvmppc_is_pr(cs->kvm_state);
2830 }
2831