xref: /qemu/target/ppc/kvm.c (revision ef291226)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/hw_accel.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
50 
51 //#define DEBUG_KVM
52 
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58     do { } while (0)
59 #endif
60 
61 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
62 
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64     KVM_CAP_LAST_INFO
65 };
66 
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
83 static int cap_htm;             /* Hardware transactional memory support */
84 
85 static uint32_t debug_inst_opcode;
86 
87 /* XXX We have a race condition where we actually have a level triggered
88  *     interrupt, but the infrastructure can't expose that yet, so the guest
89  *     takes the interrupt but ignores it, goes to sleep and never gets notified that there's
90  *     still an interrupt pending.
91  *
92  *     As a quick workaround, let's just wake up again 20 ms after we injected
93  *     an interrupt. That way we can ensure that we're always reinjecting
94  *     interrupts in case the guest swallowed them.
95  */
96 static QEMUTimer *idle_timer;
97 
98 static void kvm_kick_cpu(void *opaque)
99 {
100     PowerPCCPU *cpu = opaque;
101 
102     qemu_cpu_kick(CPU(cpu));
103 }
104 
105 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
106  * should only be used for fallback tests - generally we should use
107  * explicit capabilities for the features we want, rather than
108  * assuming what is/isn't available depending on the KVM variant. */
109 static bool kvmppc_is_pr(KVMState *ks)
110 {
111     /* Assume KVM-PR if the GET_PVINFO capability is available */
112     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
113 }
114 
115 static int kvm_ppc_register_host_cpu_type(void);
116 
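/*
 * Called once when the VM is created: cache the KVM capabilities we
 * care about and register the "host" CPU type.
 */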
117 int kvm_arch_init(MachineState *ms, KVMState *s)
118 {
119     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
120     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
121     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
122     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
123     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
124     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
125     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
126     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
127     cap_spapr_vfio = false;
128     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
129     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
130     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
131     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
132     /* Note: we don't set cap_papr here, because this capability is
133      * only activated later, by kvmppc_set_papr() */
134     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
135     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
136     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
137 
138     if (!cap_interrupt_level) {
139         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
140                         "VM to stall at times!\n");
141     }
142 
143     kvm_ppc_register_host_cpu_type();
144 
145     return 0;
146 }
147 
148 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
149 {
150     return 0;
151 }
152 
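/*
 * Push the guest PVR to KVM via the sregs interface. BookE guests
 * always run with the host's native PVR; Book3S requires the
 * segstate capability to set it.
 */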
153 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
154 {
155     CPUPPCState *cenv = &cpu->env;
156     CPUState *cs = CPU(cpu);
157     struct kvm_sregs sregs;
158     int ret;
159 
160     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
161         /* What we're really trying to say is "if we're on BookE, we use
162            the native PVR for now". Checking the exception model is the only
163            sane way to do that, though it may mislead users into thinking they
164            can run BookE guests on BookS. Let's hope nobody tries :) */
165         return 0;
166     } else {
167         if (!cap_segstate) {
168             fprintf(stderr, "kvm error: missing PVR setting capability\n");
169             return -ENOSYS;
170         }
171     }
172 
173     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
174     if (ret) {
175         return ret;
176     }
177 
178     sregs.pvr = cenv->spr[SPR_PVR];
179     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
180 }
181 
182 /* Set up a shared TLB array with KVM */
183 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
184 {
185     CPUPPCState *env = &cpu->env;
186     CPUState *cs = CPU(cpu);
187     struct kvm_book3e_206_tlb_params params = {};
188     struct kvm_config_tlb cfg = {};
189     unsigned int entries = 0;
190     int ret, i;
191 
192     if (!kvm_enabled() ||
193         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
194         return 0;
195     }
196 
197     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
198 
199     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
200         params.tlb_sizes[i] = booke206_tlb_size(env, i);
201         params.tlb_ways[i] = booke206_tlb_ways(env, i);
202         entries += params.tlb_sizes[i];
203     }
204 
205     assert(entries == env->nb_tlb);
206     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
207 
208     env->tlb_dirty = true;
209 
210     cfg.array = (uintptr_t)env->tlb.tlbm;
211     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
212     cfg.params = (uintptr_t)&params;
213     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
214 
215     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
216     if (ret < 0) {
217         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
218                 __func__, strerror(-ret));
219         return ret;
220     }
221 
222     env->kvm_sw_tlb = true;
223     return 0;
224 }
225 
226 
227 #if defined(TARGET_PPC64)
228 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
229                                        struct kvm_ppc_smmu_info *info)
230 {
231     CPUPPCState *env = &cpu->env;
232     CPUState *cs = CPU(cpu);
233 
234     memset(info, 0, sizeof(*info));
235 
236     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
237      * we need to "guess" what the supported page sizes are.
238      *
239      * For that to work we make a few assumptions:
240      *
241      * - If we are running "PR" KVM, it only supports 4K and 16M pages,
242      *   but it supports them regardless of the backing store
243      *   characteristics. We also don't support 1T segments.
244      *
245      *   This is safe as if HV KVM ever supports that capability or PR
246      *   KVM grows support for more page/segment sizes, those versions
247      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
248      *   will not hit this fallback.
249      *
250      * - Else we are running HV KVM. This means we only support page
251      *   sizes that fit in the backing store. Additionally we only
252      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
253      *   P7 encodings for the SLB and hash table. Here too, we assume
254      *   support for any newer processor will mean a kernel that
255      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
256      *   this fallback.
257      */
258     if (kvmppc_is_pr(cs->kvm_state)) {
259         /* No flags */
260         info->flags = 0;
261         info->slb_size = 64;
262 
263         /* Standard 4k base page size segment */
264         info->sps[0].page_shift = 12;
265         info->sps[0].slb_enc = 0;
266         info->sps[0].enc[0].page_shift = 12;
267         info->sps[0].enc[0].pte_enc = 0;
268 
269         /* Standard 16M large page size segment */
270         info->sps[1].page_shift = 24;
271         info->sps[1].slb_enc = SLB_VSID_L;
272         info->sps[1].enc[0].page_shift = 24;
273         info->sps[1].enc[0].pte_enc = 0;
274     } else {
275         int i = 0;
276 
277         /* HV KVM has backing store size restrictions */
278         info->flags = KVM_PPC_PAGE_SIZES_REAL;
279 
280         if (env->mmu_model & POWERPC_MMU_1TSEG) {
281             info->flags |= KVM_PPC_1T_SEGMENTS;
282         }
283 
284         if (env->mmu_model == POWERPC_MMU_2_06 ||
285             env->mmu_model == POWERPC_MMU_2_07) {
286             info->slb_size = 32;
287         } else {
288             info->slb_size = 64;
289         }
290 
291         /* Standard 4k base page size segment */
292         info->sps[i].page_shift = 12;
293         info->sps[i].slb_enc = 0;
294         info->sps[i].enc[0].page_shift = 12;
295         info->sps[i].enc[0].pte_enc = 0;
296         i++;
297 
298         /* 64K on MMU 2.06 and later */
299         if (env->mmu_model == POWERPC_MMU_2_06 ||
300             env->mmu_model == POWERPC_MMU_2_07) {
301             info->sps[i].page_shift = 16;
302             info->sps[i].slb_enc = 0x110;
303             info->sps[i].enc[0].page_shift = 16;
304             info->sps[i].enc[0].pte_enc = 1;
305             i++;
306         }
307 
308         /* Standard 16M large page size segment */
309         info->sps[i].page_shift = 24;
310         info->sps[i].slb_enc = SLB_VSID_L;
311         info->sps[i].enc[0].page_shift = 24;
312         info->sps[i].enc[0].pte_enc = 0;
313     }
314 }
315 
316 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
317 {
318     CPUState *cs = CPU(cpu);
319     int ret;
320 
321     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
322         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
323         if (ret == 0) {
324             return;
325         }
326     }
327 
328     kvm_get_fallback_smmu_info(cpu, info);
329 }
330 
331 static long gethugepagesize(const char *mem_path)
332 {
333     struct statfs fs;
334     int ret;
335 
336     do {
337         ret = statfs(mem_path, &fs);
338     } while (ret != 0 && errno == EINTR);
339 
340     if (ret != 0) {
341         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
342                 strerror(errno));
343         exit(1);
344     }
345 
346 #define HUGETLBFS_MAGIC       0x958458f6
347 
348     if (fs.f_type != HUGETLBFS_MAGIC) {
349         /* Explicit mempath, but it's ordinary pages */
350         return getpagesize();
351     }
352 
353     /* It's hugetlbfs, return the huge page size */
354     return fs.f_bsize;
355 }
356 
357 /*
358  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
359  * may or may not name the same files / on the same filesystem now as
360  * when we actually open and map them.  Iterate over the file
361  * descriptors instead, and use qemu_fd_getpagesize().
362  */
363 static int find_max_supported_pagesize(Object *obj, void *opaque)
364 {
365     char *mem_path;
366     long *hpsize_min = opaque;
367 
368     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
369         mem_path = object_property_get_str(obj, "mem-path", NULL);
370         if (mem_path) {
371             long hpsize = gethugepagesize(mem_path);
372             if (hpsize < *hpsize_min) {
373                 *hpsize_min = hpsize;
374             }
375         } else {
376             *hpsize_min = getpagesize();
377         }
378     }
379 
380     return 0;
381 }
382 
383 static long getrampagesize(void)
384 {
385     long hpsize = LONG_MAX;
386     long mainrampagesize;
387     Object *memdev_root;
388 
389     if (mem_path) {
390         mainrampagesize = gethugepagesize(mem_path);
391     } else {
392         mainrampagesize = getpagesize();
393     }
394 
395     /* It's possible we have memory-backend objects with
396      * hugepage-backed RAM. These may get mapped into system
397      * address space via -numa parameters or memory hotplug
398      * hooks. We want to take these into account, but we
399      * also want to make sure these supported hugepage
400      * sizes are applicable across the entire range of memory
401      * we may boot from, so we take the min across all
402      * backends, and assume normal pages in cases where a
403      * backend isn't backed by hugepages.
404      */
405     memdev_root = object_resolve_path("/objects", NULL);
406     if (memdev_root) {
407         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
408     }
409     if (hpsize == LONG_MAX) {
410         /* No additional memory regions found ==> Report main RAM page size */
411         return mainrampagesize;
412     }
413 
414     /* If NUMA is disabled or the NUMA nodes are not backed with a
415      * memory-backend, then there is at least one node using "normal" RAM,
416      * so if its page size is smaller, we have to report that size instead.
417      */
418     if (hpsize > mainrampagesize &&
419         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
420         static bool warned;
421         if (!warned) {
422             error_report("Huge page support disabled (n/a for main memory).");
423             warned = true;
424         }
425         return mainrampagesize;
426     }
427 
428     return hpsize;
429 }
430 
431 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
432 {
433     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
434         return true;
435     }
436 
437     return (1ul << shift) <= rampgsize;
438 }
439 
440 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
441 {
442     static struct kvm_ppc_smmu_info smmu_info;
443     static bool has_smmu_info;
444     CPUPPCState *env = &cpu->env;
445     long rampagesize;
446     int iq, ik, jq, jk;
447     bool has_64k_pages = false;
448 
449     /* We only handle page sizes for 64-bit server guests for now */
450     if (!(env->mmu_model & POWERPC_MMU_64)) {
451         return;
452     }
453 
454     /* Collect MMU info from kernel if not already */
455     if (!has_smmu_info) {
456         kvm_get_smmu_info(cpu, &smmu_info);
457         has_smmu_info = true;
458     }
459 
460     rampagesize = getrampagesize();
461 
462     /* Convert to QEMU form */
463     memset(&env->sps, 0, sizeof(env->sps));
464 
465     /* If we have HV KVM, we need to forbid CI large pages if our
466      * host page size is smaller than 64K.
467      */
468     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
469         env->ci_large_pages = getpagesize() >= 0x10000;
470     }
471 
472     /*
473      * XXX This loop should be an entry wide AND of the capabilities that
474      *     the selected CPU has with the capabilities that KVM supports.
475      */
476     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
477         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
478         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
479 
480         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
481                                  ksps->page_shift)) {
482             continue;
483         }
484         qsps->page_shift = ksps->page_shift;
485         qsps->slb_enc = ksps->slb_enc;
486         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
487             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
488                                      ksps->enc[jk].page_shift)) {
489                 continue;
490             }
491             if (ksps->enc[jk].page_shift == 16) {
492                 has_64k_pages = true;
493             }
494             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
495             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
496             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
497                 break;
498             }
499         }
500         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
501             break;
502         }
503     }
504     env->slb_nr = smmu_info.slb_size;
505     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
506         env->mmu_model &= ~POWERPC_MMU_1TSEG;
507     }
508     if (!has_64k_pages) {
509         env->mmu_model &= ~POWERPC_MMU_64K;
510     }
511 }
512 #else /* defined (TARGET_PPC64) */
513 
514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
515 {
516 }
517 
518 #endif /* !defined (TARGET_PPC64) */
519 
520 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
521 {
522     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
523 }
524 
525 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
526  * book3s supports only 1 watchpoint, so array size
527  * of 4 is sufficient for now.
528  */
529 #define MAX_HW_BKPTS 4
530 
531 static struct HWBreakpoint {
532     target_ulong addr;
533     int type;
534 } hw_debug_points[MAX_HW_BKPTS];
535 
536 static CPUWatchpoint hw_watchpoint;
537 
538 /* By default, no breakpoints or watchpoints are supported */
539 static int max_hw_breakpoint;
540 static int max_hw_watchpoint;
541 static int nb_hw_breakpoint;
542 static int nb_hw_watchpoint;
543 
544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
545 {
546     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
547         max_hw_breakpoint = 2;
548         max_hw_watchpoint = 2;
549     }
550 
551     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
552         fprintf(stderr, "Error initializing h/w breakpoints\n");
553         return;
554     }
555 }
556 
557 int kvm_arch_init_vcpu(CPUState *cs)
558 {
559     PowerPCCPU *cpu = POWERPC_CPU(cs);
560     CPUPPCState *cenv = &cpu->env;
561     int ret;
562 
563     /* Gather server mmu info from KVM and update the CPU state */
564     kvm_fixup_page_sizes(cpu);
565 
566     /* Synchronize sregs with kvm */
567     ret = kvm_arch_sync_sregs(cpu);
568     if (ret) {
569         if (ret == -EINVAL) {
570             error_report("Register sync failed... If you're using kvm-hv.ko,"
571                          " only \"-cpu host\" is possible");
572         }
573         return ret;
574     }
575 
576     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
577 
578     switch (cenv->mmu_model) {
579     case POWERPC_MMU_BOOKE206:
580         /* This target supports access to KVM's guest TLB */
581         ret = kvm_booke206_tlb_init(cpu);
582         break;
583     case POWERPC_MMU_2_07:
584         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
585             /* KVM-HV has transactional memory on POWER8 even without the
586              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
587             cap_htm = true;
588         }
589         break;
590     default:
591         break;
592     }
593 
594     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
595     kvmppc_hw_debug_points_init(cenv);
596 
597     return ret;
598 }
599 
600 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
601 {
602     CPUPPCState *env = &cpu->env;
603     CPUState *cs = CPU(cpu);
604     struct kvm_dirty_tlb dirty_tlb;
605     unsigned char *bitmap;
606     int ret;
607 
608     if (!env->kvm_sw_tlb) {
609         return;
610     }
611 
612     bitmap = g_malloc((env->nb_tlb + 7) / 8);
613     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
614 
615     dirty_tlb.bitmap = (uintptr_t)bitmap;
616     dirty_tlb.num_dirty = env->nb_tlb;
617 
618     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
619     if (ret) {
620         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
621                 __func__, strerror(-ret));
622     }
623 
624     g_free(bitmap);
625 }
626 
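/* Read a single SPR from KVM via GET_ONE_REG and store it in env->spr[]. */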
627 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
628 {
629     PowerPCCPU *cpu = POWERPC_CPU(cs);
630     CPUPPCState *env = &cpu->env;
631     union {
632         uint32_t u32;
633         uint64_t u64;
634     } val;
635     struct kvm_one_reg reg = {
636         .id = id,
637         .addr = (uintptr_t) &val,
638     };
639     int ret;
640 
641     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
642     if (ret != 0) {
643         trace_kvm_failed_spr_get(spr, strerror(errno));
644     } else {
645         switch (id & KVM_REG_SIZE_MASK) {
646         case KVM_REG_SIZE_U32:
647             env->spr[spr] = val.u32;
648             break;
649 
650         case KVM_REG_SIZE_U64:
651             env->spr[spr] = val.u64;
652             break;
653 
654         default:
655             /* Don't handle this size yet */
656             abort();
657         }
658     }
659 }
660 
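/* Write a single SPR from env->spr[] to KVM via SET_ONE_REG. */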
661 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
662 {
663     PowerPCCPU *cpu = POWERPC_CPU(cs);
664     CPUPPCState *env = &cpu->env;
665     union {
666         uint32_t u32;
667         uint64_t u64;
668     } val;
669     struct kvm_one_reg reg = {
670         .id = id,
671         .addr = (uintptr_t) &val,
672     };
673     int ret;
674 
675     switch (id & KVM_REG_SIZE_MASK) {
676     case KVM_REG_SIZE_U32:
677         val.u32 = env->spr[spr];
678         break;
679 
680     case KVM_REG_SIZE_U64:
681         val.u64 = env->spr[spr];
682         break;
683 
684     default:
685         /* Don't handle this size yet */
686         abort();
687     }
688 
689     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690     if (ret != 0) {
691         trace_kvm_failed_spr_set(spr, strerror(errno));
692     }
693 }
694 
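/* Upload the FPSCR, FP/VSX and Altivec register state to KVM. */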
695 static int kvm_put_fp(CPUState *cs)
696 {
697     PowerPCCPU *cpu = POWERPC_CPU(cs);
698     CPUPPCState *env = &cpu->env;
699     struct kvm_one_reg reg;
700     int i;
701     int ret;
702 
703     if (env->insns_flags & PPC_FLOAT) {
704         uint64_t fpscr = env->fpscr;
705         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
706 
707         reg.id = KVM_REG_PPC_FPSCR;
708         reg.addr = (uintptr_t)&fpscr;
709         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
710         if (ret < 0) {
711             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
712             return ret;
713         }
714 
715         for (i = 0; i < 32; i++) {
716             uint64_t vsr[2];
717 
718 #ifdef HOST_WORDS_BIGENDIAN
719             vsr[0] = float64_val(env->fpr[i]);
720             vsr[1] = env->vsr[i];
721 #else
722             vsr[0] = env->vsr[i];
723             vsr[1] = float64_val(env->fpr[i]);
724 #endif
725             reg.addr = (uintptr_t) &vsr;
726             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
727 
728             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729             if (ret < 0) {
730                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
731                         i, strerror(errno));
732                 return ret;
733             }
734         }
735     }
736 
737     if (env->insns_flags & PPC_ALTIVEC) {
738         reg.id = KVM_REG_PPC_VSCR;
739         reg.addr = (uintptr_t)&env->vscr;
740         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
741         if (ret < 0) {
742             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
743             return ret;
744         }
745 
746         for (i = 0; i < 32; i++) {
747             reg.id = KVM_REG_PPC_VR(i);
748             reg.addr = (uintptr_t)&env->avr[i];
749             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750             if (ret < 0) {
751                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
752                 return ret;
753             }
754         }
755     }
756 
757     return 0;
758 }
759 
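/* Fetch the FPSCR, FP/VSX and Altivec register state from KVM. */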
760 static int kvm_get_fp(CPUState *cs)
761 {
762     PowerPCCPU *cpu = POWERPC_CPU(cs);
763     CPUPPCState *env = &cpu->env;
764     struct kvm_one_reg reg;
765     int i;
766     int ret;
767 
768     if (env->insns_flags & PPC_FLOAT) {
769         uint64_t fpscr;
770         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
771 
772         reg.id = KVM_REG_PPC_FPSCR;
773         reg.addr = (uintptr_t)&fpscr;
774         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
775         if (ret < 0) {
776             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
777             return ret;
778         } else {
779             env->fpscr = fpscr;
780         }
781 
782         for (i = 0; i < 32; i++) {
783             uint64_t vsr[2];
784 
785             reg.addr = (uintptr_t) &vsr;
786             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
787 
788             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
789             if (ret < 0) {
790                 DPRINTF("Unable to get %s%d from KVM: %s\n",
791                         vsx ? "VSR" : "FPR", i, strerror(errno));
792                 return ret;
793             } else {
794 #ifdef HOST_WORDS_BIGENDIAN
795                 env->fpr[i] = vsr[0];
796                 if (vsx) {
797                     env->vsr[i] = vsr[1];
798                 }
799 #else
800                 env->fpr[i] = vsr[1];
801                 if (vsx) {
802                     env->vsr[i] = vsr[0];
803                 }
804 #endif
805             }
806         }
807     }
808 
809     if (env->insns_flags & PPC_ALTIVEC) {
810         reg.id = KVM_REG_PPC_VSCR;
811         reg.addr = (uintptr_t)&env->vscr;
812         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
813         if (ret < 0) {
814             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
815             return ret;
816         }
817 
818         for (i = 0; i < 32; i++) {
819             reg.id = KVM_REG_PPC_VR(i);
820             reg.addr = (uintptr_t)&env->avr[i];
821             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
822             if (ret < 0) {
823                 DPRINTF("Unable to get VR%d from KVM: %s\n",
824                         i, strerror(errno));
825                 return ret;
826             }
827         }
828     }
829 
830     return 0;
831 }
832 
833 #if defined(TARGET_PPC64)
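/*
 * Read back the guest's VPA, SLB shadow and dispatch trace log
 * registrations from KVM (only used when the PAPR capability is set).
 */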
834 static int kvm_get_vpa(CPUState *cs)
835 {
836     PowerPCCPU *cpu = POWERPC_CPU(cs);
837     CPUPPCState *env = &cpu->env;
838     struct kvm_one_reg reg;
839     int ret;
840 
841     reg.id = KVM_REG_PPC_VPA_ADDR;
842     reg.addr = (uintptr_t)&env->vpa_addr;
843     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
844     if (ret < 0) {
845         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
846         return ret;
847     }
848 
849     assert((uintptr_t)&env->slb_shadow_size
850            == ((uintptr_t)&env->slb_shadow_addr + 8));
851     reg.id = KVM_REG_PPC_VPA_SLB;
852     reg.addr = (uintptr_t)&env->slb_shadow_addr;
853     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
854     if (ret < 0) {
855         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
856                 strerror(errno));
857         return ret;
858     }
859 
860     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
861     reg.id = KVM_REG_PPC_VPA_DTL;
862     reg.addr = (uintptr_t)&env->dtl_addr;
863     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
864     if (ret < 0) {
865         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
866                 strerror(errno));
867         return ret;
868     }
869 
870     return 0;
871 }
872 
873 static int kvm_put_vpa(CPUState *cs)
874 {
875     PowerPCCPU *cpu = POWERPC_CPU(cs);
876     CPUPPCState *env = &cpu->env;
877     struct kvm_one_reg reg;
878     int ret;
879 
880     /* SLB shadow or DTL can't be registered unless a master VPA is
881      * registered.  That means when restoring state, if a VPA *is*
882      * registered, we need to set that up first.  If not, we need to
883      * deregister the others before deregistering the master VPA */
884     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
885 
886     if (env->vpa_addr) {
887         reg.id = KVM_REG_PPC_VPA_ADDR;
888         reg.addr = (uintptr_t)&env->vpa_addr;
889         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
890         if (ret < 0) {
891             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
892             return ret;
893         }
894     }
895 
896     assert((uintptr_t)&env->slb_shadow_size
897            == ((uintptr_t)&env->slb_shadow_addr + 8));
898     reg.id = KVM_REG_PPC_VPA_SLB;
899     reg.addr = (uintptr_t)&env->slb_shadow_addr;
900     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
901     if (ret < 0) {
902         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
903         return ret;
904     }
905 
906     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
907     reg.id = KVM_REG_PPC_VPA_DTL;
908     reg.addr = (uintptr_t)&env->dtl_addr;
909     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
910     if (ret < 0) {
911         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
912                 strerror(errno));
913         return ret;
914     }
915 
916     if (!env->vpa_addr) {
917         reg.id = KVM_REG_PPC_VPA_ADDR;
918         reg.addr = (uintptr_t)&env->vpa_addr;
919         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
920         if (ret < 0) {
921             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
922             return ret;
923         }
924     }
925 
926     return 0;
927 }
928 #endif /* TARGET_PPC64 */
929 
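/*
 * Push the Book3S sregs (PVR, SDR1, SLB entries, segment registers
 * and BATs) to KVM.
 */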
930 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
931 {
932     CPUPPCState *env = &cpu->env;
933     struct kvm_sregs sregs;
934     int i;
935 
936     sregs.pvr = env->spr[SPR_PVR];
937 
938     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
939 
940     /* Sync SLB */
941 #ifdef TARGET_PPC64
942     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
943         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
944         if (env->slb[i].esid & SLB_ESID_V) {
945             sregs.u.s.ppc64.slb[i].slbe |= i;
946         }
947         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
948     }
949 #endif
950 
951     /* Sync SRs */
952     for (i = 0; i < 16; i++) {
953         sregs.u.s.ppc32.sr[i] = env->sr[i];
954     }
955 
956     /* Sync BATs */
957     for (i = 0; i < 8; i++) {
958         /* Beware. We have to swap upper and lower bits here */
959         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
960             | env->DBAT[1][i];
961         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
962             | env->IBAT[1][i];
963     }
964 
965     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
966 }
967 
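/*
 * Copy the full register state from QEMU to KVM: GPRs and common
 * SPRs via SET_REGS, then floating point/vector state, sregs and any
 * ONE_REG-backed SPRs, depending on the capabilities available.
 */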
968 int kvm_arch_put_registers(CPUState *cs, int level)
969 {
970     PowerPCCPU *cpu = POWERPC_CPU(cs);
971     CPUPPCState *env = &cpu->env;
972     struct kvm_regs regs;
973     int ret;
974     int i;
975 
976     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
977     if (ret < 0) {
978         return ret;
979     }
980 
981     regs.ctr = env->ctr;
982     regs.lr  = env->lr;
983     regs.xer = cpu_read_xer(env);
984     regs.msr = env->msr;
985     regs.pc = env->nip;
986 
987     regs.srr0 = env->spr[SPR_SRR0];
988     regs.srr1 = env->spr[SPR_SRR1];
989 
990     regs.sprg0 = env->spr[SPR_SPRG0];
991     regs.sprg1 = env->spr[SPR_SPRG1];
992     regs.sprg2 = env->spr[SPR_SPRG2];
993     regs.sprg3 = env->spr[SPR_SPRG3];
994     regs.sprg4 = env->spr[SPR_SPRG4];
995     regs.sprg5 = env->spr[SPR_SPRG5];
996     regs.sprg6 = env->spr[SPR_SPRG6];
997     regs.sprg7 = env->spr[SPR_SPRG7];
998 
999     regs.pid = env->spr[SPR_BOOKE_PID];
1000 
1001     for (i = 0; i < 32; i++) {
1002         regs.gpr[i] = env->gpr[i];
1003     }
1004     regs.cr = 0;
1005     for (i = 0; i < 8; i++) {
1006         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1007     }
1008 
1009     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1010     if (ret < 0) {
1011         return ret;
1012     }
1013     kvm_put_fp(cs);
1014 
1015     if (env->tlb_dirty) {
1016         kvm_sw_tlb_put(cpu);
1017         env->tlb_dirty = false;
1018     }
1019 
1020     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1021         ret = kvmppc_put_books_sregs(cpu);
1022         if (ret < 0) {
1023             return ret;
1024         }
1025     }
1026 
1027     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1028         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1029     }
1030 
1031     if (cap_one_reg) {
1032         int i;
1033 
1034         /* We deliberately ignore errors here: for kernels which have
1035          * the ONE_REG calls but don't support the specific
1036          * registers, there's a reasonable chance things will still
1037          * work, at least until we try to migrate. */
1038         for (i = 0; i < 1024; i++) {
1039             uint64_t id = env->spr_cb[i].one_reg_id;
1040 
1041             if (id != 0) {
1042                 kvm_put_one_spr(cs, id, i);
1043             }
1044         }
1045 
1046 #ifdef TARGET_PPC64
1047         if (msr_ts) {
1048             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1049                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1050             }
1051             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1052                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1053             }
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1064         }
1065 
1066         if (cap_papr) {
1067             if (kvm_put_vpa(cs) < 0) {
1068                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1069             }
1070         }
1071 
1072         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1073 #endif /* TARGET_PPC64 */
1074     }
1075 
1076     return ret;
1077 }
1078 
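/* Recompute a BookE exception vector from the given IVOR plus IVPR. */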
1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1080 {
1081      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1082 }
1083 
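/*
 * Read the BookE sregs from KVM and update the corresponding SPRs,
 * one feature block at a time.
 */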
1084 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1085 {
1086     CPUPPCState *env = &cpu->env;
1087     struct kvm_sregs sregs;
1088     int ret;
1089 
1090     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1091     if (ret < 0) {
1092         return ret;
1093     }
1094 
1095     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1096         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1097         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1098         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1099         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1100         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1101         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1102         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1103         env->spr[SPR_DECR] = sregs.u.e.dec;
1104         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1105         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1106         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1107     }
1108 
1109     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1110         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1111         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1112         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1113         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1114         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1115     }
1116 
1117     if (sregs.u.e.features & KVM_SREGS_E_64) {
1118         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1119     }
1120 
1121     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1122         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1123     }
1124 
1125     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1126         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1127         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1128         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1129         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1130         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1131         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1132         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1133         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1134         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1135         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1136         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1137         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1138         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1139         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1140         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1141         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1142         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1143         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1144         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1145         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1146         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1147         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1148         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1149         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1150         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1151         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1152         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1153         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1154         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1155         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1156         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1157         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1158 
1159         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1160             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1161             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1162             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1163             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1164             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1165             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1166         }
1167 
1168         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1169             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1170             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1171         }
1172 
1173         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1174             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1175             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1176             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1177             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1178         }
1179     }
1180 
1181     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1182         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1183         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1184         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1185         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1186         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1187         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1188         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1189         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1190         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1191         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1192     }
1193 
1194     if (sregs.u.e.features & KVM_SREGS_EXP) {
1195         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1196     }
1197 
1198     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1199         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1200         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1201     }
1202 
1203     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1204         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1205         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1206         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1207 
1208         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1209             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1210             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1211         }
1212     }
1213 
1214     return 0;
1215 }
1216 
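/*
 * Read the Book3S sregs (SDR1, SLB entries, segment registers and
 * BATs) back from KVM.
 */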
1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1218 {
1219     CPUPPCState *env = &cpu->env;
1220     struct kvm_sregs sregs;
1221     int ret;
1222     int i;
1223 
1224     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1225     if (ret < 0) {
1226         return ret;
1227     }
1228 
1229     if (!env->external_htab) {
1230         ppc_store_sdr1(env, sregs.u.s.sdr1);
1231     }
1232 
1233     /* Sync SLB */
1234 #ifdef TARGET_PPC64
1235     /*
1236      * The packed SLB array we get from KVM_GET_SREGS only contains
1237      * information about valid entries. So we flush our internal copy
1238      * to get rid of stale ones, then put all valid SLB entries back
1239      * in.
1240      */
1241     memset(env->slb, 0, sizeof(env->slb));
1242     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1243         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1244         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1245         /*
1246          * Only restore valid entries
1247          */
1248         if (rb & SLB_ESID_V) {
1249             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1250         }
1251     }
1252 #endif
1253 
1254     /* Sync SRs */
1255     for (i = 0; i < 16; i++) {
1256         env->sr[i] = sregs.u.s.ppc32.sr[i];
1257     }
1258 
1259     /* Sync BATs */
1260     for (i = 0; i < 8; i++) {
1261         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1262         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1263         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1264         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1265     }
1266 
1267     return 0;
1268 }
1269 
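/*
 * Counterpart of kvm_arch_put_registers: pull the register state
 * from KVM back into QEMU's CPUPPCState.
 */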
1270 int kvm_arch_get_registers(CPUState *cs)
1271 {
1272     PowerPCCPU *cpu = POWERPC_CPU(cs);
1273     CPUPPCState *env = &cpu->env;
1274     struct kvm_regs regs;
1275     uint32_t cr;
1276     int i, ret;
1277 
1278     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1279     if (ret < 0) {
1280         return ret;
1281     }
1282     cr = regs.cr;
1283     for (i = 7; i >= 0; i--) {
1284         env->crf[i] = cr & 15;
1285         cr >>= 4;
1286     }
1287 
1288     env->ctr = regs.ctr;
1289     env->lr = regs.lr;
1290     cpu_write_xer(env, regs.xer);
1291     env->msr = regs.msr;
1292     env->nip = regs.pc;
1293 
1294     env->spr[SPR_SRR0] = regs.srr0;
1295     env->spr[SPR_SRR1] = regs.srr1;
1296 
1297     env->spr[SPR_SPRG0] = regs.sprg0;
1298     env->spr[SPR_SPRG1] = regs.sprg1;
1299     env->spr[SPR_SPRG2] = regs.sprg2;
1300     env->spr[SPR_SPRG3] = regs.sprg3;
1301     env->spr[SPR_SPRG4] = regs.sprg4;
1302     env->spr[SPR_SPRG5] = regs.sprg5;
1303     env->spr[SPR_SPRG6] = regs.sprg6;
1304     env->spr[SPR_SPRG7] = regs.sprg7;
1305 
1306     env->spr[SPR_BOOKE_PID] = regs.pid;
1307 
1308     for (i = 0; i < 32; i++) {
1309         env->gpr[i] = regs.gpr[i];
1310     }
1311     kvm_get_fp(cs);
1312 
1313     if (cap_booke_sregs) {
1314         ret = kvmppc_get_booke_sregs(cpu);
1315         if (ret < 0) {
1316             return ret;
1317         }
1318     }
1319 
1320     if (cap_segstate) {
1321         ret = kvmppc_get_books_sregs(cpu);
1322         if (ret < 0) {
1323             return ret;
1324         }
1325     }
1326 
1327     if (cap_hior) {
1328         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1329     }
1330 
1331     if (cap_one_reg) {
1332         int i;
1333 
1334         /* We deliberately ignore errors here: for kernels which have
1335          * the ONE_REG calls but don't support the specific
1336          * registers, there's a reasonable chance things will still
1337          * work, at least until we try to migrate. */
1338         for (i = 0; i < 1024; i++) {
1339             uint64_t id = env->spr_cb[i].one_reg_id;
1340 
1341             if (id != 0) {
1342                 kvm_get_one_spr(cs, id, i);
1343             }
1344         }
1345 
1346 #ifdef TARGET_PPC64
1347         if (msr_ts) {
1348             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1349                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1350             }
1351             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1352                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1353             }
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1364         }
1365 
1366         if (cap_papr) {
1367             if (kvm_get_vpa(cs) < 0) {
1368                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1369             }
1370         }
1371 
1372         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1373 #endif
1374     }
1375 
1376     return 0;
1377 }
1378 
1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1380 {
1381     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1382 
1383     if (irq != PPC_INTERRUPT_EXT) {
1384         return 0;
1385     }
1386 
1387     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1388         return 0;
1389     }
1390 
1391     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1392 
1393     return 0;
1394 }
1395 
1396 #if defined(TARGET_PPCEMB)
1397 #define PPC_INPUT_INT PPC40x_INPUT_INT
1398 #elif defined(TARGET_PPC64)
1399 #define PPC_INPUT_INT PPC970_INPUT_INT
1400 #else
1401 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1402 #endif
1403 
1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1405 {
1406     PowerPCCPU *cpu = POWERPC_CPU(cs);
1407     CPUPPCState *env = &cpu->env;
1408     int r;
1409     unsigned irq;
1410 
1411     qemu_mutex_lock_iothread();
1412 
1413     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1414      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1415     if (!cap_interrupt_level &&
1416         run->ready_for_interrupt_injection &&
1417         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1418         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1419     {
1420         /* For now KVM disregards the 'irq' argument. However, in the
1421          * future KVM could cache it in-kernel to avoid a heavyweight exit
1422          * when reading the UIC.
1423          */
1424         irq = KVM_INTERRUPT_SET;
1425 
1426         DPRINTF("injected interrupt %d\n", irq);
1427         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1428         if (r < 0) {
1429             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1430         }
1431 
1432         /* Always wake up soon in case the interrupt was level based */
1433         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1434                        (NANOSECONDS_PER_SECOND / 50));
1435     }
1436 
1437     /* We don't know if there are more interrupts pending after this. However,
1438      * the guest will return to userspace in the course of handling this one
1439      * anyway, so we will get a chance to deliver the rest. */
1440 
1441     qemu_mutex_unlock_iothread();
1442 }
1443 
1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1445 {
1446     return MEMTXATTRS_UNSPECIFIED;
1447 }
1448 
1449 int kvm_arch_process_async_events(CPUState *cs)
1450 {
1451     return cs->halted;
1452 }
1453 
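/*
 * Idle the vCPU when the guest halts, but only if no hard interrupt
 * is pending and external interrupts are enabled (MSR_EE).
 */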
1454 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1455 {
1456     CPUState *cs = CPU(cpu);
1457     CPUPPCState *env = &cpu->env;
1458 
1459     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1460         cs->halted = 1;
1461         cs->exception_index = EXCP_HLT;
1462     }
1463 
1464     return 0;
1465 }
1466 
1467 /* Map DCR accesses to the existing QEMU DCR emulation */
1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1469 {
1470     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
1471         fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
1472     }
1473     return 0;
1474 }
1475 
1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1477 {
1478     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
1479         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1480     }
1481     return 0;
1482 }
1483 
1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1485 {
1486     /* Mixed endian case is not handled */
1487     uint32_t sc = debug_inst_opcode;
1488 
1489     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1490                             sizeof(sc), 0) ||
1491         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1492         return -EINVAL;
1493     }
1494 
1495     return 0;
1496 }
1497 
1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499 {
1500     uint32_t sc;
1501 
1502     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1503         sc != debug_inst_opcode ||
1504         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1505                             sizeof(sc), 1)) {
1506         return -EINVAL;
1507     }
1508 
1509     return 0;
1510 }
1511 
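/*
 * Return the index of the hardware debug point matching addr and
 * type, or -1 if there is none.
 */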
1512 static int find_hw_breakpoint(target_ulong addr, int type)
1513 {
1514     int n;
1515 
1516     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1517            <= ARRAY_SIZE(hw_debug_points));
1518 
1519     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1520         if (hw_debug_points[n].addr == addr &&
1521              hw_debug_points[n].type == type) {
1522             return n;
1523         }
1524     }
1525 
1526     return -1;
1527 }
1528 
1529 static int find_hw_watchpoint(target_ulong addr, int *flag)
1530 {
1531     int n;
1532 
1533     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1534     if (n >= 0) {
1535         *flag = BP_MEM_ACCESS;
1536         return n;
1537     }
1538 
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1540     if (n >= 0) {
1541         *flag = BP_MEM_WRITE;
1542         return n;
1543     }
1544 
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1546     if (n >= 0) {
1547         *flag = BP_MEM_READ;
1548         return n;
1549     }
1550 
1551     return -1;
1552 }
1553 
1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1555                                   target_ulong len, int type)
1556 {
1557     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1558         return -ENOBUFS;
1559     }
1560 
1561     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1563 
1564     switch (type) {
1565     case GDB_BREAKPOINT_HW:
1566         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1567             return -ENOBUFS;
1568         }
1569 
1570         if (find_hw_breakpoint(addr, type) >= 0) {
1571             return -EEXIST;
1572         }
1573 
1574         nb_hw_breakpoint++;
1575         break;
1576 
1577     case GDB_WATCHPOINT_WRITE:
1578     case GDB_WATCHPOINT_READ:
1579     case GDB_WATCHPOINT_ACCESS:
1580         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1581             return -ENOBUFS;
1582         }
1583 
1584         if (find_hw_breakpoint(addr, type) >= 0) {
1585             return -EEXIST;
1586         }
1587 
1588         nb_hw_watchpoint++;
1589         break;
1590 
1591     default:
1592         return -ENOSYS;
1593     }
1594 
1595     return 0;
1596 }
1597 
1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1599                                   target_ulong len, int type)
1600 {
1601     int n;
1602 
1603     n = find_hw_breakpoint(addr, type);
1604     if (n < 0) {
1605         return -ENOENT;
1606     }
1607 
1608     switch (type) {
1609     case GDB_BREAKPOINT_HW:
1610         nb_hw_breakpoint--;
1611         break;
1612 
1613     case GDB_WATCHPOINT_WRITE:
1614     case GDB_WATCHPOINT_READ:
1615     case GDB_WATCHPOINT_ACCESS:
1616         nb_hw_watchpoint--;
1617         break;
1618 
1619     default:
1620         return -ENOSYS;
1621     }
1622     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1623 
1624     return 0;
1625 }
1626 
1627 void kvm_arch_remove_all_hw_breakpoints(void)
1628 {
1629     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1630 }
1631 
1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1633 {
1634     int n;
1635 
1636     /* Software Breakpoint updates */
1637     if (kvm_sw_breakpoints_active(cs)) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1639     }
1640 
1641     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1642            <= ARRAY_SIZE(hw_debug_points));
1643     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1644 
1645     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1646         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1647         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1648         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1649             switch (hw_debug_points[n].type) {
1650             case GDB_BREAKPOINT_HW:
1651                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1652                 break;
1653             case GDB_WATCHPOINT_WRITE:
1654                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1655                 break;
1656             case GDB_WATCHPOINT_READ:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1658                 break;
1659             case GDB_WATCHPOINT_ACCESS:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1661                                         KVMPPC_DEBUG_WATCH_READ;
1662                 break;
1663             default:
1664                 cpu_abort(cs, "Unsupported breakpoint type\n");
1665             }
1666             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667         }
1668     }
1669 }
1670 
1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1672 {
1673     CPUState *cs = CPU(cpu);
1674     CPUPPCState *env = &cpu->env;
1675     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1676     int handle = 0;
1677     int n;
1678     int flag = 0;
1679 
1680     if (cs->singlestep_enabled) {
1681         handle = 1;
1682     } else if (arch_info->status) {
1683         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1684             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1685                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1686                 if (n >= 0) {
1687                     handle = 1;
1688                 }
1689             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1690                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1691                 n = find_hw_watchpoint(arch_info->address,  &flag);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                     cs->watchpoint_hit = &hw_watchpoint;
1695                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1696                     hw_watchpoint.flags = flag;
1697                 }
1698             }
1699         }
1700     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1701         handle = 1;
1702     } else {
1703         /* QEMU cannot handle this debug exception, so inject a
1704          * program exception into the guest;
1705          * yes, a program exception, NOT a debug exception !!
1706          * While QEMU is using the debug resources, debug exceptions
1707          * must always be enabled. To achieve this we set MSR_DE and
1708          * also set MSRP_DEP so the guest cannot change MSR_DE.
1709          * When emulating debug resources for the guest, the guest
1710          * must be able to control MSR_DE (to enable/disable the debug
1711          * interrupt on demand). Supporting both configurations at the
1712          * same time is NOT possible, so debug resources cannot be
1713          * shared between QEMU and the guest on the BookE architecture.
1714          * In the current design QEMU gets priority over the guest:
1715          * if QEMU is using the debug resources then the guest cannot
1716          * use them.
1717          * For software breakpoints QEMU uses a privileged instruction,
1718          * so we cannot be here because of a guest-set debug exception;
1719          * the only possibility is that the guest executed a
1720          * privileged / illegal instruction, which is why we inject a
1721          * program interrupt.
1722          */
1723 
1724         cpu_synchronize_state(cs);
1725         /* env->nip is the PC, so increment it by 4 before calling
1726          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1727          */
1728         env->nip += 4;
1729         cs->exception_index = POWERPC_EXCP_PROGRAM;
1730         env->error_code = POWERPC_EXCP_INVAL;
1731         ppc_cpu_do_interrupt(cs);
1732     }
1733 
1734     return handle;
1735 }
1736 
1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1738 {
1739     PowerPCCPU *cpu = POWERPC_CPU(cs);
1740     CPUPPCState *env = &cpu->env;
1741     int ret;
1742 
1743     qemu_mutex_lock_iothread();
1744 
1745     switch (run->exit_reason) {
1746     case KVM_EXIT_DCR:
1747         if (run->dcr.is_write) {
1748             DPRINTF("handle dcr write\n");
1749             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1750         } else {
1751             DPRINTF("handle dcr read\n");
1752             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1753         }
1754         break;
1755     case KVM_EXIT_HLT:
1756         DPRINTF("handle halt\n");
1757         ret = kvmppc_handle_halt(cpu);
1758         break;
1759 #if defined(TARGET_PPC64)
1760     case KVM_EXIT_PAPR_HCALL:
1761         DPRINTF("handle PAPR hypercall\n");
1762         run->papr_hcall.ret = spapr_hypercall(cpu,
1763                                               run->papr_hcall.nr,
1764                                               run->papr_hcall.args);
1765         ret = 0;
1766         break;
1767 #endif
1768     case KVM_EXIT_EPR:
1769         DPRINTF("handle epr\n");
1770         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1771         ret = 0;
1772         break;
1773     case KVM_EXIT_WATCHDOG:
1774         DPRINTF("handle watchdog expiry\n");
1775         watchdog_perform_action();
1776         ret = 0;
1777         break;
1778 
1779     case KVM_EXIT_DEBUG:
1780         DPRINTF("handle debug exception\n");
1781         if (kvm_handle_debug(cpu, run)) {
1782             ret = EXCP_DEBUG;
1783             break;
1784         }
1785         /* re-enter, this exception was guest-internal */
1786         ret = 0;
1787         break;
1788 
1789     default:
1790         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1791         ret = -1;
1792         break;
1793     }
1794 
1795     qemu_mutex_unlock_iothread();
1796     return ret;
1797 }
1798 
1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1800 {
1801     CPUState *cs = CPU(cpu);
1802     uint32_t bits = tsr_bits;
1803     struct kvm_one_reg reg = {
1804         .id = KVM_REG_PPC_OR_TSR,
1805         .addr = (uintptr_t) &bits,
1806     };
1807 
1808     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1809 }
1810 
1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813 
1814     CPUState *cs = CPU(cpu);
1815     uint32_t bits = tsr_bits;
1816     struct kvm_one_reg reg = {
1817         .id = KVM_REG_PPC_CLEAR_TSR,
1818         .addr = (uintptr_t) &bits,
1819     };
1820 
1821     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1822 }
1823 
1824 int kvmppc_set_tcr(PowerPCCPU *cpu)
1825 {
1826     CPUState *cs = CPU(cpu);
1827     CPUPPCState *env = &cpu->env;
1828     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1829 
1830     struct kvm_one_reg reg = {
1831         .id = KVM_REG_PPC_TCR,
1832         .addr = (uintptr_t) &tcr,
1833     };
1834 
1835     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 }
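
/*
 * Illustrative sketch (not part of the original code): a BookE timer or
 * watchdog model might push TSR/TCR updates to KVM with the helpers above
 * roughly as follows.  booke_timers_sync() is a hypothetical name used only
 * for illustration, and it assumes watchdog support was first enabled with
 * kvmppc_booke_watchdog_enable() below.
 *
 *     static void booke_timers_sync(PowerPCCPU *cpu, uint32_t set_bits,
 *                                   uint32_t clear_bits)
 *     {
 *         kvmppc_or_tsr_bits(cpu, set_bits);      // OR bits into the in-kernel TSR
 *         kvmppc_clear_tsr_bits(cpu, clear_bits); // clear stale TSR bits
 *         kvmppc_set_tcr(cpu);                    // push SPR_BOOKE_TCR to KVM
 *     }
 */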
1837 
1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1839 {
1840     CPUState *cs = CPU(cpu);
1841     int ret;
1842 
1843     if (!kvm_enabled()) {
1844         return -1;
1845     }
1846 
1847     if (!cap_ppc_watchdog) {
1848         printf("warning: KVM does not support watchdog\n");
1849         return -1;
1850     }
1851 
1852     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1853     if (ret < 0) {
1854         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855                 __func__, strerror(-ret));
1856         return ret;
1857     }
1858 
1859     return ret;
1860 }
1861 
1862 static int read_cpuinfo(const char *field, char *value, int len)
1863 {
1864     FILE *f;
1865     int ret = -1;
1866     int field_len = strlen(field);
1867     char line[512];
1868 
1869     f = fopen("/proc/cpuinfo", "r");
1870     if (!f) {
1871         return -1;
1872     }
1873 
1874     do {
1875         if (!fgets(line, sizeof(line), f)) {
1876             break;
1877         }
1878         if (!strncmp(line, field, field_len)) {
1879             pstrcpy(value, len, line);
1880             ret = 0;
1881             break;
1882         }
1883     } while (*line);
1884 
1885     fclose(f);
1886 
1887     return ret;
1888 }
1889 
1890 uint32_t kvmppc_get_tbfreq(void)
1891 {
1892     char line[512];
1893     char *ns;
1894     uint32_t retval = NANOSECONDS_PER_SECOND;
1895 
1896     if (read_cpuinfo("timebase", line, sizeof(line))) {
1897         return retval;
1898     }
1899 
1900     if (!(ns = strchr(line, ':'))) {
1901         return retval;
1902     }
1903 
1904     ns++;
1905 
1906     return atoi(ns);
1907 }
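
/*
 * Example (illustrative): on a typical POWER host, /proc/cpuinfo contains a
 * line such as "timebase : 512000000"; read_cpuinfo() above returns that
 * whole line, and the text after the colon is parsed with atoi(), yielding
 * a 512 MHz timebase.  The exact value is host-dependent.
 */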
1908 
1909 bool kvmppc_get_host_serial(char **value)
1910 {
1911     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1912                                NULL);
1913 }
1914 
1915 bool kvmppc_get_host_model(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1918 }
1919 
1920 /* Try to find a device tree node for a CPU with clock-frequency property */
1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1922 {
1923     struct dirent *dirp;
1924     DIR *dp;
1925 
1926     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1928         return -1;
1929     }
1930 
1931     buf[0] = '\0';
1932     while ((dirp = readdir(dp)) != NULL) {
1933         FILE *f;
1934         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1935                  dirp->d_name);
1936         f = fopen(buf, "r");
1937         if (f) {
1938             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1939             fclose(f);
1940             break;
1941         }
1942         buf[0] = '\0';
1943     }
1944     closedir(dp);
1945     if (buf[0] == '\0') {
1946         printf("Unknown host!\n");
1947         return -1;
1948     }
1949 
1950     return 0;
1951 }
1952 
1953 static uint64_t kvmppc_read_int_dt(const char *filename)
1954 {
1955     union {
1956         uint32_t v32;
1957         uint64_t v64;
1958     } u;
1959     FILE *f;
1960     int len;
1961 
1962     f = fopen(filename, "rb");
1963     if (!f) {
1964         return -1;
1965     }
1966 
1967     len = fread(&u, 1, sizeof(u), f);
1968     fclose(f);
1969     switch (len) {
1970     case 4:
1971         /* property is a 32-bit quantity */
1972         return be32_to_cpu(u.v32);
1973     case 8:
1974         return be64_to_cpu(u.v64);
1975     }
1976 
1977     return 0;
1978 }
1979 
1980 /* Read a CPU node property from the host device tree that's a single
1981  * integer (32-bit or 64-bit).  Returns -1 if the CPU node or the
1982  * property cannot be found or opened, and 0 if the property exists
1983  * but its format is not understood. */
1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1985 {
1986     char buf[PATH_MAX], *tmp;
1987     uint64_t val;
1988 
1989     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1990         return -1;
1991     }
1992 
1993     tmp = g_strdup_printf("%s/%s", buf, propname);
1994     val = kvmppc_read_int_dt(tmp);
1995     g_free(tmp);
1996 
1997     return val;
1998 }
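
/*
 * Worked example (illustrative): device tree integers are big-endian, so a
 * 4-byte "clock-frequency" property holding the bytes 01 fc a0 55 is decoded
 * by kvmppc_read_int_dt() via be32_to_cpu() into 0x01fca055, i.e. 33333333
 * (a 33.33 MHz reference clock).
 */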
1999 
2000 uint64_t kvmppc_get_clockfreq(void)
2001 {
2002     return kvmppc_read_int_cpu_dt("clock-frequency");
2003 }
2004 
2005 uint32_t kvmppc_get_vmx(void)
2006 {
2007     return kvmppc_read_int_cpu_dt("ibm,vmx");
2008 }
2009 
2010 uint32_t kvmppc_get_dfp(void)
2011 {
2012     return kvmppc_read_int_cpu_dt("ibm,dfp");
2013 }
2014 
2015 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2016 {
2017     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2018     CPUState *cs = CPU(cpu);
2019 
2020     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2021         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2022         return 0;
2023     }
2024 
2025     return 1;
2026 }
2027 
2028 int kvmppc_get_hasidle(CPUPPCState *env)
2029 {
2030     struct kvm_ppc_pvinfo pvinfo;
2031 
2032     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2033         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2034         return 1;
2035     }
2036 
2037     return 0;
2038 }
2039 
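/*
 * Fill @buf with the instructions the guest should use to trigger a
 * hypercall.  Both the KVM-provided sequence and the fallback assembled
 * below are four instructions (16 bytes), so callers are expected to pass
 * a buffer of at least that size; @buf_len only bounds the copy in the
 * KVM-provided case.
 */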
2040 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2041 {
2042     uint32_t *hc = (uint32_t *)buf;
2043     struct kvm_ppc_pvinfo pvinfo;
2044 
2045     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2046         memcpy(buf, pvinfo.hcall, buf_len);
2047         return 0;
2048     }
2049 
2050     /*
2051      * Fallback to always fail hypercalls regardless of endianness:
2052      *
2053      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2054      *     li r3, -1
2055      *     b .+8       (becomes nop in wrong endian)
2056      *     bswap32(li r3, -1)
2057      */
2058 
2059     hc[0] = cpu_to_be32(0x08000048);
2060     hc[1] = cpu_to_be32(0x3860ffff);
2061     hc[2] = cpu_to_be32(0x48000008);
2062     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2063 
2064     return 1;
2065 }
2066 
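/*
 * Ask KVM to handle @hcall in the kernel: KVM_CAP_PPC_ENABLE_HCALL takes
 * the hcall number and an enable flag (1 here).  Hypercalls that are not
 * enabled in-kernel reach QEMU as KVM_EXIT_PAPR_HCALL exits instead (see
 * kvm_arch_handle_exit() above).
 */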
2067 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2068 {
2069     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2070 }
2071 
2072 void kvmppc_enable_logical_ci_hcalls(void)
2073 {
2074     /*
2075      * FIXME: it would be nice to detect the cases where we're using
2076      * a device which requires the in-kernel implementation of these
2077      * hcalls but the kernel lacks it, and to produce a warning in
2078      * that case.
2079      */
2080     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2081     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2082 }
2083 
2084 void kvmppc_enable_set_mode_hcall(void)
2085 {
2086     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2087 }
2088 
2089 void kvmppc_enable_clear_ref_mod_hcalls(void)
2090 {
2091     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2092     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2093 }
2094 
2095 void kvmppc_set_papr(PowerPCCPU *cpu)
2096 {
2097     CPUState *cs = CPU(cpu);
2098     int ret;
2099 
2100     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2101     if (ret) {
2102         error_report("This vCPU type or KVM version does not support PAPR");
2103         exit(1);
2104     }
2105 
2106     /* Update the capability flag so we sync the right information
2107      * with kvm */
2108     cap_papr = 1;
2109 }
2110 
2111 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2112 {
2113     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2114 }
2115 
2116 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2117 {
2118     CPUState *cs = CPU(cpu);
2119     int ret;
2120 
2121     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2122     if (ret && mpic_proxy) {
2123         error_report("This KVM version does not support EPR");
2124         exit(1);
2125     }
2126 }
2127 
2128 int kvmppc_smt_threads(void)
2129 {
2130     return cap_ppc_smt ? cap_ppc_smt : 1;
2131 }
2132 
2133 #ifdef TARGET_PPC64
2134 off_t kvmppc_alloc_rma(void **rma)
2135 {
2136     off_t size;
2137     int fd;
2138     struct kvm_allocate_rma ret;
2139 
2140     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2141      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2142      *                      not necessary on this hardware
2143      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2144      *
2145      * FIXME: We should allow the user to force contiguous RMA
2146      * allocation in the cap_ppc_rma==1 case.
2147      */
2148     if (cap_ppc_rma < 2) {
2149         return 0;
2150     }
2151 
2152     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2153     if (fd < 0) {
2154         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2155                 strerror(errno));
2156         return -1;
2157     }
2158 
2159     size = MIN(ret.rma_size, 256ul << 20);
2160 
2161     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2162     if (*rma == MAP_FAILED) {
2163         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2164         return -1;
2165     }
2166 
2167     return size;
2168 }
2169 
2170 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2171 {
2172     struct kvm_ppc_smmu_info info;
2173     long rampagesize, best_page_shift;
2174     int i;
2175 
2176     if (cap_ppc_rma >= 2) {
2177         return current_size;
2178     }
2179 
2180     /* Find the largest hardware supported page size that's less than
2181      * or equal to the (logical) backing page size of guest RAM */
2182     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2183     rampagesize = getrampagesize();
2184     best_page_shift = 0;
2185 
2186     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2187         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2188 
2189         if (!sps->page_shift) {
2190             continue;
2191         }
2192 
2193         if ((sps->page_shift > best_page_shift)
2194             && ((1UL << sps->page_shift) <= rampagesize)) {
2195             best_page_shift = sps->page_shift;
2196         }
2197     }
2198 
2199     return MIN(current_size,
2200                1ULL << (best_page_shift + hash_shift - 7));
2201 }
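
/*
 * Worked example (illustrative): with 64KiB backing pages (best_page_shift
 * of 16) and a 24-bit hash table shift (a 16MiB HPT), the expression above
 * caps the RMA at 1ULL << (16 + 24 - 7), i.e. 8GiB; the smaller of that
 * and current_size is returned.
 */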
2202 #endif
2203 
2204 bool kvmppc_spapr_use_multitce(void)
2205 {
2206     return cap_spapr_multitce;
2207 }
2208 
2209 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2210                               bool need_vfio)
2211 {
2212     struct kvm_create_spapr_tce args = {
2213         .liobn = liobn,
2214         .window_size = window_size,
2215     };
2216     long len;
2217     int fd;
2218     void *table;
2219 
2220     /* Must set fd to -1 so we don't try to munmap when called for
2221      * destroying the table, which the upper layers -will- do
2222      */
2223     *pfd = -1;
2224     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2225         return NULL;
2226     }
2227 
2228     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2229     if (fd < 0) {
2230         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2231                 liobn);
2232         return NULL;
2233     }
2234 
2235     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2236     /* FIXME: round this up to page size */
2237 
2238     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2239     if (table == MAP_FAILED) {
2240         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2241                 liobn);
2242         close(fd);
2243         return NULL;
2244     }
2245 
2246     *pfd = fd;
2247     return table;
2248 }
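
/*
 * Sizing example (illustrative): SPAPR_TCE_PAGE_SIZE is 4KiB, so a 1GiB DMA
 * window needs (1GiB / 4KiB) = 262144 TCE entries of 8 bytes each, i.e. the
 * mmap() above maps a 2MiB table from the kernel.
 */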
2249 
2250 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2251 {
2252     long len;
2253 
2254     if (fd < 0) {
2255         return -1;
2256     }
2257 
2258     len = nb_table * sizeof(uint64_t);
2259     if ((munmap(table, len) < 0) ||
2260         (close(fd) < 0)) {
2261         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2262                 strerror(errno));
2263         /* Leak the table */
2264     }
2265 
2266     return 0;
2267 }
2268 
2269 int kvmppc_reset_htab(int shift_hint)
2270 {
2271     uint32_t shift = shift_hint;
2272 
2273     if (!kvm_enabled()) {
2274         /* Full emulation, tell caller to allocate htab itself */
2275         return 0;
2276     }
2277     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2278         int ret;
2279         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2280         if (ret == -ENOTTY) {
2281             /* At least some versions of PR KVM advertise the
2282              * capability, but don't implement the ioctl().  Oops.
2283              * Return 0 so that we allocate the htab in qemu, as is
2284              * correct for PR. */
2285             return 0;
2286         } else if (ret < 0) {
2287             return ret;
2288         }
2289         return shift;
2290     }
2291 
2292     /* We have a kernel that predates the htab reset calls.  For PR
2293      * KVM we need to allocate the htab ourselves; an HV KVM of this
2294      * era has already allocated a fixed-size 16MB hash table. */
2295     if (kvmppc_is_pr(kvm_state)) {
2296         /* PR - tell caller to allocate htab */
2297         return 0;
2298     } else {
2299         /* HV - assume 16MB kernel allocated htab */
2300         return 24;
2301     }
2302 }
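
/*
 * Usage sketch (illustrative, not part of the original code): callers such
 * as the sPAPR machine setup code interpret the return value roughly like
 * this:
 *
 *     shift = kvmppc_reset_htab(requested_shift);
 *     if (shift > 0) {
 *         // the kernel owns a hash table of 1 << shift bytes
 *     } else if (shift == 0) {
 *         // QEMU must allocate the hash table itself (TCG or PR KVM)
 *     } else {
 *         // error returned by KVM_PPC_ALLOCATE_HTAB
 *     }
 */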
2303 
2304 static inline uint32_t mfpvr(void)
2305 {
2306     uint32_t pvr;
2307 
2308     asm ("mfpvr %0"
2309          : "=r"(pvr));
2310     return pvr;
2311 }
2312 
2313 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2314 {
2315     if (on) {
2316         *word |= flags;
2317     } else {
2318         *word &= ~flags;
2319     }
2320 }
2321 
2322 static void kvmppc_host_cpu_initfn(Object *obj)
2323 {
2324     assert(kvm_enabled());
2325 }
2326 
2327 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2328 {
2329     DeviceClass *dc = DEVICE_CLASS(oc);
2330     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2331     uint32_t vmx = kvmppc_get_vmx();
2332     uint32_t dfp = kvmppc_get_dfp();
2333     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2334     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2335 
2336     /* Now fix up the class with information we can query from the host */
2337     pcc->pvr = mfpvr();
2338 
2339     if (vmx != -1) {
2340         /* Only override when we know what the host supports */
2341         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2342         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2343     }
2344     if (dfp != -1) {
2345         /* Only override when we know what the host supports */
2346         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2347     }
2348 
2349     if (dcache_size != -1) {
2350         pcc->l1_dcache_size = dcache_size;
2351     }
2352 
2353     if (icache_size != -1) {
2354         pcc->l1_icache_size = icache_size;
2355     }
2356 
2357     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2358     dc->cannot_destroy_with_object_finalize_yet = true;
2359 }
2360 
2361 bool kvmppc_has_cap_epr(void)
2362 {
2363     return cap_epr;
2364 }
2365 
2366 bool kvmppc_has_cap_htab_fd(void)
2367 {
2368     return cap_htab_fd;
2369 }
2370 
2371 bool kvmppc_has_cap_fixup_hcalls(void)
2372 {
2373     return cap_fixup_hcalls;
2374 }
2375 
2376 bool kvmppc_has_cap_htm(void)
2377 {
2378     return cap_htm;
2379 }
2380 
2381 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2382 {
2383     ObjectClass *oc = OBJECT_CLASS(pcc);
2384 
2385     while (oc && !object_class_is_abstract(oc)) {
2386         oc = object_class_get_parent(oc);
2387     }
2388     assert(oc);
2389 
2390     return POWERPC_CPU_CLASS(oc);
2391 }
2392 
2393 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2394 {
2395     uint32_t host_pvr = mfpvr();
2396     PowerPCCPUClass *pvr_pcc;
2397 
2398     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2399     if (pvr_pcc == NULL) {
2400         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2401     }
2402 
2403     return pvr_pcc;
2404 }
2405 
2406 static int kvm_ppc_register_host_cpu_type(void)
2407 {
2408     TypeInfo type_info = {
2409         .name = TYPE_HOST_POWERPC_CPU,
2410         .instance_init = kvmppc_host_cpu_initfn,
2411         .class_init = kvmppc_host_cpu_class_init,
2412     };
2413     PowerPCCPUClass *pvr_pcc;
2414     DeviceClass *dc;
2415 
2416     pvr_pcc = kvm_ppc_get_host_cpu_class();
2417     if (pvr_pcc == NULL) {
2418         return -1;
2419     }
2420     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2421     type_register(&type_info);
2422 
2423     /* Register a generic CPU class for this CPU's family */
2424     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2425     dc = DEVICE_CLASS(pvr_pcc);
2426     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2427     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2428     type_register(&type_info);
2429 
2430 #if defined(TARGET_PPC64)
2431     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2432     type_info.parent = TYPE_SPAPR_CPU_CORE;
2433     type_info.instance_size = sizeof(sPAPRCPUCore);
2434     type_info.instance_init = NULL;
2435     type_info.class_init = spapr_cpu_core_class_init;
2436     type_info.class_data = (void *) "host";
2437     type_register(&type_info);
2438     g_free((void *)type_info.name);
2439 
2440     /* Register generic spapr CPU family class for current host CPU type */
2441     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2442     type_info.class_data = (void *) dc->desc;
2443     type_register(&type_info);
2444     g_free((void *)type_info.name);
2445 #endif
2446 
2447     return 0;
2448 }
2449 
2450 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2451 {
2452     struct kvm_rtas_token_args args = {
2453         .token = token,
2454     };
2455 
2456     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2457         return -ENOENT;
2458     }
2459 
2460     strncpy(args.name, function, sizeof(args.name));
2461 
2462     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2463 }
2464 
2465 int kvmppc_get_htab_fd(bool write)
2466 {
2467     struct kvm_get_htab_fd s = {
2468         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2469         .start_index = 0,
2470     };
2471 
2472     if (!cap_htab_fd) {
2473         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2474         return -1;
2475     }
2476 
2477     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2478 }
2479 
2480 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2481 {
2482     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2483     uint8_t buf[bufsize];
2484     ssize_t rc;
2485 
2486     do {
2487         rc = read(fd, buf, bufsize);
2488         if (rc < 0) {
2489             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2490                     strerror(errno));
2491             return rc;
2492         } else if (rc) {
2493             uint8_t *buffer = buf;
2494             ssize_t n = rc;
2495             while (n) {
2496                 struct kvm_get_htab_header *head =
2497                     (struct kvm_get_htab_header *) buffer;
2498                 size_t chunksize = sizeof(*head) +
2499                      HASH_PTE_SIZE_64 * head->n_valid;
2500 
2501                 qemu_put_be32(f, head->index);
2502                 qemu_put_be16(f, head->n_valid);
2503                 qemu_put_be16(f, head->n_invalid);
2504                 qemu_put_buffer(f, (void *)(head + 1),
2505                                 HASH_PTE_SIZE_64 * head->n_valid);
2506 
2507                 buffer += chunksize;
2508                 n -= chunksize;
2509             }
2510         }
2511     } while ((rc != 0)
2512              && ((max_ns < 0)
2513                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2514 
2515     return (rc == 0) ? 1 : 0;
2516 }
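
/*
 * The stream written above is a sequence of chunks, each consisting of a
 * 32-bit starting HPTE index, a 16-bit n_valid count, a 16-bit n_invalid
 * count (header fields stored big-endian), followed by n_valid *
 * HASH_PTE_SIZE_64 bytes of PTE data.  kvmppc_load_htab_chunk() below
 * writes one such chunk back through the KVM_PPC_GET_HTAB_FD descriptor.
 */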
2517 
2518 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2519                            uint16_t n_valid, uint16_t n_invalid)
2520 {
2521     struct kvm_get_htab_header *buf;
2522     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2523     ssize_t rc;
2524 
2525     buf = alloca(chunksize);
2526     buf->index = index;
2527     buf->n_valid = n_valid;
2528     buf->n_invalid = n_invalid;
2529 
2530     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2531 
2532     rc = write(fd, buf, chunksize);
2533     if (rc < 0) {
2534         fprintf(stderr, "Error writing KVM hash table: %s\n",
2535                 strerror(errno));
2536         return rc;
2537     }
2538     if (rc != chunksize) {
2539         /* We should never get a short write on a single chunk */
2540         fprintf(stderr, "Short write, restoring KVM hash table\n");
2541         return -1;
2542     }
2543     return 0;
2544 }
2545 
2546 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2547 {
2548     return true;
2549 }
2550 
2551 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2552 {
2553     return 1;
2554 }
2555 
2556 int kvm_arch_on_sigbus(int code, void *addr)
2557 {
2558     return 1;
2559 }
2560 
2561 void kvm_arch_init_irq_routing(KVMState *s)
2562 {
2563 }
2564 
2565 struct kvm_get_htab_buf {
2566     struct kvm_get_htab_header header;
2567     /*
2568      * We require one extra entry for the read
2569      */
2570     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2571 };
2572 
2573 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2574 {
2575     int htab_fd;
2576     struct kvm_get_htab_fd ghf;
2577     struct kvm_get_htab_buf  *hpte_buf;
2578 
2579     ghf.flags = 0;
2580     ghf.start_index = pte_index;
2581     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2582     if (htab_fd < 0) {
2583         goto error_out;
2584     }
2585 
2586     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2587     /*
2588      * Read the hpte group
2589      */
2590     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2591         goto out_close;
2592     }
2593 
2594     close(htab_fd);
2595     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2596 
2597 out_close:
2598     g_free(hpte_buf);
2599     close(htab_fd);
2600 error_out:
2601     return 0;
2602 }
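
/*
 * Note: the "token" returned above is simply the host address of the hpte[]
 * array inside a heap-allocated struct kvm_get_htab_buf.  Callers must pass
 * it back to kvmppc_hash64_free_pteg() below, which recovers the containing
 * buffer with container_of() and releases it with g_free().
 */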
2603 
2604 void kvmppc_hash64_free_pteg(uint64_t token)
2605 {
2606     struct kvm_get_htab_buf *htab_buf;
2607 
2608     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2609                             hpte);
2610     g_free(htab_buf);
2611     return;
2612 }
2613 
2614 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2615                              target_ulong pte0, target_ulong pte1)
2616 {
2617     int htab_fd;
2618     struct kvm_get_htab_fd ghf;
2619     struct kvm_get_htab_buf hpte_buf;
2620 
2621     ghf.flags = 0;
2622     ghf.start_index = 0;     /* Ignored */
2623     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2624     if (htab_fd < 0) {
2625         goto error_out;
2626     }
2627 
2628     hpte_buf.header.n_valid = 1;
2629     hpte_buf.header.n_invalid = 0;
2630     hpte_buf.header.index = pte_index;
2631     hpte_buf.hpte[0] = pte0;
2632     hpte_buf.hpte[1] = pte1;
2633     /*
2634      * Write the hpte entry.
2635      * CAUTION: write() has the warn_unused_result attribute, so we
2636      * must check its return value even though we take no action here.
2637      */
2638     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2639         goto out_close;
2640     }
2641 
2642 out_close:
2643     close(htab_fd);
2644     return;
2645 
2646 error_out:
2647     return;
2648 }
2649 
2650 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2651                              uint64_t address, uint32_t data, PCIDevice *dev)
2652 {
2653     return 0;
2654 }
2655 
2656 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2657                                 int vector, PCIDevice *dev)
2658 {
2659     return 0;
2660 }
2661 
2662 int kvm_arch_release_virq_post(int virq)
2663 {
2664     return 0;
2665 }
2666 
2667 int kvm_arch_msi_data_to_gsi(uint32_t data)
2668 {
2669     return data & 0xffff;
2670 }
2671 
2672 int kvmppc_enable_hwrng(void)
2673 {
2674     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2675         return -1;
2676     }
2677 
2678     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2679 }
2680