xref: /qemu/target/ppc/kvm.c (revision 33848cee)
1 /*
2  * PowerPC implementation of KVM hooks
3  *
4  * Copyright IBM Corp. 2007
5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
6  *
7  * Authors:
8  *  Jerone Young <jyoung5@us.ibm.com>
9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10  *  Hollis Blanchard <hollisb@us.ibm.com>
11  *
12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
13  * See the COPYING file in the top-level directory.
14  *
15  */
16 
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
21 
22 #include <linux/kvm.h>
23 
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
35 
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
50 
51 //#define DEBUG_KVM
52 
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58     do { } while (0)
59 #endif
60 
61 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
62 
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64     KVM_CAP_LAST_INFO
65 };
66 
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
83 static int cap_htm;             /* Hardware transactional memory support */
84 
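/* Breakpoint instruction opcode used for software breakpoints; queried from
 * KVM via KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu() */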
85 static uint32_t debug_inst_opcode;
86 
87 /* XXX We have a race condition where we actually have a level triggered
88  *     interrupt, but the infrastructure can't expose that yet, so the guest
89  *     takes but ignores it, goes to sleep and never gets notified that there's
90  *     still an interrupt pending.
91  *
92  *     As a quick workaround, let's just wake up again 20 ms after we injected
93      * an interrupt. That way we can ensure that we're always reinjecting
94  *     interrupts in case the guest swallowed them.
95  */
96 static QEMUTimer *idle_timer;
97 
98 static void kvm_kick_cpu(void *opaque)
99 {
100     PowerPCCPU *cpu = opaque;
101 
102     qemu_cpu_kick(CPU(cpu));
103 }
104 
105 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
106  * should only be used for fallback tests - generally we should use
107  * explicit capabilities for the features we want, rather than
108  * assuming what is/isn't available depending on the KVM variant. */
109 static bool kvmppc_is_pr(KVMState *ks)
110 {
111     /* Assume KVM-PR if the GET_PVINFO capability is available */
112     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
113 }
114 
115 static int kvm_ppc_register_host_cpu_type(void);
116 
117 int kvm_arch_init(MachineState *ms, KVMState *s)
118 {
119     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
120     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
121     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
122     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
123     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
124     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
125     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
126     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
127     cap_spapr_vfio = false;
128     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
129     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
130     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
131     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
132     /* Note: we don't set cap_papr here, because this capability is
133      * only activated later by kvmppc_set_papr() */
134     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
135     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
136     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
137 
138     if (!cap_interrupt_level) {
139         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
140                         "VM to stall at times!\n");
141     }
142 
143     kvm_ppc_register_host_cpu_type();
144 
145     return 0;
146 }
147 
148 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
149 {
150     CPUPPCState *cenv = &cpu->env;
151     CPUState *cs = CPU(cpu);
152     struct kvm_sregs sregs;
153     int ret;
154 
155     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
156         /* What we're really trying to say is "if we're on BookE, we use
157            the native PVR for now". This is the only sane way to check
158            it though, so we potentially mislead users into thinking they can
159            run BookE guests on BookS. Let's hope nobody tries :) */
160         return 0;
161     } else {
162         if (!cap_segstate) {
163             fprintf(stderr, "kvm error: missing PVR setting capability\n");
164             return -ENOSYS;
165         }
166     }
167 
168     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
169     if (ret) {
170         return ret;
171     }
172 
173     sregs.pvr = cenv->spr[SPR_PVR];
174     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
175 }
176 
177 /* Set up a shared TLB array with KVM */
178 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
179 {
180     CPUPPCState *env = &cpu->env;
181     CPUState *cs = CPU(cpu);
182     struct kvm_book3e_206_tlb_params params = {};
183     struct kvm_config_tlb cfg = {};
184     unsigned int entries = 0;
185     int ret, i;
186 
187     if (!kvm_enabled() ||
188         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
189         return 0;
190     }
191 
192     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
193 
194     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
195         params.tlb_sizes[i] = booke206_tlb_size(env, i);
196         params.tlb_ways[i] = booke206_tlb_ways(env, i);
197         entries += params.tlb_sizes[i];
198     }
199 
200     assert(entries == env->nb_tlb);
201     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
202 
203     env->tlb_dirty = true;
204 
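    /* Point KVM at QEMU's own TLB array so guest TLB state is shared in
     * place instead of being copied back and forth through ioctls */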
205     cfg.array = (uintptr_t)env->tlb.tlbm;
206     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
207     cfg.params = (uintptr_t)&params;
208     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
209 
210     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
211     if (ret < 0) {
212         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
213                 __func__, strerror(-ret));
214         return ret;
215     }
216 
217     env->kvm_sw_tlb = true;
218     return 0;
219 }
220 
221 
222 #if defined(TARGET_PPC64)
223 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
224                                        struct kvm_ppc_smmu_info *info)
225 {
226     CPUPPCState *env = &cpu->env;
227     CPUState *cs = CPU(cpu);
228 
229     memset(info, 0, sizeof(*info));
230 
231     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
232      * we need to "guess" what the supported page sizes are.
233      *
234      * For that to work we make a few assumptions:
235      *
236      * - Check whether we are running "PR" KVM which only supports 4K
237      *   and 16M pages, but supports them regardless of the backing
238      *   store characteristics. We also don't support 1T segments.
239      *
240      *   This is safe because if HV KVM ever supports that capability or PR
241      *   KVM grows support for more page/segment sizes, those versions
242      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
243      *   will not hit this fallback.
244      *
245      * - Else we are running HV KVM. This means we only support page
246      *   sizes that fit in the backing store. Additionally we only
247      *   advertise 64K pages if the processor is ARCH 2.06 and we assume
248      *   P7 encodings for the SLB and hash table. Here too, we assume
249      *   support for any newer processor will mean a kernel that
250      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
251      *   this fallback.
252      */
253     if (kvmppc_is_pr(cs->kvm_state)) {
254         /* No flags */
255         info->flags = 0;
256         info->slb_size = 64;
257 
258         /* Standard 4k base page size segment */
259         info->sps[0].page_shift = 12;
260         info->sps[0].slb_enc = 0;
261         info->sps[0].enc[0].page_shift = 12;
262         info->sps[0].enc[0].pte_enc = 0;
263 
264         /* Standard 16M large page size segment */
265         info->sps[1].page_shift = 24;
266         info->sps[1].slb_enc = SLB_VSID_L;
267         info->sps[1].enc[0].page_shift = 24;
268         info->sps[1].enc[0].pte_enc = 0;
269     } else {
270         int i = 0;
271 
272         /* HV KVM has backing store size restrictions */
273         info->flags = KVM_PPC_PAGE_SIZES_REAL;
274 
275         if (env->mmu_model & POWERPC_MMU_1TSEG) {
276             info->flags |= KVM_PPC_1T_SEGMENTS;
277         }
278 
279         if (env->mmu_model == POWERPC_MMU_2_06 ||
280             env->mmu_model == POWERPC_MMU_2_07) {
281             info->slb_size = 32;
282         } else {
283             info->slb_size = 64;
284         }
285 
286         /* Standard 4k base page size segment */
287         info->sps[i].page_shift = 12;
288         info->sps[i].slb_enc = 0;
289         info->sps[i].enc[0].page_shift = 12;
290         info->sps[i].enc[0].pte_enc = 0;
291         i++;
292 
293         /* 64K on MMU 2.06 and later */
294         if (env->mmu_model == POWERPC_MMU_2_06 ||
295             env->mmu_model == POWERPC_MMU_2_07) {
296             info->sps[i].page_shift = 16;
297             info->sps[i].slb_enc = 0x110;
298             info->sps[i].enc[0].page_shift = 16;
299             info->sps[i].enc[0].pte_enc = 1;
300             i++;
301         }
302 
303         /* Standard 16M large page size segment */
304         info->sps[i].page_shift = 24;
305         info->sps[i].slb_enc = SLB_VSID_L;
306         info->sps[i].enc[0].page_shift = 24;
307         info->sps[i].enc[0].pte_enc = 0;
308     }
309 }
310 
311 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
312 {
313     CPUState *cs = CPU(cpu);
314     int ret;
315 
316     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
317         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
318         if (ret == 0) {
319             return;
320         }
321     }
322 
323     kvm_get_fallback_smmu_info(cpu, info);
324 }
325 
326 static long gethugepagesize(const char *mem_path)
327 {
328     struct statfs fs;
329     int ret;
330 
331     do {
332         ret = statfs(mem_path, &fs);
333     } while (ret != 0 && errno == EINTR);
334 
335     if (ret != 0) {
336         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
337                 strerror(errno));
338         exit(1);
339     }
340 
341 #define HUGETLBFS_MAGIC       0x958458f6
342 
343     if (fs.f_type != HUGETLBFS_MAGIC) {
344         /* Explicit mempath, but it's ordinary pages */
345         return getpagesize();
346     }
347 
348     /* It's a hugetlbfs mount, return the huge page size */
349     return fs.f_bsize;
350 }
351 
352 /*
353  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
354  * may or may not name the same files / on the same filesystem now as
355  * when we actually open and map them.  Iterate over the file
356  * descriptors instead, and use qemu_fd_getpagesize().
357  */
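/* Callback for object_child_foreach(): record the smallest page size used by
 * any memory backend, since only a size supported everywhere can be exposed
 * to the guest */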
358 static int find_max_supported_pagesize(Object *obj, void *opaque)
359 {
360     char *mem_path;
361     long *hpsize_min = opaque;
362 
363     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
364         mem_path = object_property_get_str(obj, "mem-path", NULL);
365         if (mem_path) {
366             long hpsize = gethugepagesize(mem_path);
367             if (hpsize < *hpsize_min) {
368                 *hpsize_min = hpsize;
369             }
370         } else {
371             *hpsize_min = getpagesize();
372         }
373     }
374 
375     return 0;
376 }
377 
378 static long getrampagesize(void)
379 {
380     long hpsize = LONG_MAX;
381     long mainrampagesize;
382     Object *memdev_root;
383 
384     if (mem_path) {
385         mainrampagesize = gethugepagesize(mem_path);
386     } else {
387         mainrampagesize = getpagesize();
388     }
389 
390     /* it's possible we have memory-backend objects with
391      * hugepage-backed RAM. these may get mapped into system
392      * address space via -numa parameters or memory hotplug
393      * hooks. we want to take these into account, but we
394      * also want to make sure these supported hugepage
395      * sizes are applicable across the entire range of memory
396      * we may boot from, so we take the min across all
397      * backends, and assume normal pages in cases where a
398      * backend isn't backed by hugepages.
399      */
400     memdev_root = object_resolve_path("/objects", NULL);
401     if (memdev_root) {
402         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
403     }
404     if (hpsize == LONG_MAX) {
405         /* No additional memory regions found ==> Report main RAM page size */
406         return mainrampagesize;
407     }
408 
409     /* If NUMA is disabled or the NUMA nodes are not backed with a
410      * memory-backend, then there is at least one node using "normal" RAM,
411      * so if its page size is smaller we have to report that size instead.
412      */
413     if (hpsize > mainrampagesize &&
414         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
415         static bool warned;
416         if (!warned) {
417             error_report("Huge page support disabled (n/a for main memory).");
418             warned = true;
419         }
420         return mainrampagesize;
421     }
422 
423     return hpsize;
424 }
425 
426 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
427 {
428     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
429         return true;
430     }
431 
432     return (1ul << shift) <= rampgsize;
433 }
434 
435 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
436 {
437     static struct kvm_ppc_smmu_info smmu_info;
438     static bool has_smmu_info;
439     CPUPPCState *env = &cpu->env;
440     long rampagesize;
441     int iq, ik, jq, jk;
442     bool has_64k_pages = false;
443 
444     /* We only handle page sizes for 64-bit server guests for now */
445     if (!(env->mmu_model & POWERPC_MMU_64)) {
446         return;
447     }
448 
449     /* Collect MMU info from the kernel if we haven't done so already */
450     if (!has_smmu_info) {
451         kvm_get_smmu_info(cpu, &smmu_info);
452         has_smmu_info = true;
453     }
454 
455     rampagesize = getrampagesize();
456 
457     /* Convert to QEMU form */
458     memset(&env->sps, 0, sizeof(env->sps));
459 
460     /* If we have HV KVM, we need to forbid CI large pages if our
461      * host page size is smaller than 64K.
462      */
463     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
464         env->ci_large_pages = getpagesize() >= 0x10000;
465     }
466 
467     /*
468      * XXX This loop should be an entry wide AND of the capabilities that
469      *     the selected CPU has with the capabilities that KVM supports.
470      */
471     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
472         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
473         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
474 
475         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
476                                  ksps->page_shift)) {
477             continue;
478         }
479         qsps->page_shift = ksps->page_shift;
480         qsps->slb_enc = ksps->slb_enc;
481         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
482             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
483                                      ksps->enc[jk].page_shift)) {
484                 continue;
485             }
486             if (ksps->enc[jk].page_shift == 16) {
487                 has_64k_pages = true;
488             }
489             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
490             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
491             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
492                 break;
493             }
494         }
495         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
496             break;
497         }
498     }
499     env->slb_nr = smmu_info.slb_size;
500     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
501         env->mmu_model &= ~POWERPC_MMU_1TSEG;
502     }
503     if (!has_64k_pages) {
504         env->mmu_model &= ~POWERPC_MMU_64K;
505     }
506 }
507 #else /* defined (TARGET_PPC64) */
508 
509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
510 {
511 }
512 
513 #endif /* !defined (TARGET_PPC64) */
514 
515 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
516 {
517     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
518 }
519 
520 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
521  * book3s supports only 1 hardware watchpoint, so an array size
522  * of 4 is sufficient for now.
523  */
524 #define MAX_HW_BKPTS 4
525 
526 static struct HWBreakpoint {
527     target_ulong addr;
528     int type;
529 } hw_debug_points[MAX_HW_BKPTS];
530 
531 static CPUWatchpoint hw_watchpoint;
532 
533 /* By default, no hardware breakpoints or watchpoints are supported */
534 static int max_hw_breakpoint;
535 static int max_hw_watchpoint;
536 static int nb_hw_breakpoint;
537 static int nb_hw_watchpoint;
538 
539 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
540 {
541     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
542         max_hw_breakpoint = 2;
543         max_hw_watchpoint = 2;
544     }
545 
546     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
547         fprintf(stderr, "Error initializing h/w breakpoints\n");
548         return;
549     }
550 }
551 
552 int kvm_arch_init_vcpu(CPUState *cs)
553 {
554     PowerPCCPU *cpu = POWERPC_CPU(cs);
555     CPUPPCState *cenv = &cpu->env;
556     int ret;
557 
558     /* Gather server mmu info from KVM and update the CPU state */
559     kvm_fixup_page_sizes(cpu);
560 
561     /* Synchronize sregs with kvm */
562     ret = kvm_arch_sync_sregs(cpu);
563     if (ret) {
564         if (ret == -EINVAL) {
565             error_report("Register sync failed... If you're using kvm-hv.ko,"
566                          " only \"-cpu host\" is possible");
567         }
568         return ret;
569     }
570 
571     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
572 
573     switch (cenv->mmu_model) {
574     case POWERPC_MMU_BOOKE206:
575         /* This target supports access to KVM's guest TLB */
576         ret = kvm_booke206_tlb_init(cpu);
577         break;
578     case POWERPC_MMU_2_07:
579         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
580             /* KVM-HV provides transactional memory on POWER8 even without
581              * the KVM_CAP_PPC_HTM extension, so enable it here instead. */
582             cap_htm = true;
583         }
584         break;
585     default:
586         break;
587     }
588 
589     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
590     kvmppc_hw_debug_points_init(cenv);
591 
592     return ret;
593 }
594 
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
596 {
597     CPUPPCState *env = &cpu->env;
598     CPUState *cs = CPU(cpu);
599     struct kvm_dirty_tlb dirty_tlb;
600     unsigned char *bitmap;
601     int ret;
602 
603     if (!env->kvm_sw_tlb) {
604         return;
605     }
606 
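    /* Mark every entry dirty so KVM re-reads the entire shadow TLB array */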
607     bitmap = g_malloc((env->nb_tlb + 7) / 8);
608     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
609 
610     dirty_tlb.bitmap = (uintptr_t)bitmap;
611     dirty_tlb.num_dirty = env->nb_tlb;
612 
613     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614     if (ret) {
615         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616                 __func__, strerror(-ret));
617     }
618 
619     g_free(bitmap);
620 }
621 
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
623 {
624     PowerPCCPU *cpu = POWERPC_CPU(cs);
625     CPUPPCState *env = &cpu->env;
626     union {
627         uint32_t u32;
628         uint64_t u64;
629     } val;
630     struct kvm_one_reg reg = {
631         .id = id,
632         .addr = (uintptr_t) &val,
633     };
634     int ret;
635 
636     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637     if (ret != 0) {
638         trace_kvm_failed_spr_get(spr, strerror(errno));
639     } else {
640         switch (id & KVM_REG_SIZE_MASK) {
641         case KVM_REG_SIZE_U32:
642             env->spr[spr] = val.u32;
643             break;
644 
645         case KVM_REG_SIZE_U64:
646             env->spr[spr] = val.u64;
647             break;
648 
649         default:
650             /* Don't handle this size yet */
651             abort();
652         }
653     }
654 }
655 
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
657 {
658     PowerPCCPU *cpu = POWERPC_CPU(cs);
659     CPUPPCState *env = &cpu->env;
660     union {
661         uint32_t u32;
662         uint64_t u64;
663     } val;
664     struct kvm_one_reg reg = {
665         .id = id,
666         .addr = (uintptr_t) &val,
667     };
668     int ret;
669 
670     switch (id & KVM_REG_SIZE_MASK) {
671     case KVM_REG_SIZE_U32:
672         val.u32 = env->spr[spr];
673         break;
674 
675     case KVM_REG_SIZE_U64:
676         val.u64 = env->spr[spr];
677         break;
678 
679     default:
680         /* Don't handle this size yet */
681         abort();
682     }
683 
684     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685     if (ret != 0) {
686         trace_kvm_failed_spr_set(spr, strerror(errno));
687     }
688 }
689 
690 static int kvm_put_fp(CPUState *cs)
691 {
692     PowerPCCPU *cpu = POWERPC_CPU(cs);
693     CPUPPCState *env = &cpu->env;
694     struct kvm_one_reg reg;
695     int i;
696     int ret;
697 
698     if (env->insns_flags & PPC_FLOAT) {
699         uint64_t fpscr = env->fpscr;
700         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
701 
702         reg.id = KVM_REG_PPC_FPSCR;
703         reg.addr = (uintptr_t)&fpscr;
704         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705         if (ret < 0) {
706             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
707             return ret;
708         }
709 
710         for (i = 0; i < 32; i++) {
711             uint64_t vsr[2];
712 
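            /* Each VSR is transferred as a 128-bit value: the FPR doubleword
             * and the VSX extension doubleword, ordered by host endianness */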
713 #ifdef HOST_WORDS_BIGENDIAN
714             vsr[0] = float64_val(env->fpr[i]);
715             vsr[1] = env->vsr[i];
716 #else
717             vsr[0] = env->vsr[i];
718             vsr[1] = float64_val(env->fpr[i]);
719 #endif
720             reg.addr = (uintptr_t) &vsr;
721             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
722 
723             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724             if (ret < 0) {
725                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
726                         i, strerror(errno));
727                 return ret;
728             }
729         }
730     }
731 
732     if (env->insns_flags & PPC_ALTIVEC) {
733         reg.id = KVM_REG_PPC_VSCR;
734         reg.addr = (uintptr_t)&env->vscr;
735         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736         if (ret < 0) {
737             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
738             return ret;
739         }
740 
741         for (i = 0; i < 32; i++) {
742             reg.id = KVM_REG_PPC_VR(i);
743             reg.addr = (uintptr_t)&env->avr[i];
744             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745             if (ret < 0) {
746                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
747                 return ret;
748             }
749         }
750     }
751 
752     return 0;
753 }
754 
755 static int kvm_get_fp(CPUState *cs)
756 {
757     PowerPCCPU *cpu = POWERPC_CPU(cs);
758     CPUPPCState *env = &cpu->env;
759     struct kvm_one_reg reg;
760     int i;
761     int ret;
762 
763     if (env->insns_flags & PPC_FLOAT) {
764         uint64_t fpscr;
765         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
766 
767         reg.id = KVM_REG_PPC_FPSCR;
768         reg.addr = (uintptr_t)&fpscr;
769         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770         if (ret < 0) {
771             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
772             return ret;
773         } else {
774             env->fpscr = fpscr;
775         }
776 
777         for (i = 0; i < 32; i++) {
778             uint64_t vsr[2];
779 
780             reg.addr = (uintptr_t) &vsr;
781             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
782 
783             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784             if (ret < 0) {
785                 DPRINTF("Unable to get %s%d from KVM: %s\n",
786                         vsx ? "VSR" : "FPR", i, strerror(errno));
787                 return ret;
788             } else {
789 #ifdef HOST_WORDS_BIGENDIAN
790                 env->fpr[i] = vsr[0];
791                 if (vsx) {
792                     env->vsr[i] = vsr[1];
793                 }
794 #else
795                 env->fpr[i] = vsr[1];
796                 if (vsx) {
797                     env->vsr[i] = vsr[0];
798                 }
799 #endif
800             }
801         }
802     }
803 
804     if (env->insns_flags & PPC_ALTIVEC) {
805         reg.id = KVM_REG_PPC_VSCR;
806         reg.addr = (uintptr_t)&env->vscr;
807         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808         if (ret < 0) {
809             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
810             return ret;
811         }
812 
813         for (i = 0; i < 32; i++) {
814             reg.id = KVM_REG_PPC_VR(i);
815             reg.addr = (uintptr_t)&env->avr[i];
816             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817             if (ret < 0) {
818                 DPRINTF("Unable to get VR%d from KVM: %s\n",
819                         i, strerror(errno));
820                 return ret;
821             }
822         }
823     }
824 
825     return 0;
826 }
827 
828 #if defined(TARGET_PPC64)
829 static int kvm_get_vpa(CPUState *cs)
830 {
831     PowerPCCPU *cpu = POWERPC_CPU(cs);
832     CPUPPCState *env = &cpu->env;
833     struct kvm_one_reg reg;
834     int ret;
835 
836     reg.id = KVM_REG_PPC_VPA_ADDR;
837     reg.addr = (uintptr_t)&env->vpa_addr;
838     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839     if (ret < 0) {
840         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
841         return ret;
842     }
843 
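    /* The SLB shadow (and the DTL below) are passed to KVM as an address/size
     * pair, so the two fields must be adjacent in CPUPPCState */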
844     assert((uintptr_t)&env->slb_shadow_size
845            == ((uintptr_t)&env->slb_shadow_addr + 8));
846     reg.id = KVM_REG_PPC_VPA_SLB;
847     reg.addr = (uintptr_t)&env->slb_shadow_addr;
848     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849     if (ret < 0) {
850         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851                 strerror(errno));
852         return ret;
853     }
854 
855     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
856     reg.id = KVM_REG_PPC_VPA_DTL;
857     reg.addr = (uintptr_t)&env->dtl_addr;
858     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
859     if (ret < 0) {
860         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
861                 strerror(errno));
862         return ret;
863     }
864 
865     return 0;
866 }
867 
868 static int kvm_put_vpa(CPUState *cs)
869 {
870     PowerPCCPU *cpu = POWERPC_CPU(cs);
871     CPUPPCState *env = &cpu->env;
872     struct kvm_one_reg reg;
873     int ret;
874 
875     /* SLB shadow or DTL can't be registered unless a master VPA is
876      * registered.  That means when restoring state, if a VPA *is*
877      * registered, we need to set that up first.  If not, we need to
878      * deregister the others before deregistering the master VPA */
879     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
880 
881     if (env->vpa_addr) {
882         reg.id = KVM_REG_PPC_VPA_ADDR;
883         reg.addr = (uintptr_t)&env->vpa_addr;
884         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
885         if (ret < 0) {
886             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
887             return ret;
888         }
889     }
890 
891     assert((uintptr_t)&env->slb_shadow_size
892            == ((uintptr_t)&env->slb_shadow_addr + 8));
893     reg.id = KVM_REG_PPC_VPA_SLB;
894     reg.addr = (uintptr_t)&env->slb_shadow_addr;
895     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896     if (ret < 0) {
897         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
898         return ret;
899     }
900 
901     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
902     reg.id = KVM_REG_PPC_VPA_DTL;
903     reg.addr = (uintptr_t)&env->dtl_addr;
904     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905     if (ret < 0) {
906         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
907                 strerror(errno));
908         return ret;
909     }
910 
911     if (!env->vpa_addr) {
912         reg.id = KVM_REG_PPC_VPA_ADDR;
913         reg.addr = (uintptr_t)&env->vpa_addr;
914         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915         if (ret < 0) {
916             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917             return ret;
918         }
919     }
920 
921     return 0;
922 }
923 #endif /* TARGET_PPC64 */
924 
925 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
926 {
927     CPUPPCState *env = &cpu->env;
928     struct kvm_sregs sregs;
929     int i;
930 
931     sregs.pvr = env->spr[SPR_PVR];
932 
933     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
934 
935     /* Sync SLB */
936 #ifdef TARGET_PPC64
937     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
938         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
939         if (env->slb[i].esid & SLB_ESID_V) {
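            /* KVM expects the SLB entry index in the low bits of slbe for
             * valid entries */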
940             sregs.u.s.ppc64.slb[i].slbe |= i;
941         }
942         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
943     }
944 #endif
945 
946     /* Sync SRs */
947     for (i = 0; i < 16; i++) {
948         sregs.u.s.ppc32.sr[i] = env->sr[i];
949     }
950 
951     /* Sync BATs */
952     for (i = 0; i < 8; i++) {
953         /* Beware. We have to swap upper and lower bits here */
954         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
955             | env->DBAT[1][i];
956         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
957             | env->IBAT[1][i];
958     }
959 
960     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
961 }
962 
963 int kvm_arch_put_registers(CPUState *cs, int level)
964 {
965     PowerPCCPU *cpu = POWERPC_CPU(cs);
966     CPUPPCState *env = &cpu->env;
967     struct kvm_regs regs;
968     int ret;
969     int i;
970 
971     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
972     if (ret < 0) {
973         return ret;
974     }
975 
976     regs.ctr = env->ctr;
977     regs.lr  = env->lr;
978     regs.xer = cpu_read_xer(env);
979     regs.msr = env->msr;
980     regs.pc = env->nip;
981 
982     regs.srr0 = env->spr[SPR_SRR0];
983     regs.srr1 = env->spr[SPR_SRR1];
984 
985     regs.sprg0 = env->spr[SPR_SPRG0];
986     regs.sprg1 = env->spr[SPR_SPRG1];
987     regs.sprg2 = env->spr[SPR_SPRG2];
988     regs.sprg3 = env->spr[SPR_SPRG3];
989     regs.sprg4 = env->spr[SPR_SPRG4];
990     regs.sprg5 = env->spr[SPR_SPRG5];
991     regs.sprg6 = env->spr[SPR_SPRG6];
992     regs.sprg7 = env->spr[SPR_SPRG7];
993 
994     regs.pid = env->spr[SPR_BOOKE_PID];
995 
996     for (i = 0; i < 32; i++)
997         regs.gpr[i] = env->gpr[i];
998 
999     regs.cr = 0;
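    /* Repack the eight 4-bit CR fields into the architected 32-bit CR,
     * with CR0 in the most significant nibble */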
1000     for (i = 0; i < 8; i++) {
1001         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1002     }
1003 
1004     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1005     if (ret < 0)
1006         return ret;
1007 
1008     kvm_put_fp(cs);
1009 
1010     if (env->tlb_dirty) {
1011         kvm_sw_tlb_put(cpu);
1012         env->tlb_dirty = false;
1013     }
1014 
1015     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1016         ret = kvmppc_put_books_sregs(cpu);
1017         if (ret < 0) {
1018             return ret;
1019         }
1020     }
1021 
1022     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1023         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1024     }
1025 
1026     if (cap_one_reg) {
1027         int i;
1028 
1029         /* We deliberately ignore errors here: for kernels which have
1030          * the ONE_REG calls but don't support the specific
1031          * registers, there's a reasonable chance things will still
1032          * work, at least until we try to migrate. */
1033         for (i = 0; i < 1024; i++) {
1034             uint64_t id = env->spr_cb[i].one_reg_id;
1035 
1036             if (id != 0) {
1037                 kvm_put_one_spr(cs, id, i);
1038             }
1039         }
1040 
1041 #ifdef TARGET_PPC64
1042         if (msr_ts) {
1043             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1044                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1045             }
1046             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1047                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1048             }
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1059         }
1060 
1061         if (cap_papr) {
1062             if (kvm_put_vpa(cs) < 0) {
1063                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1064             }
1065         }
1066 
1067         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1068 #endif /* TARGET_PPC64 */
1069     }
1070 
1071     return ret;
1072 }
1073 
1074 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1075 {
1076      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1077 }
1078 
1079 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1080 {
1081     CPUPPCState *env = &cpu->env;
1082     struct kvm_sregs sregs;
1083     int ret;
1084 
1085     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1086     if (ret < 0) {
1087         return ret;
1088     }
1089 
1090     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1091         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1092         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1093         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1094         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1095         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1096         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1097         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1098         env->spr[SPR_DECR] = sregs.u.e.dec;
1099         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1100         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1101         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1102     }
1103 
1104     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1105         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1106         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1107         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1108         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1109         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1110     }
1111 
1112     if (sregs.u.e.features & KVM_SREGS_E_64) {
1113         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1114     }
1115 
1116     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1117         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1118     }
1119 
1120     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1121         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1122         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1123         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1124         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1125         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1126         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1127         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1128         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1129         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1130         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1131         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1132         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1133         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1134         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1135         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1136         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1137         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1138         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1139         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1140         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1141         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1142         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1143         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1144         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1145         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1146         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1147         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1148         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1149         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1150         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1151         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1152         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1153 
1154         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1155             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1156             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1157             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1158             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1159             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1160             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1161         }
1162 
1163         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1164             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1165             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1166         }
1167 
1168         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1169             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1170             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1171             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1172             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1173         }
1174     }
1175 
1176     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1177         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1178         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1179         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1180         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1181         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1182         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1183         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1184         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1185         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1186         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1187     }
1188 
1189     if (sregs.u.e.features & KVM_SREGS_EXP) {
1190         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1191     }
1192 
1193     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1194         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1195         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1196     }
1197 
1198     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1199         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1200         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1201         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1202 
1203         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1204             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1205             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1206         }
1207     }
1208 
1209     return 0;
1210 }
1211 
1212 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1213 {
1214     CPUPPCState *env = &cpu->env;
1215     struct kvm_sregs sregs;
1216     int ret;
1217     int i;
1218 
1219     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1220     if (ret < 0) {
1221         return ret;
1222     }
1223 
1224     if (!env->external_htab) {
1225         ppc_store_sdr1(env, sregs.u.s.sdr1);
1226     }
1227 
1228     /* Sync SLB */
1229 #ifdef TARGET_PPC64
1230     /*
1231      * The packed SLB array we get from KVM_GET_SREGS only contains
1232      * information about valid entries. So we flush our internal copy
1233      * to get rid of stale ones, then put all valid SLB entries back
1234      * in.
1235      */
1236     memset(env->slb, 0, sizeof(env->slb));
1237     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1238         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1239         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1240         /*
1241          * Only restore valid entries
1242          */
1243         if (rb & SLB_ESID_V) {
1244             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1245         }
1246     }
1247 #endif
1248 
1249     /* Sync SRs */
1250     for (i = 0; i < 16; i++) {
1251         env->sr[i] = sregs.u.s.ppc32.sr[i];
1252     }
1253 
1254     /* Sync BATs */
1255     for (i = 0; i < 8; i++) {
1256         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1257         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1258         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1259         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1260     }
1261 
1262     return 0;
1263 }
1264 
1265 int kvm_arch_get_registers(CPUState *cs)
1266 {
1267     PowerPCCPU *cpu = POWERPC_CPU(cs);
1268     CPUPPCState *env = &cpu->env;
1269     struct kvm_regs regs;
1270     uint32_t cr;
1271     int i, ret;
1272 
1273     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1274     if (ret < 0)
1275         return ret;
1276 
1277     cr = regs.cr;
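    /* Split the 32-bit CR back into eight 4-bit crf[] fields, starting with
     * CR7 in the least significant nibble */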
1278     for (i = 7; i >= 0; i--) {
1279         env->crf[i] = cr & 15;
1280         cr >>= 4;
1281     }
1282 
1283     env->ctr = regs.ctr;
1284     env->lr = regs.lr;
1285     cpu_write_xer(env, regs.xer);
1286     env->msr = regs.msr;
1287     env->nip = regs.pc;
1288 
1289     env->spr[SPR_SRR0] = regs.srr0;
1290     env->spr[SPR_SRR1] = regs.srr1;
1291 
1292     env->spr[SPR_SPRG0] = regs.sprg0;
1293     env->spr[SPR_SPRG1] = regs.sprg1;
1294     env->spr[SPR_SPRG2] = regs.sprg2;
1295     env->spr[SPR_SPRG3] = regs.sprg3;
1296     env->spr[SPR_SPRG4] = regs.sprg4;
1297     env->spr[SPR_SPRG5] = regs.sprg5;
1298     env->spr[SPR_SPRG6] = regs.sprg6;
1299     env->spr[SPR_SPRG7] = regs.sprg7;
1300 
1301     env->spr[SPR_BOOKE_PID] = regs.pid;
1302 
1303     for (i = 0; i < 32; i++)
1304         env->gpr[i] = regs.gpr[i];
1305 
1306     kvm_get_fp(cs);
1307 
1308     if (cap_booke_sregs) {
1309         ret = kvmppc_get_booke_sregs(cpu);
1310         if (ret < 0) {
1311             return ret;
1312         }
1313     }
1314 
1315     if (cap_segstate) {
1316         ret = kvmppc_get_books_sregs(cpu);
1317         if (ret < 0) {
1318             return ret;
1319         }
1320     }
1321 
1322     if (cap_hior) {
1323         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1324     }
1325 
1326     if (cap_one_reg) {
1327         int i;
1328 
1329         /* We deliberately ignore errors here: for kernels which have
1330          * the ONE_REG calls but don't support the specific
1331          * registers, there's a reasonable chance things will still
1332          * work, at least until we try to migrate. */
1333         for (i = 0; i < 1024; i++) {
1334             uint64_t id = env->spr_cb[i].one_reg_id;
1335 
1336             if (id != 0) {
1337                 kvm_get_one_spr(cs, id, i);
1338             }
1339         }
1340 
1341 #ifdef TARGET_PPC64
1342         if (msr_ts) {
1343             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1344                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1345             }
1346             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1347                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1348             }
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1359         }
1360 
1361         if (cap_papr) {
1362             if (kvm_get_vpa(cs) < 0) {
1363                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1364             }
1365         }
1366 
1367         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1368 #endif
1369     }
1370 
1371     return 0;
1372 }
1373 
1374 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1375 {
1376     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1377 
1378     if (irq != PPC_INTERRUPT_EXT) {
1379         return 0;
1380     }
1381 
1382     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1383         return 0;
1384     }
1385 
1386     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1387 
1388     return 0;
1389 }
1390 
1391 #if defined(TARGET_PPCEMB)
1392 #define PPC_INPUT_INT PPC40x_INPUT_INT
1393 #elif defined(TARGET_PPC64)
1394 #define PPC_INPUT_INT PPC970_INPUT_INT
1395 #else
1396 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1397 #endif
1398 
1399 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1400 {
1401     PowerPCCPU *cpu = POWERPC_CPU(cs);
1402     CPUPPCState *env = &cpu->env;
1403     int r;
1404     unsigned irq;
1405 
1406     qemu_mutex_lock_iothread();
1407 
1408     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1409      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1410     if (!cap_interrupt_level &&
1411         run->ready_for_interrupt_injection &&
1412         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1413         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1414     {
1415         /* For now KVM disregards the 'irq' argument. However, in the
1416          * future KVM could cache it in-kernel to avoid a heavyweight exit
1417          * when reading the UIC.
1418          */
1419         irq = KVM_INTERRUPT_SET;
1420 
1421         DPRINTF("injected interrupt %d\n", irq);
1422         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1423         if (r < 0) {
1424             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1425         }
1426 
1427         /* Always wake up soon in case the interrupt was level based */
1428         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1429                        (NANOSECONDS_PER_SECOND / 50));
1430     }
1431 
1432     /* We don't know if there are more interrupts pending after this. However,
1433      * the guest will return to userspace in the course of handling this one
1434      * anyway, so we will get a chance to deliver the rest. */
1435 
1436     qemu_mutex_unlock_iothread();
1437 }
1438 
1439 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1440 {
1441     return MEMTXATTRS_UNSPECIFIED;
1442 }
1443 
1444 int kvm_arch_process_async_events(CPUState *cs)
1445 {
1446     return cs->halted;
1447 }
1448 
1449 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1450 {
1451     CPUState *cs = CPU(cpu);
1452     CPUPPCState *env = &cpu->env;
1453 
1454     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1455         cs->halted = 1;
1456         cs->exception_index = EXCP_HLT;
1457     }
1458 
1459     return 0;
1460 }
1461 
1462 /* Map DCR accesses to the existing QEMU DCR emulation */
1463 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1464 {
1465     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1466         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1467 
1468     return 0;
1469 }
1470 
1471 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1472 {
1473     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1474         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1475 
1476     return 0;
1477 }
1478 
1479 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1480 {
1481     /* Mixed endian case is not handled */
1482     uint32_t sc = debug_inst_opcode;
1483 
1484     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1485                             sizeof(sc), 0) ||
1486         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1487         return -EINVAL;
1488     }
1489 
1490     return 0;
1491 }
1492 
1493 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1494 {
1495     uint32_t sc;
1496 
1497     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1498         sc != debug_inst_opcode ||
1499         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500                             sizeof(sc), 1)) {
1501         return -EINVAL;
1502     }
1503 
1504     return 0;
1505 }
1506 
1507 static int find_hw_breakpoint(target_ulong addr, int type)
1508 {
1509     int n;
1510 
1511     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1512            <= ARRAY_SIZE(hw_debug_points));
1513 
1514     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1515         if (hw_debug_points[n].addr == addr &&
1516              hw_debug_points[n].type == type) {
1517             return n;
1518         }
1519     }
1520 
1521     return -1;
1522 }
1523 
1524 static int find_hw_watchpoint(target_ulong addr, int *flag)
1525 {
1526     int n;
1527 
1528     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1529     if (n >= 0) {
1530         *flag = BP_MEM_ACCESS;
1531         return n;
1532     }
1533 
1534     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1535     if (n >= 0) {
1536         *flag = BP_MEM_WRITE;
1537         return n;
1538     }
1539 
1540     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1541     if (n >= 0) {
1542         *flag = BP_MEM_READ;
1543         return n;
1544     }
1545 
1546     return -1;
1547 }
1548 
1549 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1550                                   target_ulong len, int type)
1551 {
1552     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1553         return -ENOBUFS;
1554     }
1555 
1556     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1557     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1558 
1559     switch (type) {
1560     case GDB_BREAKPOINT_HW:
1561         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1562             return -ENOBUFS;
1563         }
1564 
1565         if (find_hw_breakpoint(addr, type) >= 0) {
1566             return -EEXIST;
1567         }
1568 
1569         nb_hw_breakpoint++;
1570         break;
1571 
1572     case GDB_WATCHPOINT_WRITE:
1573     case GDB_WATCHPOINT_READ:
1574     case GDB_WATCHPOINT_ACCESS:
1575         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1576             return -ENOBUFS;
1577         }
1578 
1579         if (find_hw_breakpoint(addr, type) >= 0) {
1580             return -EEXIST;
1581         }
1582 
1583         nb_hw_watchpoint++;
1584         break;
1585 
1586     default:
1587         return -ENOSYS;
1588     }
1589 
1590     return 0;
1591 }
1592 
1593 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1594                                   target_ulong len, int type)
1595 {
1596     int n;
1597 
1598     n = find_hw_breakpoint(addr, type);
1599     if (n < 0) {
1600         return -ENOENT;
1601     }
1602 
1603     switch (type) {
1604     case GDB_BREAKPOINT_HW:
1605         nb_hw_breakpoint--;
1606         break;
1607 
1608     case GDB_WATCHPOINT_WRITE:
1609     case GDB_WATCHPOINT_READ:
1610     case GDB_WATCHPOINT_ACCESS:
1611         nb_hw_watchpoint--;
1612         break;
1613 
1614     default:
1615         return -ENOSYS;
1616     }
1617     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1618 
1619     return 0;
1620 }
1621 
1622 void kvm_arch_remove_all_hw_breakpoints(void)
1623 {
1624     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1625 }
1626 
1627 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1628 {
1629     int n;
1630 
1631     /* Software Breakpoint updates */
1632     if (kvm_sw_breakpoints_active(cs)) {
1633         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1634     }
1635 
1636     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1637            <= ARRAY_SIZE(hw_debug_points));
1638     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1639 
1640     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1642         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1643         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1644             switch (hw_debug_points[n].type) {
1645             case GDB_BREAKPOINT_HW:
1646                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1647                 break;
1648             case GDB_WATCHPOINT_WRITE:
1649                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1650                 break;
1651             case GDB_WATCHPOINT_READ:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1653                 break;
1654             case GDB_WATCHPOINT_ACCESS:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1656                                         KVMPPC_DEBUG_WATCH_READ;
1657                 break;
1658             default:
1659                 cpu_abort(cs, "Unsupported breakpoint type\n");
1660             }
1661             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1662         }
1663     }
1664 }
1665 
1666 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1667 {
1668     CPUState *cs = CPU(cpu);
1669     CPUPPCState *env = &cpu->env;
1670     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1671     int handle = 0;
1672     int n;
1673     int flag = 0;
1674 
1675     if (cs->singlestep_enabled) {
1676         handle = 1;
1677     } else if (arch_info->status) {
1678         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1679             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1680                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1681                 if (n >= 0) {
1682                     handle = 1;
1683                 }
1684             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1685                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1686                 n = find_hw_watchpoint(arch_info->address, &flag);
1687                 if (n >= 0) {
1688                     handle = 1;
1689                     cs->watchpoint_hit = &hw_watchpoint;
1690                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1691                     hw_watchpoint.flags = flag;
1692                 }
1693             }
1694         }
1695     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1696         handle = 1;
1697     } else {
1698         /* QEMU cannot handle this debug exception, so inject a
1699          * program exception into the guest instead.
1700          * Yes, a program exception, NOT a debug exception!
1701          * While QEMU owns the debug resources, debug exceptions must
1702          * always be enabled; we achieve this by setting MSR_DE and
1703          * also MSRP_DEP so the guest cannot change MSR_DE.
1704          * When instead emulating debug resources for the guest, the
1705          * guest must control MSR_DE (enabling/disabling the debug
1706          * interrupt as it needs).
1707          * Supporting both configurations at once is not possible, so
1708          * debug resources cannot be shared between QEMU and the guest
1709          * on the BookE architecture.
1710          * In the current design QEMU takes priority over the guest: if
1711          * QEMU is using the debug resources, the guest cannot use them.
1712          * For software breakpoints QEMU uses a privileged instruction,
1713          * so this exit cannot be a guest-set debug exception; the only
1714          * remaining possibility is that the guest executed a
1715          * privileged or illegal instruction, and that is why we are
1716          * injecting a program interrupt.
1717          */
1718 
1719         cpu_synchronize_state(cs);
1720         /* env->nip is PC, so increment this by 4 to use
1721          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1722          */
1723         env->nip += 4;
1724         cs->exception_index = POWERPC_EXCP_PROGRAM;
1725         env->error_code = POWERPC_EXCP_INVAL;
1726         ppc_cpu_do_interrupt(cs);
1727     }
1728 
1729     return handle;
1730 }
1731 
1732 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1733 {
1734     PowerPCCPU *cpu = POWERPC_CPU(cs);
1735     CPUPPCState *env = &cpu->env;
1736     int ret;
1737 
1738     qemu_mutex_lock_iothread();
1739 
1740     switch (run->exit_reason) {
1741     case KVM_EXIT_DCR:
1742         if (run->dcr.is_write) {
1743             DPRINTF("handle dcr write\n");
1744             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1745         } else {
1746             DPRINTF("handle dcr read\n");
1747             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1748         }
1749         break;
1750     case KVM_EXIT_HLT:
1751         DPRINTF("handle halt\n");
1752         ret = kvmppc_handle_halt(cpu);
1753         break;
1754 #if defined(TARGET_PPC64)
1755     case KVM_EXIT_PAPR_HCALL:
1756         DPRINTF("handle PAPR hypercall\n");
1757         run->papr_hcall.ret = spapr_hypercall(cpu,
1758                                               run->papr_hcall.nr,
1759                                               run->papr_hcall.args);
1760         ret = 0;
1761         break;
1762 #endif
1763     case KVM_EXIT_EPR:
1764         DPRINTF("handle epr\n");
1765         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1766         ret = 0;
1767         break;
1768     case KVM_EXIT_WATCHDOG:
1769         DPRINTF("handle watchdog expiry\n");
1770         watchdog_perform_action();
1771         ret = 0;
1772         break;
1773 
1774     case KVM_EXIT_DEBUG:
1775         DPRINTF("handle debug exception\n");
1776         if (kvm_handle_debug(cpu, run)) {
1777             ret = EXCP_DEBUG;
1778             break;
1779         }
1780         /* re-enter, this exception was guest-internal */
1781         ret = 0;
1782         break;
1783 
1784     default:
1785         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1786         ret = -1;
1787         break;
1788     }
1789 
1790     qemu_mutex_unlock_iothread();
1791     return ret;
1792 }
1793 
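/* The BookE watchdog helpers below drive the guest's timer registers
 * through KVM's ONE_REG interface: KVM_REG_PPC_OR_TSR ORs bits into the
 * Timer Status Register, KVM_REG_PPC_CLEAR_TSR clears them, and
 * KVM_REG_PPC_TCR replaces the Timer Control Register wholesale. */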
1794 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1795 {
1796     CPUState *cs = CPU(cpu);
1797     uint32_t bits = tsr_bits;
1798     struct kvm_one_reg reg = {
1799         .id = KVM_REG_PPC_OR_TSR,
1800         .addr = (uintptr_t) &bits,
1801     };
1802 
1803     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1804 }
1805 
1806 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1807 {
1808 
1809     CPUState *cs = CPU(cpu);
1810     uint32_t bits = tsr_bits;
1811     struct kvm_one_reg reg = {
1812         .id = KVM_REG_PPC_CLEAR_TSR,
1813         .addr = (uintptr_t) &bits,
1814     };
1815 
1816     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 }
1818 
1819 int kvmppc_set_tcr(PowerPCCPU *cpu)
1820 {
1821     CPUState *cs = CPU(cpu);
1822     CPUPPCState *env = &cpu->env;
1823     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1824 
1825     struct kvm_one_reg reg = {
1826         .id = KVM_REG_PPC_TCR,
1827         .addr = (uintptr_t) &tcr,
1828     };
1829 
1830     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1831 }
1832 
1833 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1834 {
1835     CPUState *cs = CPU(cpu);
1836     int ret;
1837 
1838     if (!kvm_enabled()) {
1839         return -1;
1840     }
1841 
1842     if (!cap_ppc_watchdog) {
1843         printf("warning: KVM does not support watchdog\n");
1844         return -1;
1845     }
1846 
1847     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1848     if (ret < 0) {
1849         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850                 __func__, strerror(-ret));
1851         return ret;
1852     }
1853 
1854     return ret;
1855 }
1856 
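/* Copy the first /proc/cpuinfo line that starts with @field into @value
 * (at most @len bytes, NUL-terminated).  Returns 0 on a match, -1 if the
 * field is absent or the file can't be opened. */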
1857 static int read_cpuinfo(const char *field, char *value, int len)
1858 {
1859     FILE *f;
1860     int ret = -1;
1861     int field_len = strlen(field);
1862     char line[512];
1863 
1864     f = fopen("/proc/cpuinfo", "r");
1865     if (!f) {
1866         return -1;
1867     }
1868 
1869     do {
1870         if (!fgets(line, sizeof(line), f)) {
1871             break;
1872         }
1873         if (!strncmp(line, field, field_len)) {
1874             pstrcpy(value, len, line);
1875             ret = 0;
1876             break;
1877         }
1878     } while (*line);
1879 
1880     fclose(f);
1881 
1882     return ret;
1883 }
1884 
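/* The timebase frequency is parsed from /proc/cpuinfo, which on a POWER
 * host contains a line roughly like (value illustrative only):
 *
 *     timebase        : 512000000
 *
 * Everything after the ':' is handed to atoi(); if the line is missing or
 * malformed we fall back to NANOSECONDS_PER_SECOND (i.e. 1 GHz). */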
1885 uint32_t kvmppc_get_tbfreq(void)
1886 {
1887     char line[512];
1888     char *ns;
1889     uint32_t retval = NANOSECONDS_PER_SECOND;
1890 
1891     if (read_cpuinfo("timebase", line, sizeof(line))) {
1892         return retval;
1893     }
1894 
1895     if (!(ns = strchr(line, ':'))) {
1896         return retval;
1897     }
1898 
1899     ns++;
1900 
1901     return atoi(ns);
1902 }
1903 
1904 bool kvmppc_get_host_serial(char **value)
1905 {
1906     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1907                                NULL);
1908 }
1909 
1910 bool kvmppc_get_host_model(char **value)
1911 {
1912     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1913 }
1914 
1915 /* Try to find a device tree node for a CPU with clock-frequency property */
1916 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1917 {
1918     struct dirent *dirp;
1919     DIR *dp;
1920 
1921     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1922         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1923         return -1;
1924     }
1925 
1926     buf[0] = '\0';
1927     while ((dirp = readdir(dp)) != NULL) {
1928         FILE *f;
1929         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1930                  dirp->d_name);
1931         f = fopen(buf, "r");
1932         if (f) {
1933             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1934             fclose(f);
1935             break;
1936         }
1937         buf[0] = '\0';
1938     }
1939     closedir(dp);
1940     if (buf[0] == '\0') {
1941         printf("Unknown host!\n");
1942         return -1;
1943     }
1944 
1945     return 0;
1946 }
1947 
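/* Read a single big-endian integer property (4 or 8 bytes, as exposed under
 * /proc/device-tree) from @filename and return it in host byte order. */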
1948 static uint64_t kvmppc_read_int_dt(const char *filename)
1949 {
1950     union {
1951         uint32_t v32;
1952         uint64_t v64;
1953     } u;
1954     FILE *f;
1955     int len;
1956 
1957     f = fopen(filename, "rb");
1958     if (!f) {
1959         return -1;
1960     }
1961 
1962     len = fread(&u, 1, sizeof(u), f);
1963     fclose(f);
1964     switch (len) {
1965     case 4:
1966         /* property is a 32-bit quantity */
1967         return be32_to_cpu(u.v32);
1968     case 8:
1969         return be64_to_cpu(u.v64);
1970     }
1971 
1972     return 0;
1973 }
1974 
1975 /* Read a CPU node property from the host device tree that's a single
1976  * integer (32-bit or 64-bit).  Returns -1 if the CPU node or property
1977  * can't be found or opened, and 0 if the property's format isn't
1978  * understood. */
1979 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1980 {
1981     char buf[PATH_MAX], *tmp;
1982     uint64_t val;
1983 
1984     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1985         return -1;
1986     }
1987 
1988     tmp = g_strdup_printf("%s/%s", buf, propname);
1989     val = kvmppc_read_int_dt(tmp);
1990     g_free(tmp);
1991 
1992     return val;
1993 }
1994 
1995 uint64_t kvmppc_get_clockfreq(void)
1996 {
1997     return kvmppc_read_int_cpu_dt("clock-frequency");
1998 }
1999 
2000 uint32_t kvmppc_get_vmx(void)
2001 {
2002     return kvmppc_read_int_cpu_dt("ibm,vmx");
2003 }
2004 
2005 uint32_t kvmppc_get_dfp(void)
2006 {
2007     return kvmppc_read_int_cpu_dt("ibm,dfp");
2008 }
2009 
2010 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011 {
2012     PowerPCCPU *cpu = ppc_env_get_cpu(env);
2013     CPUState *cs = CPU(cpu);
2014 
2015     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2016         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2017         return 0;
2018     }
2019 
2020     return 1;
2021 }
2022 
2023 int kvmppc_get_hasidle(CPUPPCState *env)
2024 {
2025     struct kvm_ppc_pvinfo pvinfo;
2026 
2027     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2028         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2029         return 1;
2030     }
2031 
2032     return 0;
2033 }
2034 
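/* Fill @buf with the instruction sequence a guest should use to make a
 * hypercall: either the sequence KVM reports via KVM_PPC_GET_PVINFO
 * (returning 0), or a fallback that makes every hypercall fail with -1
 * (returning 1).  A caller would typically copy the result into the
 * guest-visible hypercall area, along the lines of (illustrative only):
 *
 *     uint32_t hc[4];
 *     kvmppc_get_hypercall(env, (uint8_t *)hc, sizeof(hc));
 *     // expose hc[] to the guest, e.g. via the device tree
 */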
2035 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2036 {
2037     uint32_t *hc = (uint32_t *)buf;
2038     struct kvm_ppc_pvinfo pvinfo;
2039 
2040     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2041         memcpy(buf, pvinfo.hcall, buf_len);
2042         return 0;
2043     }
2044 
2045     /*
2046      * Fall back to a hypercall sequence that always fails, regardless of endianness:
2047      *
2048      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2049      *     li r3, -1
2050      *     b .+8       (becomes nop in wrong endian)
2051      *     bswap32(li r3, -1)
2052      */
2053 
2054     hc[0] = cpu_to_be32(0x08000048);
2055     hc[1] = cpu_to_be32(0x3860ffff);
2056     hc[2] = cpu_to_be32(0x48000008);
2057     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2058 
2059     return 1;
2060 }
2061 
2062 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2063 {
2064     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2065 }
2066 
2067 void kvmppc_enable_logical_ci_hcalls(void)
2068 {
2069     /*
2070      * FIXME: it would be nice if we could detect the cases where
2071      * we're using a device which requires the in kernel
2072      * implementation of these hcalls, but the kernel lacks them and
2073      * produce a warning.
2074      */
2075     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2076     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2077 }
2078 
2079 void kvmppc_enable_set_mode_hcall(void)
2080 {
2081     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2082 }
2083 
2084 void kvmppc_enable_clear_ref_mod_hcalls(void)
2085 {
2086     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2087     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2088 }
2089 
2090 void kvmppc_set_papr(PowerPCCPU *cpu)
2091 {
2092     CPUState *cs = CPU(cpu);
2093     int ret;
2094 
2095     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096     if (ret) {
2097         error_report("This vCPU type or KVM version does not support PAPR");
2098         exit(1);
2099     }
2100 
2101     /* Update the capability flag so we sync the right information
2102      * with kvm */
2103     cap_papr = 1;
2104 }
2105 
2106 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2107 {
2108     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2109 }
2110 
2111 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2112 {
2113     CPUState *cs = CPU(cpu);
2114     int ret;
2115 
2116     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117     if (ret && mpic_proxy) {
2118         error_report("This KVM version does not support EPR");
2119         exit(1);
2120     }
2121 }
2122 
2123 int kvmppc_smt_threads(void)
2124 {
2125     return cap_ppc_smt ? cap_ppc_smt : 1;
2126 }
2127 
2128 #ifdef TARGET_PPC64
2129 off_t kvmppc_alloc_rma(void **rma)
2130 {
2131     off_t size;
2132     int fd;
2133     struct kvm_allocate_rma ret;
2134 
2135     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137      *                      not necessary on this hardware
2138      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2139      *
2140      * FIXME: We should allow the user to force contiguous RMA
2141      * allocation in the cap_ppc_rma==1 case.
2142      */
2143     if (cap_ppc_rma < 2) {
2144         return 0;
2145     }
2146 
2147     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2148     if (fd < 0) {
2149         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2150                 strerror(errno));
2151         return -1;
2152     }
2153 
2154     size = MIN(ret.rma_size, 256ul << 20);
2155 
2156     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2157     if (*rma == MAP_FAILED) {
2158         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2159         return -1;
2160     }
2161 
2162     return size;
2163 }
2164 
2165 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2166 {
2167     struct kvm_ppc_smmu_info info;
2168     long rampagesize, best_page_shift;
2169     int i;
2170 
2171     if (cap_ppc_rma >= 2) {
2172         return current_size;
2173     }
2174 
2175     /* Find the largest hardware supported page size that's less than
2176      * or equal to the (logical) backing page size of guest RAM */
2177     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2178     rampagesize = getrampagesize();
2179     best_page_shift = 0;
2180 
2181     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2182         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2183 
2184         if (!sps->page_shift) {
2185             continue;
2186         }
2187 
2188         if ((sps->page_shift > best_page_shift)
2189             && ((1UL << sps->page_shift) <= rampagesize)) {
2190             best_page_shift = sps->page_shift;
2191         }
2192     }
2193 
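    /* Clamp the RMA so the VRMA mapping still fits in the hash table:
     * roughly one HPTE per RMA page, with at most one VRMA entry per
     * 128-byte (2^7) PTEG, gives 2^(hash_shift - 7) mappable pages, i.e.
     * 2^(best_page_shift + hash_shift - 7) bytes. */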
2194     return MIN(current_size,
2195                1ULL << (best_page_shift + hash_shift - 7));
2196 }
2197 #endif
2198 
2199 bool kvmppc_spapr_use_multitce(void)
2200 {
2201     return cap_spapr_multitce;
2202 }
2203 
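/* Ask KVM for an in-kernel TCE (guest DMA translation) table for @liobn and
 * mmap() it so QEMU sees the same table the kernel updates.  On success the
 * fd is returned through @pfd and the mapping is the return value; on any
 * failure (including missing capabilities) we return NULL with *pfd = -1,
 * and the caller is expected to fall back to a userspace-managed table. */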
2204 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2205                               bool need_vfio)
2206 {
2207     struct kvm_create_spapr_tce args = {
2208         .liobn = liobn,
2209         .window_size = window_size,
2210     };
2211     long len;
2212     int fd;
2213     void *table;
2214 
2215     /* Must set fd to -1 so we don't try to munmap when called for
2216      * destroying the table, which the upper layers -will- do
2217      */
2218     *pfd = -1;
2219     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2220         return NULL;
2221     }
2222 
2223     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2224     if (fd < 0) {
2225         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2226                 liobn);
2227         return NULL;
2228     }
2229 
2230     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2231     /* FIXME: round this up to page size */
2232 
2233     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2234     if (table == MAP_FAILED) {
2235         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2236                 liobn);
2237         close(fd);
2238         return NULL;
2239     }
2240 
2241     *pfd = fd;
2242     return table;
2243 }
2244 
2245 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2246 {
2247     long len;
2248 
2249     if (fd < 0) {
2250         return -1;
2251     }
2252 
2253     len = nb_table * sizeof(uint64_t);
2254     if ((munmap(table, len) < 0) ||
2255         (close(fd) < 0)) {
2256         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2257                 strerror(errno));
2258         /* Leak the table */
2259     }
2260 
2261     return 0;
2262 }
2263 
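/* Negotiate the hash page table with the kernel.  The return value follows
 * the convention the callers rely on: a negative errno on failure, 0 when
 * QEMU itself must allocate the HPT (full emulation or PR KVM), or the
 * log2 of the HPT size when the kernel has allocated or reset one (e.g. 24
 * for the fixed 16 MiB table of pre-reset-API HV kernels). */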
2264 int kvmppc_reset_htab(int shift_hint)
2265 {
2266     uint32_t shift = shift_hint;
2267 
2268     if (!kvm_enabled()) {
2269         /* Full emulation, tell caller to allocate htab itself */
2270         return 0;
2271     }
2272     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2273         int ret;
2274         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2275         if (ret == -ENOTTY) {
2276             /* At least some versions of PR KVM advertise the
2277              * capability, but don't implement the ioctl().  Oops.
2278              * Return 0 so that we allocate the htab in qemu, as is
2279              * correct for PR. */
2280             return 0;
2281         } else if (ret < 0) {
2282             return ret;
2283         }
2284         return shift;
2285     }
2286 
2287     /* We have a kernel that predates the htab reset calls.  For PR
2288      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2289      * this era, it has allocated a 16MB fixed size hash table already. */
2290     if (kvmppc_is_pr(kvm_state)) {
2291         /* PR - tell caller to allocate htab */
2292         return 0;
2293     } else {
2294         /* HV - assume 16MB kernel allocated htab */
2295         return 24;
2296     }
2297 }
2298 
2299 static inline uint32_t mfpvr(void)
2300 {
2301     uint32_t pvr;
2302 
2303     asm ("mfpvr %0"
2304          : "=r"(pvr));
2305     return pvr;
2306 }
2307 
2308 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2309 {
2310     if (on) {
2311         *word |= flags;
2312     } else {
2313         *word &= ~flags;
2314     }
2315 }
2316 
2317 static void kvmppc_host_cpu_initfn(Object *obj)
2318 {
2319     assert(kvm_enabled());
2320 }
2321 
2322 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2323 {
2324     DeviceClass *dc = DEVICE_CLASS(oc);
2325     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2326     uint32_t vmx = kvmppc_get_vmx();
2327     uint32_t dfp = kvmppc_get_dfp();
2328     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2329     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2330 
2331     /* Now fix up the class with information we can query from the host */
2332     pcc->pvr = mfpvr();
2333 
2334     if (vmx != -1) {
2335         /* Only override when we know what the host supports */
2336         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2337         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2338     }
2339     if (dfp != -1) {
2340         /* Only override when we know what the host supports */
2341         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2342     }
2343 
2344     if (dcache_size != -1) {
2345         pcc->l1_dcache_size = dcache_size;
2346     }
2347 
2348     if (icache_size != -1) {
2349         pcc->l1_icache_size = icache_size;
2350     }
2351 
2352     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2353     dc->cannot_destroy_with_object_finalize_yet = true;
2354 }
2355 
2356 bool kvmppc_has_cap_epr(void)
2357 {
2358     return cap_epr;
2359 }
2360 
2361 bool kvmppc_has_cap_htab_fd(void)
2362 {
2363     return cap_htab_fd;
2364 }
2365 
2366 bool kvmppc_has_cap_fixup_hcalls(void)
2367 {
2368     return cap_fixup_hcalls;
2369 }
2370 
2371 bool kvmppc_has_cap_htm(void)
2372 {
2373     return cap_htm;
2374 }
2375 
2376 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2377 {
2378     ObjectClass *oc = OBJECT_CLASS(pcc);
2379 
2380     while (oc && !object_class_is_abstract(oc)) {
2381         oc = object_class_get_parent(oc);
2382     }
2383     assert(oc);
2384 
2385     return POWERPC_CPU_CLASS(oc);
2386 }
2387 
2388 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2389 {
2390     uint32_t host_pvr = mfpvr();
2391     PowerPCCPUClass *pvr_pcc;
2392 
2393     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2394     if (pvr_pcc == NULL) {
2395         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2396     }
2397 
2398     return pvr_pcc;
2399 }
2400 
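/* Register the "host" CPU class, using the host PVR to pick its parent
 * class, plus a generic alias named after the host CPU family, and (on
 * ppc64) the corresponding spapr-cpu-core types. */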
2401 static int kvm_ppc_register_host_cpu_type(void)
2402 {
2403     TypeInfo type_info = {
2404         .name = TYPE_HOST_POWERPC_CPU,
2405         .instance_init = kvmppc_host_cpu_initfn,
2406         .class_init = kvmppc_host_cpu_class_init,
2407     };
2408     PowerPCCPUClass *pvr_pcc;
2409     DeviceClass *dc;
2410 
2411     pvr_pcc = kvm_ppc_get_host_cpu_class();
2412     if (pvr_pcc == NULL) {
2413         return -1;
2414     }
2415     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2416     type_register(&type_info);
2417 
2418     /* Also register a generic CPU class for the host CPU's family */
2419     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2420     dc = DEVICE_CLASS(pvr_pcc);
2421     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2422     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2423     type_register(&type_info);
2424 
2425 #if defined(TARGET_PPC64)
2426     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427     type_info.parent = TYPE_SPAPR_CPU_CORE;
2428     type_info.instance_size = sizeof(sPAPRCPUCore);
2429     type_info.instance_init = NULL;
2430     type_info.class_init = spapr_cpu_core_class_init;
2431     type_info.class_data = (void *) "host";
2432     type_register(&type_info);
2433     g_free((void *)type_info.name);
2434 
2435     /* Register generic spapr CPU family class for current host CPU type */
2436     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2437     type_info.class_data = (void *) dc->desc;
2438     type_register(&type_info);
2439     g_free((void *)type_info.name);
2440 #endif
2441 
2442     return 0;
2443 }
2444 
2445 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2446 {
2447     struct kvm_rtas_token_args args = {
2448         .token = token,
2449     };
2450 
2451     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2452         return -ENOENT;
2453     }
2454 
2455     strncpy(args.name, function, sizeof(args.name));
2456 
2457     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2458 }
2459 
2460 int kvmppc_get_htab_fd(bool write)
2461 {
2462     struct kvm_get_htab_fd s = {
2463         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2464         .start_index = 0,
2465     };
2466 
2467     if (!cap_htab_fd) {
2468         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2469         return -1;
2470     }
2471 
2472     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2473 }
2474 
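/* Stream hash table entries from the kernel's HTAB fd into the migration
 * stream.  Each chunk read from the fd is re-emitted as:
 *
 *     be32 index, be16 n_valid, be16 n_invalid,
 *     followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes each
 *
 * kvmppc_load_htab_chunk() below consumes the same layout.  Reading stops
 * once max_ns nanoseconds have elapsed (if max_ns is non-negative) or when
 * the fd has no more data; returns 1 when the table was fully drained,
 * 0 otherwise. */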
2475 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2476 {
2477     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2478     uint8_t buf[bufsize];
2479     ssize_t rc;
2480 
2481     do {
2482         rc = read(fd, buf, bufsize);
2483         if (rc < 0) {
2484             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2485                     strerror(errno));
2486             return rc;
2487         } else if (rc) {
2488             uint8_t *buffer = buf;
2489             ssize_t n = rc;
2490             while (n) {
2491                 struct kvm_get_htab_header *head =
2492                     (struct kvm_get_htab_header *) buffer;
2493                 size_t chunksize = sizeof(*head) +
2494                      HASH_PTE_SIZE_64 * head->n_valid;
2495 
2496                 qemu_put_be32(f, head->index);
2497                 qemu_put_be16(f, head->n_valid);
2498                 qemu_put_be16(f, head->n_invalid);
2499                 qemu_put_buffer(f, (void *)(head + 1),
2500                                 HASH_PTE_SIZE_64 * head->n_valid);
2501 
2502                 buffer += chunksize;
2503                 n -= chunksize;
2504             }
2505         }
2506     } while ((rc != 0)
2507              && ((max_ns < 0)
2508                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2509 
2510     return (rc == 0) ? 1 : 0;
2511 }
2512 
2513 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2514                            uint16_t n_valid, uint16_t n_invalid)
2515 {
2516     struct kvm_get_htab_header *buf;
2517     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2518     ssize_t rc;
2519 
2520     buf = alloca(chunksize);
2521     buf->index = index;
2522     buf->n_valid = n_valid;
2523     buf->n_invalid = n_invalid;
2524 
2525     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2526 
2527     rc = write(fd, buf, chunksize);
2528     if (rc < 0) {
2529         fprintf(stderr, "Error writing KVM hash table: %s\n",
2530                 strerror(errno));
2531         return rc;
2532     }
2533     if (rc != chunksize) {
2534         /* We should never get a short write on a single chunk */
2535         fprintf(stderr, "Short write, restoring KVM hash table\n");
2536         return -1;
2537     }
2538     return 0;
2539 }
2540 
2541 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2542 {
2543     return true;
2544 }
2545 
2546 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2547 {
2548     return 1;
2549 }
2550 
2551 int kvm_arch_on_sigbus(int code, void *addr)
2552 {
2553     return 1;
2554 }
2555 
2556 void kvm_arch_init_irq_routing(KVMState *s)
2557 {
2558 }
2559 
2560 struct kvm_get_htab_buf {
2561     struct kvm_get_htab_header header;
2562     /*
2563      * We require one extra entry for the read
2564      */
2565     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2566 };
2567 
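/* Read one group of HPTEs starting at @pte_index through a temporary HTAB
 * fd.  The returned token is really a pointer into a heap-allocated
 * kvm_get_htab_buf and must be released with kvmppc_hash64_free_pteg();
 * 0 is returned on any failure. */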
2568 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2569 {
2570     int htab_fd;
2571     struct kvm_get_htab_fd ghf;
2572     struct kvm_get_htab_buf  *hpte_buf;
2573 
2574     ghf.flags = 0;
2575     ghf.start_index = pte_index;
2576     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2577     if (htab_fd < 0) {
2578         goto error_out;
2579     }
2580 
2581     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2582     /*
2583      * Read the hpte group
2584      */
2585     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2586         goto out_close;
2587     }
2588 
2589     close(htab_fd);
2590     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2591 
2592 out_close:
2593     g_free(hpte_buf);
2594     close(htab_fd);
2595 error_out:
2596     return 0;
2597 }
2598 
2599 void kvmppc_hash64_free_pteg(uint64_t token)
2600 {
2601     struct kvm_get_htab_buf *htab_buf;
2602 
2603     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2604                             hpte);
2605     g_free(htab_buf);
2606     return;
2607 }
2608 
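/* Write a single HPTE (pte0/pte1) at @pte_index by feeding a one-entry
 * chunk to the kernel's HTAB fd.  Errors are silently ignored beyond
 * satisfying write()'s warn_unused_result attribute. */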
2609 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2610                              target_ulong pte0, target_ulong pte1)
2611 {
2612     int htab_fd;
2613     struct kvm_get_htab_fd ghf;
2614     struct kvm_get_htab_buf hpte_buf;
2615 
2616     ghf.flags = 0;
2617     ghf.start_index = 0;     /* Ignored */
2618     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2619     if (htab_fd < 0) {
2620         goto error_out;
2621     }
2622 
2623     hpte_buf.header.n_valid = 1;
2624     hpte_buf.header.n_invalid = 0;
2625     hpte_buf.header.index = pte_index;
2626     hpte_buf.hpte[0] = pte0;
2627     hpte_buf.hpte[1] = pte1;
2628     /*
2629      * Write the hpte entry.
2630      * CAUTION: write() has the warn_unused_result attribute. Hence we
2631      * need to check the return value, even though we do nothing.
2632      */
2633     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2634         goto out_close;
2635     }
2636 
2637 out_close:
2638     close(htab_fd);
2639     return;
2640 
2641 error_out:
2642     return;
2643 }
2644 
2645 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2646                              uint64_t address, uint32_t data, PCIDevice *dev)
2647 {
2648     return 0;
2649 }
2650 
2651 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2652                                 int vector, PCIDevice *dev)
2653 {
2654     return 0;
2655 }
2656 
2657 int kvm_arch_release_virq_post(int virq)
2658 {
2659     return 0;
2660 }
2661 
2662 int kvm_arch_msi_data_to_gsi(uint32_t data)
2663 {
2664     return data & 0xffff;
2665 }
2666 
2667 int kvmppc_enable_hwrng(void)
2668 {
2669     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2670         return -1;
2671     }
2672 
2673     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2674 }
2675