// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *	   Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/llist.h>
#include <linux/pgtable.h>

#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu.h>
#include <asm/pgalloc.h>
#include <asm/pte-walk.h>
#include <asm/reg.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);

void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->pcr = vc->pcr | PCR_MASK;
	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->tb_offset = vc->tb_offset;
	hr->dawr0 = vcpu->arch.dawr0;
	hr->dawrx0 = vcpu->arch.dawrx0;
	hr->ciabr = vcpu->arch.ciabr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	hr->dawr1 = vcpu->arch.dawr1;
	hr->dawrx1 = vcpu->arch.dawrx1;
}

static void byteswap_pt_regs(struct pt_regs *regs)
{
	unsigned long *addr = (unsigned long *) regs;

	for (; addr < ((unsigned long *) (regs + 1)); addr++)
		*addr = swab64(*addr);
}

static void byteswap_hv_regs(struct hv_guest_state *hr)
{
	hr->version = swab64(hr->version);
	hr->lpid = swab32(hr->lpid);
	hr->vcpu_token = swab32(hr->vcpu_token);
	hr->lpcr = swab64(hr->lpcr);
	hr->pcr = swab64(hr->pcr) | PCR_MASK;
	hr->amor = swab64(hr->amor);
	hr->dpdes = swab64(hr->dpdes);
	hr->hfscr = swab64(hr->hfscr);
	hr->tb_offset = swab64(hr->tb_offset);
	hr->dawr0 = swab64(hr->dawr0);
	hr->dawrx0 = swab64(hr->dawrx0);
	hr->ciabr = swab64(hr->ciabr);
	hr->hdec_expiry = swab64(hr->hdec_expiry);
	hr->purr = swab64(hr->purr);
	hr->spurr = swab64(hr->spurr);
	hr->ic = swab64(hr->ic);
	hr->vtb = swab64(hr->vtb);
	hr->hdar = swab64(hr->hdar);
	hr->hdsisr = swab64(hr->hdsisr);
	hr->heir = swab64(hr->heir);
	hr->asdr = swab64(hr->asdr);
	hr->srr0 = swab64(hr->srr0);
	hr->srr1 = swab64(hr->srr1);
	hr->sprg[0] = swab64(hr->sprg[0]);
	hr->sprg[1] = swab64(hr->sprg[1]);
	hr->sprg[2] = swab64(hr->sprg[2]);
	hr->sprg[3] = swab64(hr->sprg[3]);
	hr->pidr = swab64(hr->pidr);
	hr->cfar = swab64(hr->cfar);
	hr->ppr = swab64(hr->ppr);
	hr->dawr1 = swab64(hr->dawr1);
	hr->dawrx1 = swab64(hr->dawrx1);
}

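/*
 * Capture the L2 register state that the L1 hypervisor needs to see on
 * exit: the live SPR values that may have changed while the L2 ran,
 * plus the fault/emulation details (HDAR, HDSISR, ASDR, HEIR) for the
 * particular interrupt being delivered.
 */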
static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap,
				 struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	hr->dpdes = vc->dpdes;
	hr->hfscr = vcpu->arch.hfscr;
	hr->purr = vcpu->arch.purr;
	hr->spurr = vcpu->arch.spurr;
	hr->ic = vcpu->arch.ic;
	hr->vtb = vc->vtb;
	hr->srr0 = vcpu->arch.shregs.srr0;
	hr->srr1 = vcpu->arch.shregs.srr1;
	hr->sprg[0] = vcpu->arch.shregs.sprg0;
	hr->sprg[1] = vcpu->arch.shregs.sprg1;
	hr->sprg[2] = vcpu->arch.shregs.sprg2;
	hr->sprg[3] = vcpu->arch.shregs.sprg3;
	hr->pidr = vcpu->arch.pid;
	hr->cfar = vcpu->arch.cfar;
	hr->ppr = vcpu->arch.ppr;
	switch (trap) {
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		hr->hdar = vcpu->arch.fault_dar;
		hr->hdsisr = vcpu->arch.fault_dsisr;
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		hr->asdr = vcpu->arch.fault_gpa;
		break;
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		hr->heir = vcpu->arch.emul_inst;
		break;
	}
}

/*
 * This can result in some L0 HV register state being leaked to an L1
 * hypervisor when the hv_guest_state is copied back to the guest after
 * being modified here.
 *
 * There is no known problem with such a leak, and in many cases these
 * register settings could be derived by the guest by observing behaviour
 * and timing, interrupts, etc., but it is an issue to consider.
 */
static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 mask;

	/*
	 * Don't let L1 change LPCR bits for the L2 except these:
	 */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
		LPCR_LPES | LPCR_MER;

	/*
	 * Additional filtering is required depending on hardware
	 * and configuration.
	 */
	hr->lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
					 (vc->lpcr & ~mask) | (hr->lpcr & mask));

	/*
	 * Don't let L1 enable features for L2 which we've disabled for L1,
	 * but preserve the interrupt cause field.
	 */
	hr->hfscr &= (HFSCR_INTR_CAUSE | vcpu->arch.hfscr);

	/* Don't let data address watchpoint match in hypervisor state */
	hr->dawrx0 &= ~DAWRX_HYP;
	hr->dawrx1 &= ~DAWRX_HYP;

	/* Don't let completed instruction address breakpt match in HV state */
	if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
		hr->ciabr &= ~CIABR_PRIV;
}

static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->pcr = hr->pcr | PCR_MASK;
	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.dawr0 = hr->dawr0;
	vcpu->arch.dawrx0 = hr->dawrx0;
	vcpu->arch.ciabr = hr->ciabr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
	vcpu->arch.dawr1 = hr->dawr1;
	vcpu->arch.dawrx1 = hr->dawrx1;
}

void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
				   struct hv_guest_state *hr)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	vc->dpdes = hr->dpdes;
	vcpu->arch.hfscr = hr->hfscr;
	vcpu->arch.purr = hr->purr;
	vcpu->arch.spurr = hr->spurr;
	vcpu->arch.ic = hr->ic;
	vc->vtb = hr->vtb;
	vcpu->arch.fault_dar = hr->hdar;
	vcpu->arch.fault_dsisr = hr->hdsisr;
	vcpu->arch.fault_gpa = hr->asdr;
	vcpu->arch.emul_inst = hr->heir;
	vcpu->arch.shregs.srr0 = hr->srr0;
	vcpu->arch.shregs.srr1 = hr->srr1;
	vcpu->arch.shregs.sprg0 = hr->sprg[0];
	vcpu->arch.shregs.sprg1 = hr->sprg[1];
	vcpu->arch.shregs.sprg2 = hr->sprg[2];
	vcpu->arch.shregs.sprg3 = hr->sprg[3];
	vcpu->arch.pid = hr->pidr;
	vcpu->arch.cfar = hr->cfar;
	vcpu->arch.ppr = hr->ppr;
}

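/*
 * Called from the H_ENTER_NESTED path when an L2 load/store needs MMIO
 * emulation to be completed before the nested guest can make progress.
 * The fault is not reflected to L1; instead the L1 address of the target
 * GPR slot (inside the pt_regs buffer that L1 passed in) is remembered
 * so that kvmppc_complete_mmio_load() can write the loaded value there.
 */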
static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
{
	/* No need to reflect the page fault to L1, we've handled it */
	vcpu->arch.trap = 0;

	/*
	 * Since the L2 gprs have already been written back into L1 memory when
	 * we complete the mmio, store the L1 memory location of the L2 gpr
	 * being loaded into by the mmio so that the loaded value can be
	 * written there in kvmppc_complete_mmio_load()
	 */
	if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
	    && (vcpu->mmio_is_write == 0)) {
		vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
					   offsetof(struct pt_regs,
						    gpr[vcpu->arch.io_gpr]);
		vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
	}
}

static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
					   struct hv_guest_state *l2_hv,
					   struct pt_regs *l2_regs,
					   u64 hv_ptr, u64 regs_ptr)
{
	int size;

	if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
				sizeof(l2_hv->version)))
		return -1;

	if (kvmppc_need_byteswap(vcpu))
		l2_hv->version = swab64(l2_hv->version);

	size = hv_guest_state_size(l2_hv->version);
	if (size < 0)
		return -1;

	return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
		kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
				    sizeof(struct pt_regs));
}

static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
					    struct hv_guest_state *l2_hv,
					    struct pt_regs *l2_regs,
					    u64 hv_ptr, u64 regs_ptr)
{
	int size;

	size = hv_guest_state_size(l2_hv->version);
	if (size < 0)
		return -1;

	return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
		kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
				     sizeof(struct pt_regs));
}

long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
{
	long int err, r;
	struct kvm_nested_guest *l2;
	struct pt_regs l2_regs, saved_l1_regs;
	struct hv_guest_state l2_hv = {0}, saved_l1_hv;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 hv_ptr, regs_ptr;
	u64 hdec_exp;
	s64 delta_purr, delta_spurr, delta_ic, delta_vtb;

	if (vcpu->kvm->arch.l1_ptcr == 0)
		return H_NOT_AVAILABLE;

	/* copy parameters in */
	hv_ptr = kvmppc_get_gpr(vcpu, 4);
	regs_ptr = kvmppc_get_gpr(vcpu, 5);
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
					      hv_ptr, regs_ptr);
	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	if (err)
		return H_PARAMETER;

	if (kvmppc_need_byteswap(vcpu))
		byteswap_hv_regs(&l2_hv);
	if (l2_hv.version > HV_GUEST_STATE_VERSION)
		return H_P2;

	if (kvmppc_need_byteswap(vcpu))
		byteswap_pt_regs(&l2_regs);
	if (l2_hv.vcpu_token >= NR_CPUS)
		return H_PARAMETER;

	/* translate lpid */
	l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
	if (!l2)
		return H_PARAMETER;
	if (!l2->l1_gr_to_hr) {
		mutex_lock(&l2->tlb_lock);
		kvmhv_update_ptbl_cache(l2);
		mutex_unlock(&l2->tlb_lock);
	}

	/* save l1 values of things */
	vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
	saved_l1_regs = vcpu->arch.regs;
	kvmhv_save_hv_regs(vcpu, &saved_l1_hv);

	/* convert TB values/offsets to host (L0) values */
	hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
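	/*
	 * l2_hv.hdec_expiry was supplied in the L1's timebase; subtracting
	 * the L1 vcore's tb_offset above gives an L0 timebase value, which
	 * is what mftb() is compared against in the run loop below.
	 * Adding the L2's tb_offset to vc->tb_offset makes the vcore run
	 * on the L2's timebase; the L1 value is restored from saved_l1_hv
	 * once the L2 exits.
	 */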
	vc->tb_offset += l2_hv.tb_offset;

	/* set L1 state to L2 state */
	vcpu->arch.nested = l2;
	vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
	vcpu->arch.regs = l2_regs;

	/* Guest must always run with ME enabled, HV disabled. */
	vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;

	sanitise_hv_regs(vcpu, &l2_hv);
	restore_hv_regs(vcpu, &l2_hv);

	vcpu->arch.ret = RESUME_GUEST;
	vcpu->arch.trap = 0;
	do {
		if (mftb() >= hdec_exp) {
			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
			r = RESUME_HOST;
			break;
		}
		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
	} while (is_kvmppc_resume_guest(r));

	/* save L2 state for return */
	l2_regs = vcpu->arch.regs;
	l2_regs.msr = vcpu->arch.shregs.msr;
	delta_purr = vcpu->arch.purr - l2_hv.purr;
	delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
	delta_ic = vcpu->arch.ic - l2_hv.ic;
	delta_vtb = vc->vtb - l2_hv.vtb;
	save_hv_return_state(vcpu, vcpu->arch.trap, &l2_hv);

	/* restore L1 state */
	vcpu->arch.nested = NULL;
	vcpu->arch.regs = saved_l1_regs;
	vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
	/* set L1 MSR TS field according to L2 transaction state */
	if (l2_regs.msr & MSR_TS_MASK)
		vcpu->arch.shregs.msr |= MSR_TS_S;
	vc->tb_offset = saved_l1_hv.tb_offset;
	restore_hv_regs(vcpu, &saved_l1_hv);
	vcpu->arch.purr += delta_purr;
	vcpu->arch.spurr += delta_spurr;
	vcpu->arch.ic += delta_ic;
	vc->vtb += delta_vtb;

	kvmhv_put_nested(l2);

	/* copy l2_hv_state and regs back to guest */
	if (kvmppc_need_byteswap(vcpu)) {
		byteswap_hv_regs(&l2_hv);
		byteswap_pt_regs(&l2_regs);
	}
	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
	err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
					       hv_ptr, regs_ptr);
	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
	if (err)
		return H_AUTHORITY;

	if (r == -EINTR)
		return H_INTERRUPT;

	if (vcpu->mmio_needed) {
		kvmhv_nested_mmio_needed(vcpu, regs_ptr);
		return H_TOO_HARD;
	}

	return vcpu->arch.trap;
}

long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

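/*
 * Undo kvmhv_nested_init(): tell the parent hypervisor to stop using
 * our partition table (by registering a PTCR of 0) and then free it.
 */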
void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}

static void kvmhv_flush_lpid(unsigned int lpid)
{
	long rc;

	if (!kvmhv_on_pseries()) {
		radix__flush_all_lpid(lpid);
		return;
	}

	rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
				lpid, TLBIEL_INVAL_SET_LPID);
	if (rc)
		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
}

void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (!kvmhv_on_pseries()) {
		mmu_partition_table_set_entry(lpid, dw0, dw1, true);
		return;
	}

	pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
	pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	/* L0 will do the necessary barriers */
	kvmhv_flush_lpid(lpid);
}

static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

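/*
 * The PTCR value saved above is consulted later: kvmhv_get_nested()
 * checks requested L1 LPIDs against its size field, and
 * kvmhv_update_ptbl_cache() uses its base address to read the L1's
 * partition-table entries.
 */
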
/*
 * Handle the H_COPY_TOFROM_GUEST hcall.
 * r4 = L1 lpid of nested guest
 * r5 = pid
 * r6 = eaddr to access
 * r7 = to buffer (L1 gpa)
 * r8 = from buffer (L1 gpa)
 * r9 = n bytes to copy
 */
long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp;
	int l1_lpid = kvmppc_get_gpr(vcpu, 4);
	int pid = kvmppc_get_gpr(vcpu, 5);
	gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
	gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
	gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
	void *buf;
	unsigned long n = kvmppc_get_gpr(vcpu, 9);
	bool is_load = !!gp_to;
	long rc;

	if (gp_to && gp_from) /* One must be NULL to determine the direction */
		return H_PARAMETER;

	if (eaddr & (0xFFFUL << 52))
		return H_PARAMETER;

	buf = kzalloc(n, GFP_KERNEL);
	if (!buf)
		return H_NO_MEM;

	gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
	if (!gp) {
		rc = H_PARAMETER;
		goto out_free;
	}

	mutex_lock(&gp->tlb_lock);

	if (is_load) {
		/* Load from the nested guest into our buffer */
		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
						     eaddr, buf, NULL, n);
		if (rc)
			goto not_found;

		/* Write what was loaded into our buffer back to the L1 guest */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		if (rc)
			goto not_found;
	} else {
		/* Load the data to be stored from the L1 guest into our buf */
		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
		if (rc)
			goto not_found;

		/* Store from our buffer into the nested guest */
		rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
						     eaddr, NULL, buf, n);
		if (rc)
			goto not_found;
	}

out_unlock:
	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
out_free:
	kfree(buf);
	return rc;
not_found:
	rc = H_NOT_FOUND;
	goto out_unlock;
}

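/*
 * gp->l1_gr_to_hr caches patb0 (the root of the L1's partition-scoped
 * radix tree for this nested guest) and gp->process_table caches patb1,
 * both taken from the guest's entry in the L1 partition table; a zero
 * l1_gr_to_hr means the entry could not be read or is not valid yet.
 */
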
/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
		int srcu_idx = srcu_read_lock(&kvm->srcu);
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
		srcu_read_unlock(&kvm->srcu, srcu_idx);
	}
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}

static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;
	gp->radix = 1;

	memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));

	return gp;

out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	if (gp->shadow_pgtable) {
		/*
		 * No vcpu is using this struct and no call to
		 * kvmhv_get_nested can find this struct,
		 * so we don't need to hold kvm->mmu_lock.
		 */
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		pgd_free(kvm->mm, gp->shadow_pgtable);
	}
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	kfree(gp);
}

static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

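/*
 * Reference counting for struct kvm_nested_guest: the entry in
 * kvm->arch.nested_guests[] holds one reference, and each user obtained
 * via kvmhv_get_nested() holds another until it calls kvmhv_put_nested().
 * The structure is released once the count drops to zero.
 */
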
/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;
	struct kvm_memory_slot *memslot;
	int srcu_idx;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	kvm_for_each_memslot(memslot, kvm_memslots(kvm))
		kvmhv_free_memslot_nest_rmap(memslot);
	srcu_read_unlock(&kvm->srcu, srcu_idx);
}

/* caller must hold gp->tlb_lock */
static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;

	spin_lock(&kvm->mmu_lock);
	kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
	spin_unlock(&kvm->mmu_lock);
	kvmhv_flush_lpid(gp->shadow_lpid);
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}

struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
{
	if (lpid > kvm->arch.max_nested_lpid)
		return NULL;
	return kvm->arch.nested_guests[lpid];
}

pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
				 unsigned long ea, unsigned *hshift)
{
	struct kvm_nested_guest *gp;
	pte_t *pte;

	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return NULL;

	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
		"%s called with kvm mmu_lock not held \n", __func__);
	pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);

	return pte;
}

static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
{
	return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
				      RMAP_NESTED_GPA_MASK));
}

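/*
 * Each nested rmap entry packs the nested guest's LPID and the nested
 * guest physical address into one 64-bit value (RMAP_NESTED_LPID_MASK /
 * RMAP_NESTED_GPA_MASK).  A memslot rmap slot holds either a single
 * entry inline, flagged with RMAP_NESTED_IS_SINGLE_ENTRY, or the head
 * of a llist of struct rmap_nested once a second entry is added.
 */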
void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
			    struct rmap_nested **n_rmap)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	u64 rmap, new_rmap = (*n_rmap)->rmap;

	/* Are there any existing entries? */
	if (!(*rmapp)) {
		/* No -> use the rmap as a single entry */
		*rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
		return;
	}

	/* Do any entries match what we're trying to insert? */
	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
			return;
	}

	/* Do we need to create a list or just add the new entry? */
	rmap = *rmapp;
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		*rmapp = 0UL;
	llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
	if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
		(*n_rmap)->list.next = (struct llist_node *) rmap;

	/* Set NULL so not freed by caller */
	*n_rmap = NULL;
}

static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
				      unsigned long clr, unsigned long set,
				      unsigned long hpa, unsigned long mask)
{
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;

	/* Find the pte */
	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
	/*
	 * If the pte is present and the pfn is still the same, update the pte.
	 * If the pfn has changed then this is a stale rmap entry, the nested
	 * gpa actually points somewhere else now, and there is nothing to do.
	 * XXX A future optimisation would be to remove the rmap entry here.
	 */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
		__radix_pte_update(ptep, clr, set);
		kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
	}
}

/*
 * For a given list of rmap entries, update the rc bits in all ptes in shadow
 * page tables for nested guests which are referenced by the rmap list.
 */
void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
				    unsigned long clr, unsigned long set,
				    unsigned long hpa, unsigned long nbytes)
{
	struct llist_node *entry = ((struct llist_head *) rmapp)->first;
	struct rmap_nested *cursor;
	unsigned long rmap, mask;

	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
		return;

	mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= mask;

	for_each_nest_rmap_safe(cursor, entry, &rmap)
		kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
}

static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
				   unsigned long hpa, unsigned long mask)
{
	struct kvm_nested_guest *gp;
	unsigned long gpa;
	unsigned int shift, lpid;
	pte_t *ptep;

	gpa = n_rmap & RMAP_NESTED_GPA_MASK;
	lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
	gp = kvmhv_find_nested(kvm, lpid);
	if (!gp)
		return;

	/* Find and invalidate the pte */
	ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
	/* Don't spuriously invalidate ptes if the pfn has changed */
	if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
}

static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
					unsigned long hpa, unsigned long mask)
{
	struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
	struct rmap_nested *cursor;
	unsigned long rmap;

	for_each_nest_rmap_safe(cursor, entry, &rmap) {
		kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
		kfree(cursor);
	}
}

/* called with kvm->mmu_lock held */
void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
				  const struct kvm_memory_slot *memslot,
				  unsigned long gpa, unsigned long hpa,
				  unsigned long nbytes)
{
	unsigned long gfn, end_gfn;
	unsigned long addr_mask;

	if (!memslot)
		return;
	gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
	end_gfn = gfn + (nbytes >> PAGE_SHIFT);

	addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
	hpa &= addr_mask;

	for (; gfn < end_gfn; gfn++) {
		unsigned long *rmap = &memslot->arch.rmap[gfn];
		kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
	}
}

static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
{
	unsigned long page;

	for (page = 0; page < free->npages; page++) {
		unsigned long rmap, *rmapp = &free->arch.rmap[page];
		struct rmap_nested *cursor;
		struct llist_node *entry;

		entry = llist_del_all((struct llist_head *) rmapp);
		for_each_nest_rmap_safe(cursor, entry, &rmap)
			kfree(cursor);
	}
}

static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
					struct kvm_nested_guest *gp,
					long gpa, int *shift_ret)
{
	struct kvm *kvm = vcpu->kvm;
	bool ret = false;
	pte_t *ptep;
	int shift;

	spin_lock(&kvm->mmu_lock);
	ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (ptep && pte_present(*ptep)) {
		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
		ret = true;
	}
	spin_unlock(&kvm->mmu_lock);

	if (shift_ret)
		*shift_ret = shift;
	return ret;
}

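/*
 * Field extractors for the tlbie instruction image and the RS/RB values
 * that L1 passes to the H_TLB_INVALIDATE hcall: RIC, PRS and R come
 * from the instruction image, the LPID from RS, and IS, AP and EPN
 * from RB.
 */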
static inline int get_ric(unsigned int instr)
{
	return (instr >> 18) & 0x3;
}

static inline int get_prs(unsigned int instr)
{
	return (instr >> 17) & 0x1;
}

static inline int get_r(unsigned int instr)
{
	return (instr >> 16) & 0x1;
}

static inline int get_lpid(unsigned long r_val)
{
	return r_val & 0xffffffff;
}

static inline int get_is(unsigned long r_val)
{
	return (r_val >> 10) & 0x3;
}

static inline int get_ap(unsigned long r_val)
{
	return (r_val >> 5) & 0x7;
}

static inline long get_epn(unsigned long r_val)
{
	return r_val >> 12;
}

static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
					int ap, long epn)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	long npages;
	int shift, shadow_shift;
	unsigned long addr;

	shift = ap_to_shift(ap);
	addr = epn << 12;
	if (shift < 0)
		/* Invalid ap encoding */
		return -EINVAL;

	addr &= ~((1UL << shift) - 1);
	npages = 1UL << (shift - PAGE_SHIFT);

	gp = kvmhv_get_nested(kvm, lpid, false);
	if (!gp) /* No such guest -> nothing to do */
		return 0;
	mutex_lock(&gp->tlb_lock);

	/* There may be more than one host page backing this single guest pte */
	do {
		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);

		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
		addr += 1UL << shadow_shift;
	} while (npages > 0);

	mutex_unlock(&gp->tlb_lock);
	kvmhv_put_nested(gp);
	return 0;
}

static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
				     struct kvm_nested_guest *gp, int ric)
{
	struct kvm *kvm = vcpu->kvm;

	mutex_lock(&gp->tlb_lock);
	switch (ric) {
	case 0:
		/* Invalidate TLB */
		spin_lock(&kvm->mmu_lock);
		kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
					  gp->shadow_lpid);
		kvmhv_flush_lpid(gp->shadow_lpid);
		spin_unlock(&kvm->mmu_lock);
		break;
	case 1:
		/*
		 * Invalidate PWC
		 * We don't cache this -> nothing to do
		 */
		break;
	case 2:
		/* Invalidate TLB, PWC and caching of partition table entries */
		kvmhv_flush_nested(gp);
		break;
	default:
		break;
	}
	mutex_unlock(&gp->tlb_lock);
}

static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int i;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (gp) {
			spin_unlock(&kvm->mmu_lock);
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			spin_lock(&kvm->mmu_lock);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
				    unsigned long rsval, unsigned long rbval)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_nested_guest *gp;
	int r, ric, prs, is, ap;
	int lpid;
	long epn;
	int ret = 0;

	ric = get_ric(instr);
	prs = get_prs(instr);
	r = get_r(instr);
	lpid = get_lpid(rsval);
	is = get_is(rbval);

	/*
	 * These cases are invalid and are not handled:
	 * r   != 1 -> Only radix supported
	 * prs == 1 -> Not HV privileged
	 * ric == 3 -> No cluster bombs for radix
	 * is  == 1 -> Partition scoped translations not associated with pid
	 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
	 */
	if ((!r) || (prs) || (ric == 3) || (is == 1) ||
	    ((!is) && (ric == 1 || ric == 2)))
		return -EINVAL;

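	/*
	 * is == 0 -> invalidate a single target address (ric == 0 here)
	 * is == 2 -> invalidate everything for one LPID
	 * is == 3 -> invalidate everything for all LPIDs
	 */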
	switch (is) {
	case 0:
		/*
		 * We know ric == 0
		 * Invalidate TLB for a given target address
		 */
		epn = get_epn(rbval);
		ap = get_ap(rbval);
		ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
		break;
	case 2:
		/* Invalidate matching LPID */
		gp = kvmhv_get_nested(kvm, lpid, false);
		if (gp) {
			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
			kvmhv_put_nested(gp);
		}
		break;
	case 3:
		/* Invalidate ALL LPIDs */
		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

/*
 * This handles the H_TLB_INVALIDATE hcall.
 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
 * (r6) rB contents.
 */
long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
{
	int ret;

	ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
			kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
	if (ret)
		return H_PARAMETER;
	return H_SUCCESS;
}

/* Used to convert a nested guest real address to a L1 guest real address */
static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa, unsigned long dsisr,
				       struct kvmppc_pte *gpte_p)
{
	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
	int ret;

	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
					 &fault_addr);

	if (ret) {
		/* We didn't find a pte */
		if (ret == -EINVAL) {
			/* Unsupported mmu config */
			flags |= DSISR_UNSUPP_MMU;
		} else if (ret == -ENOENT) {
			/* No translation found */
			flags |= DSISR_NOHPTE;
		} else if (ret == -EFAULT) {
			/* Couldn't access L1 real address */
			flags |= DSISR_PRTABLE_FAULT;
			vcpu->arch.fault_gpa = fault_addr;
		} else {
			/* Unknown error */
			return ret;
		}
		goto forward_to_l1;
	} else {
		/* We found a pte -> check permissions */
		if (dsisr & DSISR_ISSTORE) {
			/* Can we write? */
			if (!gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
			/* Can we execute? */
			if (!gpte_p->may_execute) {
				flags |= SRR1_ISI_N_G_OR_CIP;
				goto forward_to_l1;
			}
		} else {
			/* Can we read? */
			if (!gpte_p->may_read && !gpte_p->may_write) {
				flags |= DSISR_PROTFAULT;
				goto forward_to_l1;
			}
		}
	}

	return 0;

forward_to_l1:
	vcpu->arch.fault_dsisr = flags;
	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
		vcpu->arch.shregs.msr |= flags;
	}
	return RESUME_HOST;
}

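/*
 * Handle a fault taken purely to set the reference/change bits: set them
 * in our pte for the L1 address and in the shadow pte for the nested
 * address, but only if the required bits are already set in the L1's own
 * pte (gpte.rc); otherwise the fault is sent back to L1 by returning
 * RESUME_HOST.
 */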
static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
				       struct kvm_nested_guest *gp,
				       unsigned long n_gpa,
				       struct kvmppc_pte gpte,
				       unsigned long dsisr)
{
	struct kvm *kvm = vcpu->kvm;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	u64 pgflags;
	long ret;

	/* Are the rc bits set in the L1 partition scoped pte? */
	pgflags = _PAGE_ACCESSED;
	if (writing)
		pgflags |= _PAGE_DIRTY;
	if (pgflags & ~gpte.rc)
		return RESUME_HOST;

	spin_lock(&kvm->mmu_lock);
	/* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
	ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
				      gpte.raddr, kvm->arch.lpid);
	if (!ret) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Set the rc bit in the pte of the shadow_pgtable for the nested guest */
	ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
				      n_gpa, gp->l1_lpid);
	if (!ret)
		ret = -EINVAL;
	else
		ret = 0;

out_unlock:
	spin_unlock(&kvm->mmu_lock);
	return ret;
}

static inline int kvmppc_radix_level_to_shift(int level)
{
	switch (level) {
	case 2:
		return PUD_SHIFT;
	case 1:
		return PMD_SHIFT;
	default:
		return PAGE_SHIFT;
	}
}

static inline int kvmppc_radix_shift_to_level(int shift)
{
	if (shift == PUD_SHIFT)
		return 2;
	if (shift == PMD_SHIFT)
		return 1;
	if (shift == PAGE_SHIFT)
		return 0;
	WARN_ON_ONCE(1);
	return 0;
}

/* called with gp->tlb_lock held */
static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
					  struct kvm_nested_guest *gp)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_memory_slot *memslot;
	struct rmap_nested *n_rmap;
	struct kvmppc_pte gpte;
	pte_t pte, *pte_p;
	unsigned long mmu_seq;
	unsigned long dsisr = vcpu->arch.fault_dsisr;
	unsigned long ea = vcpu->arch.fault_dar;
	unsigned long *rmapp;
	unsigned long n_gpa, gpa, gfn, perm = 0UL;
	unsigned int shift, l1_shift, level;
	bool writing = !!(dsisr & DSISR_ISSTORE);
	bool kvm_ro = false;
	long int ret;

	if (!gp->l1_gr_to_hr) {
		kvmhv_update_ptbl_cache(gp);
		if (!gp->l1_gr_to_hr)
			return RESUME_HOST;
	}

	/* Convert the nested guest real address into a L1 guest real address */
	n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
	if (!(dsisr & DSISR_PRTABLE_FAULT))
		n_gpa |= ea & 0xFFF;
	ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);

	/*
	 * If the hardware found a translation but we don't now have a usable
	 * translation in the l1 partition-scoped tree, remove the shadow pte
	 * and let the guest retry.
	 */
	if (ret == RESUME_HOST &&
	    (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
		      DSISR_BAD_COPYPASTE)))
		goto inval;
	if (ret)
		return ret;

	/* Failed to set the reference/change bits */
	if (dsisr & DSISR_SET_RC) {
		ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
		if (ret == RESUME_HOST)
			return ret;
		if (ret)
			goto inval;
		dsisr &= ~DSISR_SET_RC;
		if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
			       DSISR_PROTFAULT)))
			return RESUME_GUEST;
	}

	/*
	 * We took an HISI or HDSI while we were running a nested guest which
	 * means we have no partition scoped translation for that. This means
	 * we need to insert a pte for the mapping into our shadow_pgtable.
	 */
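	/*
	 * At this point gpte describes the L1 pte that maps n_gpa:
	 * gpte.raddr is the L1 real address and gpte.page_shift the L1
	 * page size.  The steps below find host backing for that L1
	 * address and then combine the two translations into one shadow
	 * pte for the nested guest.
	 */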
	l1_shift = gpte.page_shift;
	if (l1_shift < PAGE_SHIFT) {
		/* We don't support l1 using a page size smaller than our own */
		pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
		       l1_shift, PAGE_SHIFT);
		return -EINVAL;
	}
	gpa = gpte.raddr;
	gfn = gpa >> PAGE_SHIFT;

	/* 1. Get the corresponding host memslot */

	memslot = gfn_to_memslot(kvm, gfn);
	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
		if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
			/* unusual error -> reflect to the guest as a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
			return RESUME_GUEST;
		}

		/* passthrough of emulated MMIO case */
		return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
	}
	if (memslot->flags & KVM_MEM_READONLY) {
		if (writing) {
			/* Give the guest a DSI */
			kvmppc_core_queue_data_storage(vcpu, ea,
					DSISR_ISSTORE | DSISR_PROTFAULT);
			return RESUME_GUEST;
		}
		kvm_ro = true;
	}

	/* 2. Find the host pte for this L1 guest real address */

	/* Used to check for invalidations in progress */
	mmu_seq = kvm->mmu_notifier_seq;
	smp_rmb();

	/* See if can find translation in our partition scoped tables for L1 */
	pte = __pte(0);
	spin_lock(&kvm->mmu_lock);
	pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
	if (!shift)
		shift = PAGE_SHIFT;
	if (pte_p)
		pte = *pte_p;
	spin_unlock(&kvm->mmu_lock);

	if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
		/* No suitable pte found -> try to insert a mapping */
		ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
					writing, kvm_ro, &pte, &level);
		if (ret == -EAGAIN)
			return RESUME_GUEST;
		else if (ret)
			return ret;
		shift = kvmppc_radix_level_to_shift(level);
	}
	/* Align gfn to the start of the page */
	gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;

	/* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */

	/* The permission bits are the combination of the host and l1 guest ptes */
	perm |= gpte.may_read ? 0UL : _PAGE_READ;
	perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
	perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
	/* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
	perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
	perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
	pte = __pte(pte_val(pte) & ~perm);

	/* What size pte can we insert? */
	if (shift > l1_shift) {
		u64 mask;
		unsigned int actual_shift = PAGE_SHIFT;

		if (PMD_SHIFT < l1_shift)
			actual_shift = PMD_SHIFT;
		mask = (1UL << shift) - (1UL << actual_shift);
		pte = __pte(pte_val(pte) | (gpa & mask));
		shift = actual_shift;
	}
	level = kvmppc_radix_shift_to_level(shift);
	n_gpa &= ~((1UL << shift) - 1);

	/* 4. Insert the pte into our shadow_pgtable */

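	/*
	 * Record an rmap entry (LPID + nested gpa) against the host page
	 * backing this mapping so the shadow pte can be found again when
	 * the host pte is invalidated or its R/C bits change.  If the
	 * entry is actually linked in (kvmhv_insert_nest_rmap() NULLs
	 * *n_rmap in that case), the kfree() below is a no-op.
	 */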
	n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
	if (!n_rmap)
		return RESUME_GUEST; /* Let the guest try again */
	n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
		(((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
	ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
				mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
	kfree(n_rmap);
	if (ret == -EAGAIN)
		ret = RESUME_GUEST;	/* Let the guest try again */

	return ret;

inval:
	kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
	return RESUME_GUEST;
}

long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
{
	struct kvm_nested_guest *gp = vcpu->arch.nested;
	long int ret;

	mutex_lock(&gp->tlb_lock);
	ret = __kvmhv_nested_page_fault(vcpu, gp);
	mutex_unlock(&gp->tlb_lock);
	return ret;
}

int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
{
	int ret = -1;

	spin_lock(&kvm->mmu_lock);
	while (++lpid <= kvm->arch.max_nested_lpid) {
		if (kvm->arch.nested_guests[lpid]) {
			ret = lpid;
			break;
		}
	}
	spin_unlock(&kvm->mmu_lock);
	return ret;
}