// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.5.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait
 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page
 * allocation failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_HUGE_PAGE_ALLOC_FLAGS	(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC)

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight
 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation
 * failure warnings. Disallow access to emergency low-memory pools.
 */
#define VMW_PAGE_ALLOC_FLAGS		(__GFP_HIGHMEM|__GFP_NOWARN|	\
					 __GFP_NOMEMALLOC|__GFP_NORETRY)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS			= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS		= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS		= (1 << 3),
	VMW_BALLOON_SIGNALLED_WAKEUP_CMD	= (1 << 4),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS \
					| VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

/* Allocation order of a 2M balloon page, in units of 4k base pages */
#define VMW_BALLOON_2M_ORDER		(PMD_SHIFT - PAGE_SHIFT)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands,
 * VMW_BALLOON_SIGNALLED_WAKEUP_CMD:
 *	VMW_BALLOON_CMD_VMCI_DOORBELL_SET command.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9
#define VMW_BALLOON_CMD_VMCI_DOORBELL_SET	10

#define VMW_BALLOON_CMD_NUM			11

/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Mask of the commands after which the hypervisor reports a new target */
#define VMW_BALLOON_CMD_WITH_TARGET_MASK			\
	((1UL << VMW_BALLOON_CMD_GET_TARGET)		|	\
	 (1UL << VMW_BALLOON_CMD_LOCK)			|	\
	 (1UL << VMW_BALLOON_CMD_UNLOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)		|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)	|	\
	 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))

/* Human-readable command names, for statistics and debug messages */
static const char * const vmballoon_cmd_names[] = {
	[VMW_BALLOON_CMD_START]			= "start",
	[VMW_BALLOON_CMD_GET_TARGET]		= "target",
	[VMW_BALLOON_CMD_LOCK]			= "lock",
	[VMW_BALLOON_CMD_UNLOCK]		= "unlock",
	[VMW_BALLOON_CMD_GUEST_ID]		= "guestType",
	[VMW_BALLOON_CMD_BATCHED_LOCK]		= "batchLock",
	[VMW_BALLOON_CMD_BATCHED_UNLOCK]	= "batchUnlock",
	[VMW_BALLOON_CMD_BATCHED_2M_LOCK]	= "2m-lock",
	[VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]	= "2m-unlock",
	[VMW_BALLOON_CMD_VMCI_DOORBELL_SET]	= "doorbellSet"
};

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;		/* worker invocations */
	unsigned int doorbell;		/* VMCI doorbell notifications */

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* Monitor operations. */
	unsigned long ops[VMW_BALLOON_CMD_NUM];
	unsigned long ops_fail[VMW_BALLOON_CMD_NUM];
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

/* Batching is assumed enabled until the reset path disables it */
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
	u64 status : 5;
	u64 reserved : PAGE_SHIFT - 5;
	u64 pfn : 52;
} __packed;

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	unsigned long capabilities;

	/**
	 * @batch_page: pointer to communication batch page.
	 *
	 * When batching is used, batch_page points to a page, which holds up to
	 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
	 */
	struct vmballoon_batch_entry *batch_page;

	unsigned int batch_max_pages;
	struct page *page;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;

	struct vmci_handle vmci_doorbell;
};

static struct vmballoon balloon;

/**
 * __vmballoon_cmd - issue a balloon command through the hypervisor backdoor.
 *
 * @b: pointer to the balloon.
 * @cmd: the command to issue (one of VMW_BALLOON_CMD_*).
 * @arg1: first command argument.
 * @arg2: second command argument.
 * @result: pointer to where the command result is stored; may be NULL.
 *
 * Issues @cmd through the I/O port backdoor. For the START command, the
 * result is provided in %ecx; for all other commands it is in %ebx. If the
 * hypervisor provided a new target with the response, the cached target is
 * updated, and if the hypervisor requested a reset, the reset flag is set.
 *
 * Return: the status reported by the hypervisor.
 */
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
		unsigned long arg2, unsigned long *result)
{
	unsigned long status, dummy1, dummy2, dummy3, local_result;

	STATS_INC(b->stats.ops[cmd]);

	asm volatile ("inl %%dx" :
		"=a"(status),
		"=c"(dummy1),
		"=d"(dummy2),
		"=b"(local_result),
		"=S"(dummy3) :
		"0"(VMW_BALLOON_HV_MAGIC),
		"1"(cmd),
		"2"(VMW_BALLOON_HV_PORT),
		"3"(arg1),
		"4"(arg2) :
		"memory");

	/* update the result if needed */
	if (result)
		*result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
							   local_result;

	/* update target when applicable */
	if (status == VMW_BALLOON_SUCCESS &&
	    ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
		b->target = local_result;

	if (status != VMW_BALLOON_SUCCESS &&
	    status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
		STATS_INC(b->stats.ops_fail[cmd]);
		pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
			 __func__, vmballoon_cmd_names[cmd], arg1, arg2,
			 status);
	}

	/* mark reset required accordingly */
	if (status == VMW_BALLOON_ERROR_RESET)
		b->reset_required = true;

	return status;
}

/* Convenience wrapper around __vmballoon_cmd() that discards the result */
static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
	      unsigned long arg2)
{
	unsigned long dummy;

	return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities;
	bool success;

	status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
				 &capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	/*
	 * 2MB pages are only supported with batching. If batching is for some
	 * reason disabled, do not use 2MB pages, since otherwise the legacy
	 * mechanism is used with 2MB pages, causing a failure.
	 */
	if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
	    (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	return success;
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
			       VMW_BALLOON_GUEST_ID, 0);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	return false;
}

/* Number of 4k base pages covered by one balloon page of the given size */
static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_ORDER;

	return 1;
}

/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b)
{
	unsigned long status;
	unsigned long limit;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

	if (status == VMW_BALLOON_SUCCESS)
		return true;

	return false;
}

/* Allocate one balloon page of the given size with the matching GFP flags */
static struct page *vmballoon_alloc_page(bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS,
				   VMW_BALLOON_2M_ORDER);

	return alloc_page(VMW_PAGE_ALLOC_FLAGS);
}

/* Free one balloon page of the given size */
static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_ORDER);
	else
		__free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	/* Clearing the batch_page unconditionally has no adverse effect */
	free_page((unsigned long)b->batch_page);
	b->batch_page = NULL;
}

/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on on
 * the @page argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
					   struct page **p)
{
	if (static_branch_likely(&vmw_balloon_batching)) {
		/* batching mode */
		*p = pfn_to_page(b->batch_page[idx].pfn);
		return b->batch_page[idx].status;
	}

	/* non-batching mode */
	*p = b->page;

	/*
	 * If a failure occurs, the indication will be provided in the status
	 * of the entire operation, which is considered before the individual
	 * page status. So for non-batching mode, the indication is always of
	 * success.
	 */
	return VMW_BALLOON_SUCCESS;
}

/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @is_2m_pages: whether the page(s) are 2M (or 4k).
 * @lock: whether the operation is lock (or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that host can use it without fear that guest will need it (or
 * stop using them since the VM does). Host may reject some pages, we need to
 * check the return value and maybe submit a different page. The pages that are
 * inflated/deflated are pointed by @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
				       unsigned int num_pages,
				       bool is_2m_pages, bool lock)
{
	unsigned long cmd, pfn;

	if (static_branch_likely(&vmw_balloon_batching)) {
		if (lock)
			cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
					    VMW_BALLOON_CMD_BATCHED_LOCK;
		else
			cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
					    VMW_BALLOON_CMD_BATCHED_UNLOCK;

		/* in batching mode, the argument is the batch page's PFN */
		pfn = PHYS_PFN(virt_to_phys(b->batch_page));
	} else {
		cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
		pfn = page_to_pfn(b->page);

		/* In non-batching mode, PFNs must fit in 32-bit */
		if (unlikely(pfn != (u32)pfn))
			return VMW_BALLOON_ERROR_PPN_INVALID;
	}

	return vmballoon_cmd(b, cmd, pfn, num_pages);
}

/*
 * Notify the host about the staged pages. Pages the host accepted are added
 * to the balloon page list; pages it refused are moved to the refused list
 * to be released later.
 */
static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
			  bool is_2m_pages)
{
	unsigned long batch_status;
	int i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);

	for (i = 0; i < num_pages; i++) {
		unsigned long status;
		struct page *p;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		status = vmballoon_status_page(b, i, &p);

		/*
		 * Failure of the whole batch overrides a single operation
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		if (status == VMW_BALLOON_SUCCESS) {
			/* track allocated page */
			list_add(&p->lru, &page_size->pages);

			/* update balloon size */
			b->size += size_per_page;
			continue;
		}

		/* Error occurred */
		STATS_INC(b->stats.refused_alloc[is_2m_pages]);

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		list_add(&p->lru, &page_size->refused_pages);
		page_size->n_refused_pages++;
	}

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
			    bool is_2m_pages)
{
	int i;
	unsigned long batch_status;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);

	for (i = 0; i < num_pages; i++) {
		struct vmballoon_page_size *page_size;
		unsigned long status;
		struct page *p;

		status = vmballoon_status_page(b, i, &p);
		page_size = &b->page_sizes[is_2m_pages];

		/*
		 * Failure of the whole batch overrides a single operation
		 * results.
		 */
		if (batch_status != VMW_BALLOON_SUCCESS)
			status = batch_status;

		if (status != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
610 */ 611 static void vmballoon_release_refused_pages(struct vmballoon *b, 612 bool is_2m_pages) 613 { 614 struct page *page, *next; 615 struct vmballoon_page_size *page_size = 616 &b->page_sizes[is_2m_pages]; 617 618 list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) { 619 list_del(&page->lru); 620 vmballoon_free_page(page, is_2m_pages); 621 STATS_INC(b->stats.refused_free[is_2m_pages]); 622 } 623 624 page_size->n_refused_pages = 0; 625 } 626 627 static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) 628 { 629 if (static_branch_likely(&vmw_balloon_batching)) 630 b->batch_page[idx] = (struct vmballoon_batch_entry) 631 { .pfn = page_to_pfn(p) }; 632 else 633 b->page = p; 634 } 635 636 /** 637 * vmballoon_change - retrieve the required balloon change 638 * 639 * @b: pointer for the balloon. 640 * 641 * Return: the required change for the balloon size. A positive number 642 * indicates inflation, a negative number indicates a deflation. 643 */ 644 static int64_t vmballoon_change(struct vmballoon *b) 645 { 646 int64_t size, target; 647 648 size = b->size; 649 target = b->target; 650 651 /* 652 * We must cast first because of int sizes 653 * Otherwise we might get huge positives instead of negatives 654 */ 655 656 if (b->reset_required) 657 return 0; 658 659 /* consider a 2MB slack on deflate, unless the balloon is emptied */ 660 if (target < size && size - target < vmballoon_page_size(true) && 661 target != 0) 662 return 0; 663 664 return target - size; 665 } 666 667 /* 668 * Inflate the balloon towards its target size. Note that we try to limit 669 * the rate of allocation to make sure we are not choking the rest of the 670 * system. 671 */ 672 static void vmballoon_inflate(struct vmballoon *b) 673 { 674 unsigned int num_pages = 0; 675 int error = 0; 676 bool is_2m_pages; 677 678 /* 679 * First try NOSLEEP page allocations to inflate balloon. 
680 * 681 * If we do not throttle nosleep allocations, we can drain all 682 * free pages in the guest quickly (if the balloon target is high). 683 * As a side-effect, draining free pages helps to inform (force) 684 * the guest to start swapping if balloon target is not met yet, 685 * which is a desired behavior. However, balloon driver can consume 686 * all available CPU cycles if too many pages are allocated in a 687 * second. Therefore, we throttle nosleep allocations even when 688 * the guest is not under memory pressure. OTOH, if we have already 689 * predicted that the guest is under memory pressure, then we 690 * slowdown page allocations considerably. 691 */ 692 693 /* 694 * Start with no sleep allocation rate which may be higher 695 * than sleeping allocation rate. 696 */ 697 is_2m_pages = b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES; 698 699 while ((int64_t)(num_pages * vmballoon_page_size(is_2m_pages)) < 700 vmballoon_change(b)) { 701 struct page *page; 702 703 STATS_INC(b->stats.alloc[is_2m_pages]); 704 page = vmballoon_alloc_page(is_2m_pages); 705 if (!page) { 706 STATS_INC(b->stats.alloc_fail[is_2m_pages]); 707 if (is_2m_pages) { 708 vmballoon_lock(b, num_pages, true); 709 710 /* 711 * ignore errors from locking as we now switch 712 * to 4k pages and we might get different 713 * errors. 714 */ 715 716 num_pages = 0; 717 is_2m_pages = false; 718 continue; 719 } 720 break; 721 } 722 723 vmballoon_add_page(b, num_pages++, page); 724 if (num_pages == b->batch_max_pages) { 725 struct vmballoon_page_size *page_size = 726 &b->page_sizes[is_2m_pages]; 727 728 error = vmballoon_lock(b, num_pages, is_2m_pages); 729 730 num_pages = 0; 731 732 /* 733 * Stop allocating this page size if we already 734 * accumulated too many pages that the hypervisor 735 * refused. 736 */ 737 if (page_size->n_refused_pages >= 738 VMW_BALLOON_MAX_REFUSED) { 739 if (!is_2m_pages) 740 break; 741 742 /* 743 * Release the refused pages as we move to 4k 744 * pages. 
745 */ 746 vmballoon_release_refused_pages(b, true); 747 is_2m_pages = true; 748 } 749 750 if (error) 751 break; 752 } 753 754 cond_resched(); 755 } 756 757 if (num_pages > 0) 758 vmballoon_lock(b, num_pages, is_2m_pages); 759 760 vmballoon_release_refused_pages(b, true); 761 vmballoon_release_refused_pages(b, false); 762 } 763 764 /* 765 * Decrease the size of the balloon allowing guest to use more memory. 766 */ 767 static void vmballoon_deflate(struct vmballoon *b) 768 { 769 unsigned is_2m_pages; 770 771 /* free pages to reach target */ 772 for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes; 773 is_2m_pages++) { 774 struct page *page, *next; 775 unsigned int num_pages = 0; 776 struct vmballoon_page_size *page_size = 777 &b->page_sizes[is_2m_pages]; 778 779 list_for_each_entry_safe(page, next, &page_size->pages, lru) { 780 if ((int64_t)(num_pages * 781 vmballoon_page_size(is_2m_pages)) >= 782 -vmballoon_change(b)) 783 break; 784 785 list_del(&page->lru); 786 vmballoon_add_page(b, num_pages++, page); 787 788 if (num_pages == b->batch_max_pages) { 789 int error; 790 791 error = vmballoon_unlock(b, num_pages, 792 is_2m_pages); 793 num_pages = 0; 794 if (error) 795 return; 796 } 797 798 cond_resched(); 799 } 800 801 if (num_pages > 0) 802 vmballoon_unlock(b, num_pages, is_2m_pages); 803 } 804 } 805 806 /** 807 * vmballoon_deinit_batching - disables batching mode. 808 * 809 * @b: pointer to &struct vmballoon. 810 * 811 * Disables batching, by deallocating the page for communication with the 812 * hypervisor and disabling the static key to indicate that batching is off. 813 */ 814 static void vmballoon_deinit_batching(struct vmballoon *b) 815 { 816 free_page((unsigned long)b->batch_page); 817 b->batch_page = NULL; 818 static_branch_disable(&vmw_balloon_batching); 819 b->batch_max_pages = 1; 820 } 821 822 /** 823 * vmballoon_init_batching - enable batching mode. 824 * 825 * @b: pointer to &struct vmballoon. 
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	b->batch_page = page_address(page);
	b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

	static_branch_enable(&vmw_balloon_batching);

	return 0;
}

/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
	struct vmballoon *b = client_data;

	STATS_INC(b->stats.doorbell);

	/* kick the worker immediately rather than waiting for the timer */
	mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
	/* tell the hypervisor the doorbell is no longer valid */
	vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
		      VMCI_INVALID_ID, VMCI_INVALID_ID);

	if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
		vmci_doorbell_destroy(b->vmci_doorbell);
		b->vmci_doorbell = VMCI_INVALID_HANDLE;
	}
}

/*
 * Initialize vmci doorbell, to get notified as soon as balloon changes
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
	unsigned long error;

	/* doorbell is optional; nothing to do without the capability */
	if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
		return 0;

	error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
				     VMCI_PRIVILEGE_FLAG_RESTRICTED,
				     vmballoon_doorbell, b);

	if (error != VMCI_SUCCESS)
		goto fail;

	error =	__vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
				b->vmci_doorbell.context,
				b->vmci_doorbell.resource, NULL);

	if (error != VMW_BALLOON_SUCCESS)
		goto fail;

	return 0;
fail:
	vmballoon_vmci_cleanup(b);
	return -EIO;
}

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	int error;

	vmballoon_vmci_cleanup(b);

	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		if (vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		vmballoon_deinit_batching(b);
	}

	b->reset_required = false;

	error = vmballoon_vmci_init(b);
	if (error)
		pr_err("failed to initialize vmci doorbell\n");

	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	int64_t change = 0;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (vmballoon_send_get_target(b))
		change = vmballoon_change(b);

	if (change != 0) {
		pr_debug("%s - size: %u, target %u", __func__,
			 b->size, b->target);

		if (change > 0)
			vmballoon_inflate(b);
		else /* (change < 0) */
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

/* Dump capabilities, sizes and counters to the debugfs seq file */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;
	int i;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities: %#4x\n"
		   "used capabilities: %#4lx\n"
		   "is resetting: %c\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities,
		   b->reset_required ? 'y' : 'n');

	/* format size info */
	seq_printf(f,
		   "target: %8d pages\n"
		   "current: %8d pages\n",
		   b->target, b->size);

	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
		if (vmballoon_cmd_names[i] == NULL)
			continue;

		seq_printf(f, "%-22s: %16lu (%lu failed)\n",
			   vmballoon_cmd_names[i], stats->ops[i],
			   stats->ops_fail[i]);
	}

	seq_printf(f,
		   "\n"
		   "timer: %8u\n"
		   "doorbell: %8u\n"
		   "prim2mAlloc: %8u (%4u failed)\n"
		   "prim4kAlloc: %8u (%4u failed)\n"
		   "prim2mFree: %8u\n"
		   "primFree: %8u\n"
		   "err2mAlloc: %8u\n"
		   "errAlloc: %8u\n"
		   "err2mFree: %8u\n"
		   "errFree: %8u\n",
		   stats->timer,
		   stats->doorbell,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false]);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

/* no-op stubs when debugfs is not configured */
static inline int
vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	/* force the first worker run through the full reset/start sequence */
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);