/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
 * $DragonFly: src/sys/vm/vm_object.c,v 1.19 2004/09/17 10:02:12 dillon Exp $
 */

/*
 * Virtual memory object module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#define EASY_SCAN_FACTOR	8

#define MSYNC_FLUSH_HARDSEQ	0x01
#define MSYNC_FLUSH_SOFTSEQ	0x02

static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
	CTLFLAG_RW, &msync_flush_flags, 0, "");

static void	vm_object_qcollapse (vm_object_t object);
static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */
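
/*
 * Illustrative sketch only (not compiled): the typical lifecycle of an
 * anonymous object using the entry points defined below.  The wrapper
 * function is hypothetical; only the vm_object_*() calls are real.
 */
#if 0
static void
example_object_lifecycle(void)
{
	vm_object_t obj;

	obj = vm_object_allocate(OBJT_DEFAULT, 16);	/* 16-page object, ref_count == 1 */
	vm_object_reference(obj);		/* ref_count: 1 -> 2 */
	vm_object_deallocate(obj);		/* ref_count: 2 -> 1 */
	vm_object_deallocate(obj);		/* last reference: object is terminated */
}
#endif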

struct object_q vm_object_list;
static struct lwkt_token vm_object_list_token;
static long vm_object_count;		/* count of all objects */
vm_object_t kernel_object;
vm_object_t kmem_object;
static struct vm_object kernel_object_store;
static struct vm_object kmem_object_store;
extern int vm_pageout_page_count;

static long object_collapses;
static long object_bypasses;
static int next_index;
static vm_zone_t obj_zone;
static struct vm_zone obj_zone_store;
static int object_hash_rand;
#define VM_OBJECTS_INIT 256
static struct vm_object vm_objects_init[VM_OBJECTS_INIT];

void
_vm_object_allocate(objtype_t type, vm_size_t size, vm_object_t object)
{
	int incr;

	TAILQ_INIT(&object->memq);
	LIST_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
		vm_object_set_flag(object, OBJ_ONEMAPPING);
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->shadow_count = 0;
	object->pg_color = next_index;
	/*
	 * Advance the color index by the object's size, capped for large
	 * objects so that successive allocations still spread across the
	 * PQ_L2 page-coloring space.
	 */
	if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
		incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
	else
		incr = size;
	next_index = (next_index + incr) & PQ_L2_MASK;
	object->handle = NULL;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
	/*
	 * Try to generate a number that will spread objects out in the
	 * hash table.  We 'wipe' new objects across the hash in 128 page
	 * increments plus 1 more to offset it a little more by the time
	 * it wraps around.
	 */
	object->hash_rand = object_hash_rand - 129;

	object->generation++;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
	object_hash_rand = object->hash_rand;
}

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init(void)
{
	TAILQ_INIT(&vm_object_list);
	lwkt_token_init(&vm_object_list_token);
	vm_object_count = 0;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kmem_object);

	obj_zone = &obj_zone_store;
	zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
	    vm_objects_init, VM_OBJECTS_INIT);
}

void
vm_object_init2(void)
{
	zinitna(obj_zone, NULL, NULL, 0, 0, ZONE_PANICFAIL, 1);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */
vm_object_t
vm_object_allocate(objtype_t type, vm_size_t size)
{
	vm_object_t result;

	result = (vm_object_t) zalloc(obj_zone);

	_vm_object_allocate(type, size, result);

	return (result);
}


/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
void
vm_object_reference(vm_object_t object)
{
	if (object == NULL)
		return;

#if 0
	/* object can be re-referenced during final cleaning */
	KASSERT(!(object->flags & OBJ_DEAD),
	    ("vm_object_reference: attempting to reference dead obj"));
#endif

	object->ref_count++;
	if (object->type == OBJT_VNODE) {
		while (vget((struct vnode *) object->handle, NULL,
		    LK_RETRY|LK_NOOBJ, curthread)) {
			printf("vm_object_reference: delay in getting object\n");
		}
	}
}

void
vm_object_vndeallocate(vm_object_t object)
{
	struct vnode *vp = (struct vnode *) object->handle;

	KASSERT(object->type == OBJT_VNODE,
	    ("vm_object_vndeallocate: not a vnode object"));
	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
#ifdef INVARIANTS
	if (object->ref_count == 0) {
		vprint("vm_object_vndeallocate", vp);
		panic("vm_object_vndeallocate: bad object reference count");
	}
#endif

	object->ref_count--;
	if (object->ref_count == 0) {
		vp->v_flag &= ~VTEXT;
		vm_object_clear_flag(object, OBJ_OPT);
	}
	vrele(vp);
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
void
vm_object_deallocate(vm_object_t object)
{
	vm_object_t temp;

	while (object != NULL) {

		if (object->type == OBJT_VNODE) {
			vm_object_vndeallocate(object);
			return;
		}

		if (object->ref_count == 0) {
			panic("vm_object_deallocate: object deallocated too many times: %d", object->type);
		} else if (object->ref_count > 2) {
			object->ref_count--;
			return;
		}

		/*
		 * Here on ref_count of one or two, which are special cases for
		 * objects.
		 */
		if ((object->ref_count == 2) && (object->shadow_count == 0)) {
			vm_object_set_flag(object, OBJ_ONEMAPPING);
			object->ref_count--;
			return;
		} else if ((object->ref_count == 2) && (object->shadow_count == 1)) {
			object->ref_count--;
			if ((object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				robject = LIST_FIRST(&object->shadow_head);
				KASSERT(robject != NULL,
				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
				     object->ref_count,
				     object->shadow_count));
				if ((robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {

					robject->ref_count++;

					while (
					    robject->paging_in_progress ||
					    object->paging_in_progress
					) {
						vm_object_pip_sleep(robject, "objde1");
						vm_object_pip_sleep(object, "objde2");
					}

					if (robject->ref_count == 1) {
						robject->ref_count--;
						object = robject;
						goto doterm;
					}

					object = robject;
					vm_object_collapse(object);
					continue;
				}
			}

			return;

		} else {
			object->ref_count--;
			if (object->ref_count != 0)
				return;
		}

doterm:

		temp = object->backing_object;
		if (temp) {
			LIST_REMOVE(object, shadow_list);
			temp->shadow_count--;
			if (temp->ref_count == 0)
				vm_object_clear_flag(temp, OBJ_OPT);
			temp->generation++;
			object->backing_object = NULL;
		}

		/*
		 * Don't double-terminate, we could be in a termination
		 * recursion due to the terminate having to sync data
		 * to disk.
		 */
		if ((object->flags & OBJ_DEAD) == 0)
			vm_object_terminate(object);
		object = temp;
	}
}

/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 *	This routine may block.
 */
void
vm_object_terminate(vm_object_t object)
{
	lwkt_tokref ilock;
	vm_page_t p;
	int s;

	/*
	 * Make sure no one uses us.
	 */
	vm_object_set_flag(object, OBJ_DEAD);

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	vm_object_pip_wait(object, "objtrm");

	KASSERT(!object->paging_in_progress,
	    ("vm_object_terminate: pageout in progress"));

	/*
	 * Clean and free the pages, as appropriate. All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp;

		/*
		 * Freeze optimized copies.
		 */
		vm_freeze_copyopts(object, 0, object->size);

		/*
		 * Clean pages and flush buffers.
		 */
		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);

		vp = (struct vnode *) object->handle;
		vinvalbuf(vp, V_SAVE, NULL, 0, 0);
	}

	/*
	 * Wait for any I/O to complete, after which there had better not
	 * be any references left on the object.
	 */
	vm_object_pip_wait(object, "objtrm");

	if (object->ref_count != 0)
		panic("vm_object_terminate: object with references, ref_count=%d", object->ref_count);

	/*
	 * Now free any remaining pages. For internal objects, this also
	 * removes them from paging queues. Don't free wired pages, just
	 * remove them from the object.
	 */
	s = splvm();
	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
		if (p->busy || (p->flags & PG_BUSY))
			panic("vm_object_terminate: freeing busy page %p", p);
		if (p->wire_count == 0) {
			vm_page_busy(p);
			vm_page_free(p);
			mycpu->gd_cnt.v_pfree++;
		} else {
			vm_page_busy(p);
			vm_page_remove(p);
			vm_page_wakeup(p);
		}
	}
	splx(s);

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	/*
	 * Remove the object from the global object list.
	 */
	lwkt_gettoken(&ilock, &vm_object_list_token);
	TAILQ_REMOVE(&vm_object_list, object, object_list);
	lwkt_reltoken(&ilock);

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	zfree(obj_zone, object);
}

/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of object.  Leaves page
 *	on whatever queue it is currently on.   If NOSYNC is set then do not
 *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
 *	leaving the object dirty.
 *
 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
 *	synchronous clustering mode implementation.
 *
 *	Odd semantics: if start == end, we clean everything.
 */
void
vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
    int flags)
{
	vm_page_t p, np;
	vm_offset_t tstart, tend;
	vm_pindex_t pi;
	struct vnode *vp;
	int clearobjflags;
	int pagerflags;
	int curgeneration;
	lwkt_tokref vlock;
	int s;

	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_MIGHTBEDIRTY) == 0)
		return;

	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ? VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;

	vp = object->handle;

	vm_object_set_flag(object, OBJ_CLEANING);

	/*
	 * Handle 'entire object' case
	 */
	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}

	/*
	 * If the caller is smart and only msync()s a range he knows is
	 * dirty, we may be able to avoid an object scan.  This results in
	 * a phenomenal improvement in performance.  We cannot do this
	 * as a matter of course because the object may be huge - e.g.
	 * the size might be in the gigabytes or terabytes.
	 */
	if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
		vm_offset_t tscan;
		int scanlimit;
		int scanreset;

		scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
		if (scanreset < 16)
			scanreset = 16;
		pagerflags |= VM_PAGER_IGNORE_CLEANCHK;

		scanlimit = scanreset;
		tscan = tstart;

		/*
		 * spl protection is required despite the obj generation
		 * tracking because we cannot safely call vm_page_test_dirty()
		 * or avoid page field tests against an interrupt unbusy/free
		 * race that might occur prior to the busy check in
		 * vm_object_page_collect_flush().
		 */
		s = splvm();
		while (tscan < tend) {
			curgeneration = object->generation;
			p = vm_page_lookup(object, tscan);
			if (p == NULL || p->valid == 0 ||
			    (p->queue - p->pc) == PQ_CACHE) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			vm_page_test_dirty(p);
			if ((p->dirty & p->valid) == 0) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			/*
			 * If we have been asked to skip nosync pages and
			 * this is a nosync page, we can't continue.
			 */
			if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			scanlimit = scanreset;

			/*
			 * This returns 0 if it was unable to busy the first
			 * page (i.e. had to sleep).
			 */
			tscan += vm_object_page_collect_flush(object, p,
			    curgeneration, pagerflags);
		}
		splx(s);

		/*
		 * If everything was dirty and we flushed it successfully,
		 * and the requested range is not the entire object, we
		 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
		 * return immediately.
		 */
		if (tscan >= tend && (tstart || tend < object->size)) {
			vm_object_clear_flag(object, OBJ_CLEANING);
			return;
		}
		pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
	}

	/*
	 * Generally set CLEANCHK interlock and make the page read-only so
	 * we can then clear the object flags.
	 *
	 * However, if this is a nosync mmap then the object is likely to
	 * stay dirty so do not mess with the page and do not clear the
	 * object flags.
	 *
	 * spl protection is required because an interrupt can remove page
	 * from the object.
	 */
	clearobjflags = 1;

	s = splvm();
	for (p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
		vm_page_flag_set(p, PG_CLEANCHK);
		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
			clearobjflags = 0;
		else
			vm_page_protect(p, VM_PROT_READ);
	}
	splx(s);

	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
		struct vnode *vp;

		vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
		if (object->type == OBJT_VNODE &&
		    (vp = (struct vnode *)object->handle) != NULL) {
			if (vp->v_flag & VOBJDIRTY) {
				lwkt_gettoken(&vlock, vp->v_interlock);
				vp->v_flag &= ~VOBJDIRTY;
				lwkt_reltoken(&vlock);
			}
		}
	}

	/*
	 * spl protection is required both to avoid an interrupt unbusy/free
	 * race against a vm_page_lookup(), and also to ensure that the
	 * memq is consistent.  We do not want a busy page to be ripped out
	 * from under us.
	 */
	s = splvm();
rescan:
	splx(s);	/* give interrupts a chance */
	s = splvm();
	curgeneration = object->generation;

	for (p = TAILQ_FIRST(&object->memq); p; p = np) {
		int n;

		np = TAILQ_NEXT(p, listq);

again:
		pi = p->pindex;
		if (((p->flags & PG_CLEANCHK) == 0) ||
		    (pi < tstart) || (pi >= tend) ||
		    (p->valid == 0) ||
		    ((p->queue - p->pc) == PQ_CACHE)) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		vm_page_test_dirty(p);
		if ((p->dirty & p->valid) == 0) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		/*
		 * If we have been asked to skip nosync pages and this is a
		 * nosync page, skip it.  Note that the object flags were
		 * not cleared in this case so we do not have to set them.
		 */
		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		n = vm_object_page_collect_flush(object, p,
		    curgeneration, pagerflags);
		if (n == 0)
			goto rescan;
		if (object->generation != curgeneration)
			goto rescan;

		/*
		 * Try to optimize the next page.  If we can't we pick up
		 * our (random) scan where we left off.
		 */
		if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
			if ((p = vm_page_lookup(object, pi + n)) != NULL)
				goto again;
		}
	}
	splx(s);

#if 0
	VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
#endif

	vm_object_clear_flag(object, OBJ_CLEANING);
	return;
}
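
/*
 * Illustrative sketch only (not compiled): how a caller such as the
 * msync() path might flush a dirty file-backed range.  The offset and
 * length variables are hypothetical; OFF_TO_IDX() and OBJPC_SYNC are
 * the real interfaces used above.
 */
#if 0
	/* synchronously clean pages covering [offset, offset + length) */
	vm_object_page_clean(object, OFF_TO_IDX(offset),
	    OFF_TO_IDX(offset + length + PAGE_MASK), OBJPC_SYNC);

	/* passing start == end == 0 cleans the entire object */
	vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
#endif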

/*
 * This routine must be called at splvm() to properly avoid an interrupt
 * unbusy/free race that can occur prior to the busy check.
 *
 * Using the object generation number here to detect page ripout is not
 * the best idea in the world. XXX
 *
 * NOTE: we operate under the assumption that a page found to not be busy
 * will not be ripped out from under us by an interrupt.  XXX we should
 * recode this to explicitly busy the pages.
 */
static int
vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
{
	int runlen;
	int maxf;
	int chkb;
	int maxb;
	int i;
	vm_pindex_t pi;
	vm_page_t maf[vm_pageout_page_count];
	vm_page_t mab[vm_pageout_page_count];
	vm_page_t ma[vm_pageout_page_count];

	pi = p->pindex;
	while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
		if (object->generation != curgeneration) {
			return(0);
		}
	}

	maxf = 0;
	for (i = 1; i < vm_pageout_page_count; i++) {
		vm_page_t tp;

		if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
			if ((tp->flags & PG_BUSY) ||
			    ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
			     (tp->flags & PG_CLEANCHK) == 0) ||
			    (tp->busy != 0))
				break;
			if ((tp->queue - tp->pc) == PQ_CACHE) {
				vm_page_flag_clear(tp, PG_CLEANCHK);
				break;
			}
			vm_page_test_dirty(tp);
			if ((tp->dirty & tp->valid) == 0) {
				vm_page_flag_clear(tp, PG_CLEANCHK);
				break;
			}
			maf[i - 1] = tp;
			maxf++;
			continue;
		}
		break;
	}

	maxb = 0;
	chkb = vm_pageout_page_count - maxf;
	if (chkb) {
		for (i = 1; i < chkb; i++) {
			vm_page_t tp;

			if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
				if ((tp->flags & PG_BUSY) ||
				    ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
				     (tp->flags & PG_CLEANCHK) == 0) ||
				    (tp->busy != 0))
					break;
				if ((tp->queue - tp->pc) == PQ_CACHE) {
					vm_page_flag_clear(tp, PG_CLEANCHK);
					break;
				}
				vm_page_test_dirty(tp);
				if ((tp->dirty & tp->valid) == 0) {
					vm_page_flag_clear(tp, PG_CLEANCHK);
					break;
				}
				mab[i - 1] = tp;
				maxb++;
				continue;
			}
			break;
		}
	}

	for (i = 0; i < maxb; i++) {
		int index = (maxb - i) - 1;
		ma[index] = mab[i];
		vm_page_flag_clear(ma[index], PG_CLEANCHK);
	}
	vm_page_flag_clear(p, PG_CLEANCHK);
	ma[maxb] = p;
	for (i = 0; i < maxf; i++) {
		int index = (maxb + i) + 1;
		ma[index] = maf[i];
		vm_page_flag_clear(ma[index], PG_CLEANCHK);
	}
	runlen = maxb + maxf + 1;

	vm_pageout_flush(ma, runlen, pagerflags);
	for (i = 0; i < runlen; i++) {
		if (ma[i]->valid & ma[i]->dirty) {
			vm_page_protect(ma[i], VM_PROT_READ);
			vm_page_flag_set(ma[i], PG_CLEANCHK);

			/*
			 * maxf will end up being the actual number of pages
			 * we wrote out contiguously, non-inclusive of the
			 * first page.  We do not count look-behind pages.
			 */
			if (i >= maxb + 1 && (maxf > i - maxb - 1))
				maxf = i - maxb - 1;
		}
	}
	return(maxf + 1);
}

#ifdef not_used
/* XXX I cannot tell if this should be an exported symbol */
/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
static void
vm_object_deactivate_pages(vm_object_t object)
{
	vm_page_t p, next;
	int s;

	s = splvm();
	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
		next = TAILQ_NEXT(p, listq);
		vm_page_deactivate(p);
	}
	splx(s);
}
#endif

/*
 * Same as vm_object_pmap_copy, except range checking really
 * works, and is meant for small sections of an object.
 *
 * This code protects resident pages by making them read-only
 * and is typically called on a fork or split when a page
 * is converted to copy-on-write.
 *
 * NOTE: If the page is already at VM_PROT_NONE, calling
 * vm_page_protect will have no effect.
 */
void
vm_object_pmap_copy_1(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_pindex_t idx;
	vm_page_t p;
	int s;

	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	/*
	 * spl protection needed to prevent races between the lookup,
	 * an interrupt unbusy/free, and our protect call.
	 */
	s = splvm();
	for (idx = start; idx < end; idx++) {
		p = vm_page_lookup(object, idx);
		if (p == NULL)
			continue;
		vm_page_protect(p, VM_PROT_READ);
	}
	splx(s);
}

/*
 *	vm_object_pmap_remove:
 *
 *	Removes all physical pages in the specified
 *	object range from all physical maps.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t p;
	int s;

	if (object == NULL)
		return;

	/*
	 * spl protection is required because an interrupt can unbusy/free
	 * a page.
	 */
	s = splvm();
	for (p = TAILQ_FIRST(&object->memq);
	    p != NULL;
	    p = TAILQ_NEXT(p, listq)
	) {
		if (p->pindex >= start && p->pindex < end)
			vm_page_protect(p, VM_PROT_NONE);
	}
	splx(s);
	if ((start == 0) && (object->size == end))
		vm_object_clear_flag(object, OBJ_WRITEABLE);
}

/*
 *	vm_object_madvise:
 *
 *	Implements the madvise function at the object/page level.
 *
 *	MADV_WILLNEED	(any object)
 *
 *	    Activate the specified pages if they are resident.
 *
 *	MADV_DONTNEED	(any object)
 *
 *	    Deactivate the specified pages if they are resident.
 *
 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
 *			 OBJ_ONEMAPPING only)
 *
 *	    Deactivate and clean the specified pages if they are
 *	    resident.  This permits the process to reuse the pages
 *	    without faulting or the kernel to reclaim the pages
 *	    without I/O.
 */
void
vm_object_madvise(vm_object_t object, vm_pindex_t pindex, int count, int advise)
{
	vm_pindex_t end, tpindex;
	vm_object_t tobject;
	vm_page_t m;
	int s;

	if (object == NULL)
		return;

	end = pindex + count;

	/*
	 * Locate and adjust resident pages
	 */

	for (; pindex < end; pindex += 1) {
relookup:
		tobject = object;
		tpindex = pindex;
shadowlookup:
		/*
		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
		 * and those pages must be OBJ_ONEMAPPING.
		 */
		if (advise == MADV_FREE) {
			if ((tobject->type != OBJT_DEFAULT &&
			     tobject->type != OBJT_SWAP) ||
			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
				continue;
			}
		}

		/*
		 * spl protection is required to avoid a race between the
		 * lookup, an interrupt unbusy/free, and our busy check.
		 */

		s = splvm();
		m = vm_page_lookup(tobject, tpindex);

		if (m == NULL) {
			/*
			 * There may be swap even if there is no backing page
			 */
			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
				swap_pager_freespace(tobject, tpindex, 1);

			/*
			 * next object
			 */
			splx(s);
			if (tobject->backing_object == NULL)
				continue;
			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
			tobject = tobject->backing_object;
			goto shadowlookup;
		}

		/*
		 * If the page is busy or not in a normal active state,
		 * we skip it.  If the page is not managed there are no
		 * page queues to mess with.  Things can break if we mess
		 * with pages in any of the below states.
		 */
		if (
		    m->hold_count ||
		    m->wire_count ||
		    (m->flags & PG_UNMANAGED) ||
		    m->valid != VM_PAGE_BITS_ALL
		) {
			splx(s);
			continue;
		}

		if (vm_page_sleep_busy(m, TRUE, "madvpo")) {
			splx(s);
			goto relookup;
		}
		splx(s);

		/*
		 * Theoretically once a page is known not to be busy, an
		 * interrupt cannot come along and rip it out from under us.
		 */

		if (advise == MADV_WILLNEED) {
			vm_page_activate(m);
		} else if (advise == MADV_DONTNEED) {
			vm_page_dontneed(m);
		} else if (advise == MADV_FREE) {
			/*
			 * Mark the page clean.  This will allow the page
			 * to be freed up by the system.  However, such pages
			 * are often reused quickly by malloc()/free()
			 * so we do not do anything that would cause
			 * a page fault if we can help it.
			 *
			 * Specifically, we do not try to actually free
			 * the page now nor do we try to put it in the
			 * cache (which would cause a page fault on reuse).
			 *
			 * But we do make the page as freeable as we
			 * can without actually taking the step of unmapping
			 * it.
			 */
			pmap_clear_modify(m);
			m->dirty = 0;
			m->act_count = 0;
			vm_page_dontneed(m);
			if (tobject->type == OBJT_SWAP)
				swap_pager_freespace(tobject, tpindex, 1);
		}
	}
}
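
/*
 * Illustrative sketch only (not compiled): how the madvise(2) system
 * call path might map user advice onto the routine above.  The
 * object/pindex/npages variables are hypothetical; the advice
 * constants are the real ones handled by vm_object_madvise().
 */
#if 0
	/* prefault-friendly: activate resident pages in the range */
	vm_object_madvise(object, pindex, npages, MADV_WILLNEED);

	/* allow reclaim of a malloc arena without swap I/O */
	vm_object_madvise(object, pindex, npages, MADV_FREE);
#endif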

/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
void
vm_object_shadow(vm_object_t *object,	/* IN/OUT */
    vm_ooffset_t *offset,		/* IN/OUT */
    vm_size_t length)
{
	vm_object_t source;
	vm_object_t result;

	source = *object;

	/*
	 * Don't create the new object if the old object isn't shared.
	 */

	if (source != NULL &&
	    source->ref_count == 1 &&
	    source->handle == NULL &&
	    (source->type == OBJT_DEFAULT ||
	     source->type == OBJT_SWAP))
		return;

	/*
	 * Allocate a new object with the given length
	 */

	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 *
	 * Try to optimize the result object's page color when shadowing
	 * in order to maintain page coloring consistency in the combined
	 * shadowed object.
	 */
	result->backing_object = source;
	if (source) {
		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
		source->shadow_count++;
		source->generation++;
		result->pg_color = (source->pg_color + OFF_TO_IDX(*offset)) & PQ_L2_MASK;
	}

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */

	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */

	*offset = 0;
	*object = result;
}
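
/*
 * Illustrative sketch only (not compiled): the copy-on-write setup a
 * fork-style caller performs with vm_object_shadow().  The entry_*
 * variables and npages are hypothetical stand-ins for the caller's
 * map-entry state.
 */
#if 0
	vm_object_t obj = entry_object;		/* shared original object */
	vm_ooffset_t off = entry_offset;

	vm_object_shadow(&obj, &off, npages);
	/*
	 * obj now points at a new OBJT_DEFAULT object backed by the
	 * original, and off has been reset to 0.  Writes fault pages
	 * into the shadow while reads fall through to the backing
	 * object.
	 */
#endif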

#define	OBSC_TEST_ALL_SHADOWED	0x0001
#define	OBSC_COLLAPSE_NOWAIT	0x0002
#define	OBSC_COLLAPSE_WAIT	0x0004

static __inline int
vm_object_backing_scan(vm_object_t object, int op)
{
	int s;
	int r = 1;
	vm_page_t p;
	vm_object_t backing_object;
	vm_pindex_t backing_offset_index;

	/*
	 * spl protection is required to avoid races between the memq/lookup,
	 * an interrupt doing an unbusy/free, and our busy check.  Among
	 * other things.
	 */
	s = splvm();

	backing_object = object->backing_object;
	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);

	/*
	 * Initial conditions
	 */

	if (op & OBSC_TEST_ALL_SHADOWED) {
		/*
		 * We do not want to have to test for the existence of
		 * swap pages in the backing object.  XXX but with the
		 * new swapper this would be pretty easy to do.
		 *
		 * XXX what about anonymous MAP_SHARED memory that hasn't
		 * been ZFOD faulted yet?  If we do not test for this, the
		 * shadow test may succeed! XXX
		 */
		if (backing_object->type != OBJT_DEFAULT) {
			splx(s);
			return(0);
		}
	}
	if (op & OBSC_COLLAPSE_WAIT) {
		vm_object_set_flag(backing_object, OBJ_DEAD);
	}

	/*
	 * Our scan
	 */

	p = TAILQ_FIRST(&backing_object->memq);
	while (p) {
		vm_page_t next = TAILQ_NEXT(p, listq);
		vm_pindex_t new_pindex = p->pindex - backing_offset_index;

		if (op & OBSC_TEST_ALL_SHADOWED) {
			vm_page_t pp;

			/*
			 * Ignore pages outside the parent object's range
			 * and outside the parent object's mapping of the
			 * backing object.
			 *
			 * note that we do not busy the backing object's
			 * page.
			 */

			if (
			    p->pindex < backing_offset_index ||
			    new_pindex >= object->size
			) {
				p = next;
				continue;
			}

			/*
			 * See if the parent has the page or if the parent's
			 * object pager has the page.  If the parent has the
			 * page but the page is not valid, the parent's
			 * object pager must have the page.
			 *
			 * If this fails, the parent does not completely shadow
			 * the object and we might as well give up now.
			 */

			pp = vm_page_lookup(object, new_pindex);
			if (
			    (pp == NULL || pp->valid == 0) &&
			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
			) {
				r = 0;
				break;
			}
		}

		/*
		 * Check for busy page
		 */

		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
			vm_page_t pp;

			if (op & OBSC_COLLAPSE_NOWAIT) {
				if (
				    (p->flags & PG_BUSY) ||
				    !p->valid ||
				    p->hold_count ||
				    p->wire_count ||
				    p->busy
				) {
					p = next;
					continue;
				}
			} else if (op & OBSC_COLLAPSE_WAIT) {
				if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
					/*
					 * If we slept, anything could have
					 * happened.  Since the object is
					 * marked dead, the backing offset
					 * should not have changed so we
					 * just restart our scan.
					 */
					p = TAILQ_FIRST(&backing_object->memq);
					continue;
				}
			}

			/*
			 * Busy the page
			 */
			vm_page_busy(p);

			KASSERT(
			    p->object == backing_object,
			    ("vm_object_qcollapse(): object mismatch")
			);

			/*
			 * Destroy any associated swap
			 */
			if (backing_object->type == OBJT_SWAP) {
				swap_pager_freespace(
				    backing_object,
				    p->pindex,
				    1
				);
			}

			if (
			    p->pindex < backing_offset_index ||
			    new_pindex >= object->size
			) {
				/*
				 * Page is out of the parent object's range, we
				 * can simply destroy it.
				 */
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
				p = next;
				continue;
			}

			pp = vm_page_lookup(object, new_pindex);
			if (
			    pp != NULL ||
			    vm_pager_has_page(object, new_pindex, NULL, NULL)
			) {
				/*
				 * page already exists in parent OR swap exists
				 * for this location in the parent.  Destroy
				 * the original page from the backing object.
				 *
				 * Leave the parent's page alone
				 */
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
				p = next;
				continue;
			}

			/*
			 * Page does not exist in parent, rename the
			 * page from the backing object to the main object.
			 *
			 * If the page was mapped to a process, it can remain
			 * mapped through the rename.
			 */
			if ((p->queue - p->pc) == PQ_CACHE)
				vm_page_deactivate(p);

			vm_page_rename(p, object, new_pindex);
			/* page automatically made dirty by rename */
		}
		p = next;
	}
	splx(s);
	return(r);
}
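
/*
 * Illustrative picture of the collapse machinery below (a restatement,
 * not compiled code): given the shadow chain
 *
 *	object -> backing_object -> grandparent
 *
 * a successful collapse moves backing_object's pages and swap into
 * object and splices the chain to
 *
 *	object -> grandparent
 *
 * while the bypass case merely re-points object at grandparent once
 * object is known to shadow every page backing_object could supply.
 */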


/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
 */
static void
vm_object_qcollapse(vm_object_t object)
{
	vm_object_t backing_object = object->backing_object;

	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);

	backing_object->ref_count -= 2;
}

/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 */
void
vm_object_collapse(vm_object_t object)
{
	while (TRUE) {
		vm_object_t backing_object;

		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and the backing object exists.
		 */
		if (object == NULL)
			break;

		if ((backing_object = object->backing_object) == NULL)
			break;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsable.
		 */
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			break;
		}

		if (
		    object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0
		) {
			vm_object_qcollapse(object);
			break;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) have
		 * the parent bypass the object if the parent happens to shadow
		 * all the resident pages in the entire backing object.
		 *
		 * This is ignoring pager-backed pages such as swap pages.
		 * vm_object_backing_scan fails the shadowing test in this
		 * case.
		 */

		if (backing_object->ref_count == 1) {
			/*
			 * If there is exactly one reference to the backing
			 * object, we can collapse it into the parent.
			 */

			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);

			/*
			 * Move the pager from backing_object to object.
			 */

			if (backing_object->type == OBJT_SWAP) {
				vm_object_pip_add(backing_object, 1);

				/*
				 * scrap the paging_offset junk and do a
				 * discrete copy.  This also removes major
				 * assumptions about how the swap-pager
				 * works from where it doesn't belong.  The
				 * new swapper is able to optimize the
				 * destroy-source case.
				 */

				vm_object_pip_add(object, 1);
				swap_pager_copy(
				    backing_object,
				    object,
				    OFF_TO_IDX(object->backing_object_offset), TRUE);
				vm_object_pip_wakeup(object);

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to
			 * backing_object->backing_object moves from within
			 * backing_object to within object.
			 */

			LIST_REMOVE(object, shadow_list);
			object->backing_object->shadow_count--;
			object->backing_object->generation++;
			if (backing_object->backing_object) {
				LIST_REMOVE(backing_object, shadow_list);
				backing_object->backing_object->shadow_count--;
				backing_object->backing_object->generation++;
			}
			object->backing_object = backing_object->backing_object;
			if (object->backing_object) {
				LIST_INSERT_HEAD(
				    &object->backing_object->shadow_head,
				    object,
				    shadow_list
				);
				object->backing_object->shadow_count++;
				object->backing_object->generation++;
			}

			object->backing_object_offset +=
			    backing_object->backing_object_offset;

			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */

			KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
			KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));
			TAILQ_REMOVE(
			    &vm_object_list,
			    backing_object,
			    object_list
			);
			vm_object_count--;

			zfree(obj_zone, backing_object);

			object_collapses++;
		} else {
			vm_object_t new_backing_object;

			/*
			 * If we do not entirely shadow the backing object,
			 * there is nothing we can do so we give up.
			 */

			if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
				break;
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */

			LIST_REMOVE(object, shadow_list);
			backing_object->shadow_count--;
			backing_object->generation++;

			new_backing_object = backing_object->backing_object;
			if ((object->backing_object = new_backing_object) != NULL) {
				vm_object_reference(new_backing_object);
				LIST_INSERT_HEAD(
				    &new_backing_object->shadow_head,
				    object,
				    shadow_list
				);
				new_backing_object->shadow_count++;
				new_backing_object->generation++;
				object->backing_object_offset +=
				    backing_object->backing_object_offset;
			}

			/*
			 * Drop the reference count on backing_object. Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate, but
			 * we do anyway.
			 */
			vm_object_deallocate(backing_object);
			object_bypasses++;
		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}

/*
 *	vm_object_page_remove: [internal]
 *
 *	Removes all physical pages in the specified
 *	object range from the object's list of pages.
 */
void
vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
    boolean_t clean_only)
{
	vm_page_t p, next;
	unsigned int size;
	int all;
	int s;

	if (object == NULL || object->resident_page_count == 0)
		return;

	all = ((end == 0) && (start == 0));

	/*
	 * Since physically-backed objects do not use managed pages, we can't
	 * remove pages from the object (we must instead remove the page
	 * references, and then destroy the object).
	 */
	KASSERT(object->type != OBJT_PHYS,
	    ("attempt to remove pages from a physical object"));

	/*
	 * Indicate that the object is undergoing paging.
	 *
	 * spl protection is required to avoid a race between the memq scan,
	 * an interrupt unbusy/free, and the busy check.
	 */
	vm_object_pip_add(object, 1);
	s = splvm();
again:
	size = end - start;
	if (all || size > object->resident_page_count / 4) {
		for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
			next = TAILQ_NEXT(p, listq);
			if (all || ((start <= p->pindex) && (p->pindex < end))) {
				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */

				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			if ((p = vm_page_lookup(object, start)) != 0) {
				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					start += 1;
					size -= 1;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */
				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty) {
						start += 1;
						size -= 1;
						continue;
					}
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
			start += 1;
			size -= 1;
		}
	}
	splx(s);
	vm_object_pip_wakeup(object);
}
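
/*
 * Illustrative sketch only (not compiled): a vnode-pager style caller
 * discarding pages beyond a new, smaller end-of-file.  The nobjsize
 * variable is hypothetical and assumed to be the new object size in
 * pages.
 */
#if 0
	if (nobjsize < object->size)
		vm_object_page_remove(object, nobjsize, object->size, FALSE);
#endif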

/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(vm_object_t prev_object, vm_pindex_t prev_pindex,
    vm_size_t prev_size, vm_size_t next_size)
{
	vm_pindex_t next_pindex;

	if (prev_object == NULL) {
		return (TRUE);
	}

	if (prev_object->type != OBJT_DEFAULT &&
	    prev_object->type != OBJT_SWAP) {
		return (FALSE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 * . more than one reference
	 * . paged out
	 * . shadows another object
	 * . has a copy elsewhere
	 * (any of which mean that the pages not mapped to prev_entry
	 * may be in use anyway)
	 */

	if (prev_object->backing_object != NULL) {
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;
	next_pindex = prev_pindex + prev_size;

	if ((prev_object->ref_count > 1) &&
	    (prev_object->size != next_pindex)) {
		return (FALSE);
	}

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	if (next_pindex < prev_object->size) {
		vm_object_page_remove(prev_object,
		    next_pindex,
		    next_pindex + next_size, FALSE);
		if (prev_object->type == OBJT_SWAP)
			swap_pager_freespace(prev_object,
			    next_pindex, next_size);
	}

	/*
	 * Extend the object if necessary.
	 */
	if (next_pindex + next_size > prev_object->size)
		prev_object->size = next_pindex + next_size;

	return (TRUE);
}
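
/*
 * Worked example of the pindex arithmetic above (values illustrative
 * only): with prev_pindex == 10 and prev_size == 0x3000 bytes, the
 * PAGE_SHIFT conversion (assuming 4K pages) yields 3 pages, so
 * next_pindex becomes 13.  A 2-page next_size then covers pindexes
 * [13, 15); any stale pages there are removed and the object is
 * extended to 15 pages if it was smaller.
 */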

void
vm_object_set_writeable_dirty(vm_object_t object)
{
	struct vnode *vp;
	lwkt_tokref vlock;

	vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
	if (object->type == OBJT_VNODE &&
	    (vp = (struct vnode *)object->handle) != NULL) {
		if ((vp->v_flag & VOBJDIRTY) == 0) {
			lwkt_gettoken(&vlock, vp->v_interlock);
			vp->v_flag |= VOBJDIRTY;
			lwkt_reltoken(&vlock);
		}
	}
}



#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <sys/cons.h>

#include <ddb/ddb.h>

static int	_vm_object_in_map (vm_map_t map, vm_object_t object,
				   vm_map_entry_t entry);
static int	vm_object_in_map (vm_object_t object);

static int
_vm_object_in_map(vm_map_t map, vm_object_t object, vm_map_entry_t entry)
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

static int
vm_object_in_map(vm_object_t object)
{
	struct proc *p;

	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	return 0;
}

DB_SHOW_COMMAND(vmochk, vm_object_check)
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				db_printf("vmochk: internal obj has zero ref count: %ld\n",
					(long)object->size);
			}
			if (!vm_object_in_map(object)) {
				db_printf(
			"vmochk: internal obj is not in a map: "
			"ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
	}
}

/*
 *	vm_object_print:	[ debug ]
 */
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;

	vm_page_t p;

	/* XXX count is an (unused) arg.  Avoid shadowing it. */
#define	count	was_count

	int count;

	if (object == NULL)
		return;

	db_iprintf(
	    "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
	    object, (int)object->type, (u_long)object->size,
	    object->resident_page_count, object->ref_count, object->flags);
	/*
	 * XXX no %qd in kernel.  Truncate object->backing_object_offset.
	 */
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
	    object->shadow_count,
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (long)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef count

/* XXX need this non-static entry for calling from vm_map_print.
 */
void
vm_object_print(/* db_expr_t */ long addr,
    boolean_t have_addr,
    /* db_expr_t */ long count,
    char *modif)
{
	vm_object_print_static(addr, have_addr, count, modif);
}

DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
{
	vm_object_t object;
	int nl = 0;
	int c;

	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		vm_pindex_t idx, fidx;
		vm_pindex_t osize;
		vm_paddr_t pa = -1, padiff;
		int rcount;
		vm_page_t m;

		db_printf("new object: %p\n", (void *)object);
		if (nl > 18) {
			c = cngetc();
			if (c != ' ')
				return;
			nl = 0;
		}
		nl++;
		rcount = 0;
		fidx = 0;
		osize = object->size;
		if (osize > 128)
			osize = 128;
		for (idx = 0; idx < osize; idx++) {
			m = vm_page_lookup(object, idx);
			if (m == NULL) {
				if (rcount) {
					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
						(long)fidx, rcount, (long)pa);
					if (nl > 18) {
						c = cngetc();
						if (c != ' ')
							return;
						nl = 0;
					}
					nl++;
					rcount = 0;
				}
				continue;
			}

			if (rcount &&
			    (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
				++rcount;
				continue;
			}
			if (rcount) {
				padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
				padiff >>= PAGE_SHIFT;
				padiff &= PQ_L2_MASK;
				if (padiff == 0) {
					pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
					++rcount;
					continue;
				}
				db_printf(" index(%ld)run(%d)pa(0x%lx)",
					(long)fidx, rcount, (long)pa);
				db_printf("pd(%ld)\n", (long)padiff);
				if (nl > 18) {
					c = cngetc();
					if (c != ' ')
						return;
					nl = 0;
				}
				nl++;
			}
			fidx = idx;
			pa = VM_PAGE_TO_PHYS(m);
			rcount = 1;
		}
		if (rcount) {
			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
				(long)fidx, rcount, (long)pa);
			if (nl > 18) {
				c = cngetc();
				if (c != ' ')
					return;
				nl = 0;
			}
			nl++;
		}
	}
}
#endif /* DDB */