/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_object.c	8.5 (Berkeley) 3/22/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_object.c,v 1.171.2.8 2003/05/26 19:17:56 alc Exp $
 * $DragonFly: src/sys/vm/vm_object.c,v 1.2 2003/06/17 04:29:00 dillon Exp $
 */

/*
 *	Virtual memory object module.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>		/* for curproc, pageproc */
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>

#define EASY_SCAN_FACTOR	8

#define MSYNC_FLUSH_HARDSEQ	0x01
#define MSYNC_FLUSH_SOFTSEQ	0x02

static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
	CTLFLAG_RW, &msync_flush_flags, 0, "");

static void	vm_object_qcollapse (vm_object_t object);
static int	vm_object_page_collect_flush(vm_object_t object, vm_page_t p,
		    int curgeneration, int pagerflags);

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.  Only one "reference" to a given
 *	region of an object should be writeable.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, and locked by the object's
 *	lock.
 *
 *	Each object also records a "pager" routine which is
 *	used to retrieve (and store) pages to the proper backing
 *	storage.  In addition, objects may be backed by other
 *	objects from which they were virtual-copied.
 *
 *	The only items within the object structure which are
 *	modified after time of creation are:
 *		reference count		locked by object's lock
 *		pager routine		locked by object's lock
 *
 */
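/*
 * Added commentary (a hedged sketch, not part of the original source):
 * the usual lifecycle of an anonymous object from a caller's point of
 * view.  Error handling is omitted; the names are the ones defined below.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate(OBJT_DEFAULT, npages);  -- ref_count == 1
 *	vm_object_reference(obj);                        -- share it
 *	...
 *	vm_object_deallocate(obj);                       -- drop one ref
 *	vm_object_deallocate(obj);                       -- last ref: the
 *	                                                    object is terminated
 */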
struct object_q vm_object_list;
#ifndef NULL_SIMPLELOCKS
static struct simplelock vm_object_list_lock;
#endif
static long vm_object_count;		/* count of all objects */
vm_object_t kernel_object;
vm_object_t kmem_object;
static struct vm_object kernel_object_store;
static struct vm_object kmem_object_store;
extern int vm_pageout_page_count;

static long object_collapses;
static long object_bypasses;
static int next_index;
static vm_zone_t obj_zone;
static struct vm_zone obj_zone_store;
static int object_hash_rand;
#define VM_OBJECTS_INIT 256
static struct vm_object vm_objects_init[VM_OBJECTS_INIT];

void
_vm_object_allocate(type, size, object)
	objtype_t type;
	vm_size_t size;
	vm_object_t object;
{
	int incr;

	TAILQ_INIT(&object->memq);
	LIST_INIT(&object->shadow_head);

	object->type = type;
	object->size = size;
	object->ref_count = 1;
	object->flags = 0;
	if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
		vm_object_set_flag(object, OBJ_ONEMAPPING);
	object->paging_in_progress = 0;
	object->resident_page_count = 0;
	object->shadow_count = 0;
	object->pg_color = next_index;
	if (size > (PQ_L2_SIZE / 3 + PQ_PRIME1))
		incr = PQ_L2_SIZE / 3 + PQ_PRIME1;
	else
		incr = size;
	next_index = (next_index + incr) & PQ_L2_MASK;
	object->handle = NULL;
	object->backing_object = NULL;
	object->backing_object_offset = (vm_ooffset_t) 0;
	/*
	 * Try to generate a number that will spread objects out in the
	 * hash table.  We 'wipe' new objects across the hash in 128 page
	 * increments plus 1 more to offset it a little more by the time
	 * it wraps around.
	 */
	object->hash_rand = object_hash_rand - 129;

	object->generation++;

	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
	vm_object_count++;
	object_hash_rand = object->hash_rand;
}

/*
 *	vm_object_init:
 *
 *	Initialize the VM objects module.
 */
void
vm_object_init()
{
	TAILQ_INIT(&vm_object_list);
	simple_lock_init(&vm_object_list_lock);
	vm_object_count = 0;

	kernel_object = &kernel_object_store;
	_vm_object_allocate(OBJT_DEFAULT,
	    OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kernel_object);

	kmem_object = &kmem_object_store;
	_vm_object_allocate(OBJT_DEFAULT,
	    OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
	    kmem_object);

	obj_zone = &obj_zone_store;
	zbootinit(obj_zone, "VM OBJECT", sizeof (struct vm_object),
	    vm_objects_init, VM_OBJECTS_INIT);
}

void
vm_object_init2()
{
	zinitna(obj_zone, NULL, NULL, 0, 0, 0, 1);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */
vm_object_t
vm_object_allocate(type, size)
	objtype_t type;
	vm_size_t size;
{
	vm_object_t result;

	result = (vm_object_t) zalloc(obj_zone);

	_vm_object_allocate(type, size, result);

	return (result);
}
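/*
 * Usage note (added, hedged): the size argument is in pages, not bytes.
 * A caller holding a byte length would typically convert first, e.g.
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate(OBJT_DEFAULT,
 *	    OFF_TO_IDX(round_page(nbytes)));
 *
 * zalloc() is assumed here not to fail; obj_zone was bootstrapped in
 * vm_object_init() with a static reserve (vm_objects_init) precisely so
 * that early allocations work before the zone allocator is fully up.
 */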
/*
 *	vm_object_reference:
 *
 *	Gets another reference to the given object.
 */
void
vm_object_reference(object)
	vm_object_t object;
{
	if (object == NULL)
		return;

#if 0
	/* object can be re-referenced during final cleaning */
	KASSERT(!(object->flags & OBJ_DEAD),
	    ("vm_object_reference: attempting to reference dead obj"));
#endif

	object->ref_count++;
	if (object->type == OBJT_VNODE) {
		while (vget((struct vnode *) object->handle,
		    LK_RETRY|LK_NOOBJ, curproc)) {
			printf("vm_object_reference: delay in getting object\n");
		}
	}
}

void
vm_object_vndeallocate(object)
	vm_object_t object;
{
	struct vnode *vp = (struct vnode *) object->handle;

	KASSERT(object->type == OBJT_VNODE,
	    ("vm_object_vndeallocate: not a vnode object"));
	KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
#ifdef INVARIANTS
	if (object->ref_count == 0) {
		vprint("vm_object_vndeallocate", vp);
		panic("vm_object_vndeallocate: bad object reference count");
	}
#endif

	object->ref_count--;
	if (object->ref_count == 0) {
		vp->v_flag &= ~VTEXT;
		vm_object_clear_flag(object, OBJ_OPT);
	}
	vrele(vp);
}

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
void
vm_object_deallocate(object)
	vm_object_t object;
{
	vm_object_t temp;

	while (object != NULL) {

		if (object->type == OBJT_VNODE) {
			vm_object_vndeallocate(object);
			return;
		}

		if (object->ref_count == 0) {
			panic("vm_object_deallocate: object deallocated too many times: %d",
			    object->type);
		} else if (object->ref_count > 2) {
			object->ref_count--;
			return;
		}

		/*
		 * Here on ref_count of one or two, which are special cases for
		 * objects.
		 */
		if ((object->ref_count == 2) && (object->shadow_count == 0)) {
			vm_object_set_flag(object, OBJ_ONEMAPPING);
			object->ref_count--;
			return;
		} else if ((object->ref_count == 2) && (object->shadow_count == 1)) {
			object->ref_count--;
			if ((object->handle == NULL) &&
			    (object->type == OBJT_DEFAULT ||
			     object->type == OBJT_SWAP)) {
				vm_object_t robject;

				robject = LIST_FIRST(&object->shadow_head);
				KASSERT(robject != NULL,
				    ("vm_object_deallocate: ref_count: %d, shadow_count: %d",
				     object->ref_count,
				     object->shadow_count));
				if ((robject->handle == NULL) &&
				    (robject->type == OBJT_DEFAULT ||
				     robject->type == OBJT_SWAP)) {

					robject->ref_count++;

					while (
					    robject->paging_in_progress ||
					    object->paging_in_progress
					) {
						vm_object_pip_sleep(robject, "objde1");
						vm_object_pip_sleep(object, "objde2");
					}

					if (robject->ref_count == 1) {
						robject->ref_count--;
						object = robject;
						goto doterm;
					}

					object = robject;
					vm_object_collapse(object);
					continue;
				}
			}

			return;

		} else {
			object->ref_count--;
			if (object->ref_count != 0)
				return;
		}

doterm:

		temp = object->backing_object;
		if (temp) {
			LIST_REMOVE(object, shadow_list);
			temp->shadow_count--;
			if (temp->ref_count == 0)
				vm_object_clear_flag(temp, OBJ_OPT);
			temp->generation++;
			object->backing_object = NULL;
		}

		/*
		 * Don't double-terminate, we could be in a termination
		 * recursion due to the terminate having to sync data
		 * to disk.
		 */
		if ((object->flags & OBJ_DEAD) == 0)
			vm_object_terminate(object);
		object = temp;
	}
}
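/*
 * Added commentary on the special cases above: a ref_count of 2 with no
 * shadows means the extra reference cannot be a copy-on-write shadow, so
 * it is safe to re-assert OBJ_ONEMAPPING.  A ref_count of 2 with exactly
 * one shadow is the collapse candidate: the remaining shadow (robject)
 * may be able to absorb this object via vm_object_collapse().  Note that
 * the outer loop tears down backing chains iteratively (object = temp)
 * rather than recursively, so a long shadow chain uses constant kernel
 * stack.
 */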
/*
 *	vm_object_terminate actually destroys the specified object, freeing
 *	up all previously used resources.
 *
 *	The object must be locked.
 *	This routine may block.
 */
void
vm_object_terminate(object)
	vm_object_t object;
{
	vm_page_t p;
	int s;

	/*
	 * Make sure no one uses us.
	 */
	vm_object_set_flag(object, OBJ_DEAD);

	/*
	 * wait for the pageout daemon to be done with the object
	 */
	vm_object_pip_wait(object, "objtrm");

	KASSERT(!object->paging_in_progress,
	    ("vm_object_terminate: pageout in progress"));

	/*
	 * Clean and free the pages, as appropriate. All references to the
	 * object are gone, so we don't need to lock it.
	 */
	if (object->type == OBJT_VNODE) {
		struct vnode *vp;

		/*
		 * Freeze optimized copies.
		 */
		vm_freeze_copyopts(object, 0, object->size);

		/*
		 * Clean pages and flush buffers.
		 */
		vm_object_page_clean(object, 0, 0, OBJPC_SYNC);

		vp = (struct vnode *) object->handle;
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	}

	/*
	 * Wait for any I/O to complete, after which there had better not
	 * be any references left on the object.
	 */
	vm_object_pip_wait(object, "objtrm");

	if (object->ref_count != 0)
		panic("vm_object_terminate: object with references, ref_count=%d",
		    object->ref_count);

	/*
	 * Now free any remaining pages. For internal objects, this also
	 * removes them from paging queues. Don't free wired pages, just
	 * remove them from the object.
	 */
	s = splvm();
	while ((p = TAILQ_FIRST(&object->memq)) != NULL) {
		if (p->busy || (p->flags & PG_BUSY))
			panic("vm_object_terminate: freeing busy page %p\n", p);
		if (p->wire_count == 0) {
			vm_page_busy(p);
			vm_page_free(p);
			cnt.v_pfree++;
		} else {
			vm_page_busy(p);
			vm_page_remove(p);
		}
	}
	splx(s);

	/*
	 * Let the pager know object is dead.
	 */
	vm_pager_deallocate(object);

	/*
	 * Remove the object from the global object list.
	 */
	simple_lock(&vm_object_list_lock);
	TAILQ_REMOVE(&vm_object_list, object, object_list);
	simple_unlock(&vm_object_list_lock);

	wakeup(object);

	/*
	 * Free the space for the object.
	 */
	zfree(obj_zone, object);
}
/*
 *	vm_object_page_clean
 *
 *	Clean all dirty pages in the specified range of object.  Leaves page
 *	on whatever queue it is currently on.  If NOSYNC is set then do not
 *	write out pages with PG_NOSYNC set (originally comes from MAP_NOSYNC),
 *	leaving the object dirty.
 *
 *	When stuffing pages asynchronously, allow clustering.  XXX we need a
 *	synchronous clustering mode implementation.
 *
 *	Odd semantics: if start == end, we clean everything.
 *
 *	The object must be locked.
 */
void
vm_object_page_clean(object, start, end, flags)
	vm_object_t object;
	vm_pindex_t start;
	vm_pindex_t end;
	int flags;
{
	vm_page_t p, np;
	vm_offset_t tstart, tend;
	vm_pindex_t pi;
	struct vnode *vp;
	int clearobjflags;
	int pagerflags;
	int curgeneration;

	if (object->type != OBJT_VNODE ||
	    (object->flags & OBJ_MIGHTBEDIRTY) == 0)
		return;

	pagerflags = (flags & (OBJPC_SYNC | OBJPC_INVAL)) ?
	    VM_PAGER_PUT_SYNC : VM_PAGER_CLUSTER_OK;
	pagerflags |= (flags & OBJPC_INVAL) ? VM_PAGER_PUT_INVAL : 0;

	vp = object->handle;

	vm_object_set_flag(object, OBJ_CLEANING);

	/*
	 * Handle 'entire object' case
	 */
	tstart = start;
	if (end == 0) {
		tend = object->size;
	} else {
		tend = end;
	}

	/*
	 * If the caller is smart and only msync()s a range he knows is
	 * dirty, we may be able to avoid an object scan.  This results in
	 * a phenomenal improvement in performance.  We cannot do this
	 * as a matter of course because the object may be huge - e.g.
	 * the size might be in the gigabytes or terabytes.
	 */
	if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
		vm_offset_t tscan;
		int scanlimit;
		int scanreset;

		scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
		if (scanreset < 16)
			scanreset = 16;
		pagerflags |= VM_PAGER_IGNORE_CLEANCHK;

		scanlimit = scanreset;
		tscan = tstart;
		while (tscan < tend) {
			curgeneration = object->generation;
			p = vm_page_lookup(object, tscan);
			if (p == NULL || p->valid == 0 ||
			    (p->queue - p->pc) == PQ_CACHE) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			vm_page_test_dirty(p);
			if ((p->dirty & p->valid) == 0) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			/*
			 * If we have been asked to skip nosync pages and
			 * this is a nosync page, we can't continue.
			 */
			if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
				if (--scanlimit == 0)
					break;
				++tscan;
				continue;
			}
			scanlimit = scanreset;

			/*
			 * This returns 0 if it was unable to busy the first
			 * page (i.e. had to sleep).
			 */
			tscan += vm_object_page_collect_flush(object, p,
			    curgeneration, pagerflags);
		}

		/*
		 * If everything was dirty and we flushed it successfully,
		 * and the requested range is not the entire object, we
		 * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
		 * return immediately.
		 */
		if (tscan >= tend && (tstart || tend < object->size)) {
			vm_object_clear_flag(object, OBJ_CLEANING);
			return;
		}
		pagerflags &= ~VM_PAGER_IGNORE_CLEANCHK;
	}

	/*
	 * Generally set CLEANCHK interlock and make the page read-only so
	 * we can then clear the object flags.
	 *
	 * However, if this is a nosync mmap then the object is likely to
	 * stay dirty so do not mess with the page and do not clear the
	 * object flags.
	 */
	clearobjflags = 1;

	for (p = TAILQ_FIRST(&object->memq); p; p = TAILQ_NEXT(p, listq)) {
		vm_page_flag_set(p, PG_CLEANCHK);
		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC))
			clearobjflags = 0;
		else
			vm_page_protect(p, VM_PROT_READ);
	}

	if (clearobjflags && (tstart == 0) && (tend == object->size)) {
		struct vnode *vp;

		vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
		if (object->type == OBJT_VNODE &&
		    (vp = (struct vnode *)object->handle) != NULL) {
			if (vp->v_flag & VOBJDIRTY) {
				simple_lock(&vp->v_interlock);
				vp->v_flag &= ~VOBJDIRTY;
				simple_unlock(&vp->v_interlock);
			}
		}
	}

rescan:
	curgeneration = object->generation;

	for (p = TAILQ_FIRST(&object->memq); p; p = np) {
		int n;

		np = TAILQ_NEXT(p, listq);

again:
		pi = p->pindex;
		if (((p->flags & PG_CLEANCHK) == 0) ||
		    (pi < tstart) || (pi >= tend) ||
		    (p->valid == 0) ||
		    ((p->queue - p->pc) == PQ_CACHE)) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		vm_page_test_dirty(p);
		if ((p->dirty & p->valid) == 0) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		/*
		 * If we have been asked to skip nosync pages and this is a
		 * nosync page, skip it.  Note that the object flags were
		 * not cleared in this case so we do not have to set them.
		 */
		if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
			vm_page_flag_clear(p, PG_CLEANCHK);
			continue;
		}

		n = vm_object_page_collect_flush(object, p,
		    curgeneration, pagerflags);
		if (n == 0)
			goto rescan;
		if (object->generation != curgeneration)
			goto rescan;

		/*
		 * Try to optimize the next page.  If we can't we pick up
		 * our (random) scan where we left off.
		 */
		if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
			if ((p = vm_page_lookup(object, pi + n)) != NULL)
				goto again;
		}
	}

#if 0
	VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC) ? MNT_WAIT : 0, curproc);
#endif

	vm_object_clear_flag(object, OBJ_CLEANING);
	return;
}
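/*
 * Caller sketch (added; an assumption about the msync path, not code from
 * this file): msync(2) with MS_SYNC is expected to reach this function
 * roughly as
 *
 *	vm_object_page_clean(object, OFF_TO_IDX(start), OFF_TO_IDX(end),
 *	    OBJPC_SYNC);
 *
 * while an asynchronous msync passes flags of 0 and relies on the
 * clustering done by vm_object_page_collect_flush().  Passing
 * start == end == 0 cleans the entire object, per the "odd semantics"
 * noted above.
 */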
static int
vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
    int pagerflags)
{
	int runlen;
	int s;
	int maxf;
	int chkb;
	int maxb;
	int i;
	vm_pindex_t pi;
	vm_page_t maf[vm_pageout_page_count];
	vm_page_t mab[vm_pageout_page_count];
	vm_page_t ma[vm_pageout_page_count];

	s = splvm();
	pi = p->pindex;
	while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
		if (object->generation != curgeneration) {
			splx(s);
			return(0);
		}
	}

	maxf = 0;
	for (i = 1; i < vm_pageout_page_count; i++) {
		vm_page_t tp;

		if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
			if ((tp->flags & PG_BUSY) ||
			    ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
			     (tp->flags & PG_CLEANCHK) == 0) ||
			    (tp->busy != 0))
				break;
			if ((tp->queue - tp->pc) == PQ_CACHE) {
				vm_page_flag_clear(tp, PG_CLEANCHK);
				break;
			}
			vm_page_test_dirty(tp);
			if ((tp->dirty & tp->valid) == 0) {
				vm_page_flag_clear(tp, PG_CLEANCHK);
				break;
			}
			maf[ i - 1 ] = tp;
			maxf++;
			continue;
		}
		break;
	}

	maxb = 0;
	chkb = vm_pageout_page_count - maxf;
	if (chkb) {
		for (i = 1; i < chkb; i++) {
			vm_page_t tp;

			if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
				if ((tp->flags & PG_BUSY) ||
				    ((pagerflags & VM_PAGER_IGNORE_CLEANCHK) == 0 &&
				     (tp->flags & PG_CLEANCHK) == 0) ||
				    (tp->busy != 0))
					break;
				if ((tp->queue - tp->pc) == PQ_CACHE) {
					vm_page_flag_clear(tp, PG_CLEANCHK);
					break;
				}
				vm_page_test_dirty(tp);
				if ((tp->dirty & tp->valid) == 0) {
					vm_page_flag_clear(tp, PG_CLEANCHK);
					break;
				}
				mab[ i - 1 ] = tp;
				maxb++;
				continue;
			}
			break;
		}
	}

	for (i = 0; i < maxb; i++) {
		int index = (maxb - i) - 1;
		ma[index] = mab[i];
		vm_page_flag_clear(ma[index], PG_CLEANCHK);
	}
	vm_page_flag_clear(p, PG_CLEANCHK);
	ma[maxb] = p;
	for (i = 0; i < maxf; i++) {
		int index = (maxb + i) + 1;
		ma[index] = maf[i];
		vm_page_flag_clear(ma[index], PG_CLEANCHK);
	}
	runlen = maxb + maxf + 1;

	splx(s);
	vm_pageout_flush(ma, runlen, pagerflags);
	for (i = 0; i < runlen; i++) {
		if (ma[i]->valid & ma[i]->dirty) {
			vm_page_protect(ma[i], VM_PROT_READ);
			vm_page_flag_set(ma[i], PG_CLEANCHK);

			/*
			 * maxf will end up being the actual number of pages
			 * we wrote out contiguously, non-inclusive of the
			 * first page.  We do not count look-behind pages.
			 */
			if (i >= maxb + 1 && (maxf > i - maxb - 1))
				maxf = i - maxb - 1;
		}
	}
	return(maxf + 1);
}
#ifdef not_used
/* XXX I cannot tell if this should be an exported symbol */
/*
 *	vm_object_deactivate_pages
 *
 *	Deactivate all pages in the specified object.  (Keep its pages
 *	in memory even though it is no longer referenced.)
 *
 *	The object must be locked.
 */
static void
vm_object_deactivate_pages(object)
	vm_object_t object;
{
	vm_page_t p, next;

	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
		next = TAILQ_NEXT(p, listq);
		vm_page_deactivate(p);
	}
}
#endif

/*
 * Same as vm_object_pmap_copy, except range checking really
 * works, and is meant for small sections of an object.
 *
 * This code protects resident pages by making them read-only
 * and is typically called on a fork or split when a page
 * is converted to copy-on-write.
 *
 * NOTE: If the page is already at VM_PROT_NONE, calling
 * vm_page_protect will have no effect.
 */
void
vm_object_pmap_copy_1(object, start, end)
	vm_object_t object;
	vm_pindex_t start;
	vm_pindex_t end;
{
	vm_pindex_t idx;
	vm_page_t p;

	if (object == NULL || (object->flags & OBJ_WRITEABLE) == 0)
		return;

	for (idx = start; idx < end; idx++) {
		p = vm_page_lookup(object, idx);
		if (p == NULL)
			continue;
		vm_page_protect(p, VM_PROT_READ);
	}
}

/*
 *	vm_object_pmap_remove:
 *
 *	Removes all physical pages in the specified
 *	object range from all physical maps.
 *
 *	The object must *not* be locked.
 */
void
vm_object_pmap_remove(object, start, end)
	vm_object_t object;
	vm_pindex_t start;
	vm_pindex_t end;
{
	vm_page_t p;

	if (object == NULL)
		return;
	for (p = TAILQ_FIRST(&object->memq);
	    p != NULL;
	    p = TAILQ_NEXT(p, listq)) {
		if (p->pindex >= start && p->pindex < end)
			vm_page_protect(p, VM_PROT_NONE);
	}
	if ((start == 0) && (object->size == end))
		vm_object_clear_flag(object, OBJ_WRITEABLE);
}
/*
 *	vm_object_madvise:
 *
 *	Implements the madvise function at the object/page level.
 *
 *	MADV_WILLNEED	(any object)
 *
 *	    Activate the specified pages if they are resident.
 *
 *	MADV_DONTNEED	(any object)
 *
 *	    Deactivate the specified pages if they are resident.
 *
 *	MADV_FREE	(OBJT_DEFAULT/OBJT_SWAP objects,
 *			 OBJ_ONEMAPPING only)
 *
 *	    Deactivate and clean the specified pages if they are
 *	    resident.  This permits the process to reuse the pages
 *	    without faulting or the kernel to reclaim the pages
 *	    without I/O.
 */
void
vm_object_madvise(object, pindex, count, advise)
	vm_object_t object;
	vm_pindex_t pindex;
	int count;
	int advise;
{
	vm_pindex_t end, tpindex;
	vm_object_t tobject;
	vm_page_t m;

	if (object == NULL)
		return;

	end = pindex + count;

	/*
	 * Locate and adjust resident pages
	 */
	for (; pindex < end; pindex += 1) {
relookup:
		tobject = object;
		tpindex = pindex;
shadowlookup:
		/*
		 * MADV_FREE only operates on OBJT_DEFAULT or OBJT_SWAP pages
		 * and those pages must be OBJ_ONEMAPPING.
		 */
		if (advise == MADV_FREE) {
			if ((tobject->type != OBJT_DEFAULT &&
			     tobject->type != OBJT_SWAP) ||
			    (tobject->flags & OBJ_ONEMAPPING) == 0) {
				continue;
			}
		}

		m = vm_page_lookup(tobject, tpindex);

		if (m == NULL) {
			/*
			 * There may be swap even if there is no backing page
			 */
			if (advise == MADV_FREE && tobject->type == OBJT_SWAP)
				swap_pager_freespace(tobject, tpindex, 1);

			/*
			 * next object
			 */
			tobject = tobject->backing_object;
			if (tobject == NULL)
				continue;
			tpindex += OFF_TO_IDX(tobject->backing_object_offset);
			goto shadowlookup;
		}

		/*
		 * If the page is busy or not in a normal active state,
		 * we skip it.  If the page is not managed there are no
		 * page queues to mess with.  Things can break if we mess
		 * with pages in any of the below states.
		 */
		if (
		    m->hold_count ||
		    m->wire_count ||
		    (m->flags & PG_UNMANAGED) ||
		    m->valid != VM_PAGE_BITS_ALL
		) {
			continue;
		}

		if (vm_page_sleep_busy(m, TRUE, "madvpo"))
			goto relookup;

		if (advise == MADV_WILLNEED) {
			vm_page_activate(m);
		} else if (advise == MADV_DONTNEED) {
			vm_page_dontneed(m);
		} else if (advise == MADV_FREE) {
			/*
			 * Mark the page clean.  This will allow the page
			 * to be freed up by the system.  However, such pages
			 * are often reused quickly by malloc()/free()
			 * so we do not do anything that would cause
			 * a page fault if we can help it.
			 *
			 * Specifically, we do not try to actually free
			 * the page now nor do we try to put it in the
			 * cache (which would cause a page fault on reuse).
			 *
			 * But we do make the page as freeable as we
			 * can without actually taking the step of unmapping
			 * it.
			 */
			pmap_clear_modify(m);
			m->dirty = 0;
			m->act_count = 0;
			vm_page_dontneed(m);
			if (tobject->type == OBJT_SWAP)
				swap_pager_freespace(tobject, tpindex, 1);
		}
	}
}
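/*
 * Added summary of the advice handling above (hedged):
 *
 *	MADV_WILLNEED -> vm_page_activate()
 *	MADV_DONTNEED -> vm_page_dontneed()
 *	MADV_FREE     -> clear modify/dirty, release swap, then dontneed
 *
 * Other madvise(2) values (MADV_NORMAL, MADV_RANDOM, MADV_SEQUENTIAL, ...)
 * are expected to be handled at the vm_map layer and never reach this
 * function.
 */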
/*
 *	vm_object_shadow:
 *
 *	Create a new object which is backed by the
 *	specified existing object range.  The source
 *	object reference is deallocated.
 *
 *	The new object and offset into that object
 *	are returned in the source parameters.
 */
void
vm_object_shadow(object, offset, length)
	vm_object_t *object;	/* IN/OUT */
	vm_ooffset_t *offset;	/* IN/OUT */
	vm_size_t length;
{
	vm_object_t source;
	vm_object_t result;

	source = *object;

	/*
	 * Don't create the new object if the old object isn't shared.
	 */
	if (source != NULL &&
	    source->ref_count == 1 &&
	    source->handle == NULL &&
	    (source->type == OBJT_DEFAULT ||
	     source->type == OBJT_SWAP))
		return;

	/*
	 * Allocate a new object with the given length
	 */
	if ((result = vm_object_allocate(OBJT_DEFAULT, length)) == NULL)
		panic("vm_object_shadow: no object for shadowing");

	/*
	 * The new object shadows the source object, adding a reference to it.
	 * Our caller changes his reference to point to the new object,
	 * removing a reference to the source object.  Net result: no change
	 * of reference count.
	 *
	 * Try to optimize the result object's page color when shadowing
	 * in order to maintain page coloring consistency in the combined
	 * shadowed object.
	 */
	result->backing_object = source;
	if (source) {
		LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
		source->shadow_count++;
		source->generation++;
		result->pg_color = (source->pg_color +
		    OFF_TO_IDX(*offset)) & PQ_L2_MASK;
	}

	/*
	 * Store the offset into the source object, and fix up the offset into
	 * the new object.
	 */
	result->backing_object_offset = *offset;

	/*
	 * Return the new things
	 */
	*offset = 0;
	*object = result;
}
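/*
 * Usage sketch (added, hedged): the copy-on-write path typically shadows
 * a map entry's object in place, along the lines of
 *
 *	vm_object_shadow(&entry->object.vm_object, &entry->offset,
 *	    atop(entry->end - entry->start));
 *
 * On return the entry points at the new, initially empty object whose
 * backing_object is the old one; subsequent write faults copy pages up
 * into the front object on demand.
 */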
#define	OBSC_TEST_ALL_SHADOWED	0x0001
#define	OBSC_COLLAPSE_NOWAIT	0x0002
#define	OBSC_COLLAPSE_WAIT	0x0004

static __inline int
vm_object_backing_scan(vm_object_t object, int op)
{
	int s;
	int r = 1;
	vm_page_t p;
	vm_object_t backing_object;
	vm_pindex_t backing_offset_index;

	s = splvm();

	backing_object = object->backing_object;
	backing_offset_index = OFF_TO_IDX(object->backing_object_offset);

	/*
	 * Initial conditions
	 */
	if (op & OBSC_TEST_ALL_SHADOWED) {
		/*
		 * We do not want to have to test for the existence of
		 * swap pages in the backing object.  XXX but with the
		 * new swapper this would be pretty easy to do.
		 *
		 * XXX what about anonymous MAP_SHARED memory that hasn't
		 * been ZFOD faulted yet?  If we do not test for this, the
		 * shadow test may succeed! XXX
		 */
		if (backing_object->type != OBJT_DEFAULT) {
			splx(s);
			return(0);
		}
	}
	if (op & OBSC_COLLAPSE_WAIT) {
		vm_object_set_flag(backing_object, OBJ_DEAD);
	}

	/*
	 * Our scan
	 */
	p = TAILQ_FIRST(&backing_object->memq);
	while (p) {
		vm_page_t next = TAILQ_NEXT(p, listq);
		vm_pindex_t new_pindex = p->pindex - backing_offset_index;

		if (op & OBSC_TEST_ALL_SHADOWED) {
			vm_page_t pp;

			/*
			 * Ignore pages outside the parent object's range
			 * and outside the parent object's mapping of the
			 * backing object.
			 *
			 * note that we do not busy the backing object's
			 * page.
			 */
			if (
			    p->pindex < backing_offset_index ||
			    new_pindex >= object->size
			) {
				p = next;
				continue;
			}

			/*
			 * See if the parent has the page or if the parent's
			 * object pager has the page.  If the parent has the
			 * page but the page is not valid, the parent's
			 * object pager must have the page.
			 *
			 * If this fails, the parent does not completely shadow
			 * the object and we might as well give up now.
			 */
			pp = vm_page_lookup(object, new_pindex);
			if (
			    (pp == NULL || pp->valid == 0) &&
			    !vm_pager_has_page(object, new_pindex, NULL, NULL)
			) {
				r = 0;
				break;
			}
		}

		/*
		 * Check for busy page
		 */
		if (op & (OBSC_COLLAPSE_WAIT | OBSC_COLLAPSE_NOWAIT)) {
			vm_page_t pp;

			if (op & OBSC_COLLAPSE_NOWAIT) {
				if (
				    (p->flags & PG_BUSY) ||
				    !p->valid ||
				    p->hold_count ||
				    p->wire_count ||
				    p->busy
				) {
					p = next;
					continue;
				}
			} else if (op & OBSC_COLLAPSE_WAIT) {
				if (vm_page_sleep_busy(p, TRUE, "vmocol")) {
					/*
					 * If we slept, anything could have
					 * happened.  Since the object is
					 * marked dead, the backing offset
					 * should not have changed so we
					 * just restart our scan.
					 */
					p = TAILQ_FIRST(&backing_object->memq);
					continue;
				}
			}

			/*
			 * Busy the page
			 */
			vm_page_busy(p);

			KASSERT(
			    p->object == backing_object,
			    ("vm_object_qcollapse(): object mismatch")
			);

			/*
			 * Destroy any associated swap
			 */
			if (backing_object->type == OBJT_SWAP) {
				swap_pager_freespace(
				    backing_object,
				    p->pindex,
				    1
				);
			}

			if (
			    p->pindex < backing_offset_index ||
			    new_pindex >= object->size
			) {
				/*
				 * Page is out of the parent object's range, we
				 * can simply destroy it.
				 */
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
				p = next;
				continue;
			}

			pp = vm_page_lookup(object, new_pindex);
			if (
			    pp != NULL ||
			    vm_pager_has_page(object, new_pindex, NULL, NULL)
			) {
				/*
				 * page already exists in parent OR swap exists
				 * for this location in the parent.  Destroy
				 * the original page from the backing object.
				 *
				 * Leave the parent's page alone
				 */
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
				p = next;
				continue;
			}

			/*
			 * Page does not exist in parent, rename the
			 * page from the backing object to the main object.
			 *
			 * If the page was mapped to a process, it can remain
			 * mapped through the rename.
			 */
			if ((p->queue - p->pc) == PQ_CACHE)
				vm_page_deactivate(p);

			vm_page_rename(p, object, new_pindex);
			/* page automatically made dirty by rename */
		}
		p = next;
	}
	splx(s);
	return(r);
}
/*
 * this version of collapse allows the operation to occur earlier and
 * when paging_in_progress is true for an object...  This is not a complete
 * operation, but should plug 99.9% of the rest of the leaks.
 */
static void
vm_object_qcollapse(object)
	vm_object_t object;
{
	vm_object_t backing_object = object->backing_object;

	if (backing_object->ref_count != 1)
		return;

	backing_object->ref_count += 2;

	vm_object_backing_scan(object, OBSC_COLLAPSE_NOWAIT);

	backing_object->ref_count -= 2;
}
/*
 *	vm_object_collapse:
 *
 *	Collapse an object with the object backing it.
 *	Pages in the backing object are moved into the
 *	parent, and the backing object is deallocated.
 */
void
vm_object_collapse(object)
	vm_object_t object;
{
	while (TRUE) {
		vm_object_t backing_object;

		/*
		 * Verify that the conditions are right for collapse:
		 *
		 * The object exists and the backing object exists.
		 */
		if (object == NULL)
			break;

		if ((backing_object = object->backing_object) == NULL)
			break;

		/*
		 * we check the backing object first, because it is most likely
		 * not collapsible.
		 */
		if (backing_object->handle != NULL ||
		    (backing_object->type != OBJT_DEFAULT &&
		     backing_object->type != OBJT_SWAP) ||
		    (backing_object->flags & OBJ_DEAD) ||
		    object->handle != NULL ||
		    (object->type != OBJT_DEFAULT &&
		     object->type != OBJT_SWAP) ||
		    (object->flags & OBJ_DEAD)) {
			break;
		}

		if (
		    object->paging_in_progress != 0 ||
		    backing_object->paging_in_progress != 0
		) {
			vm_object_qcollapse(object);
			break;
		}

		/*
		 * We know that we can either collapse the backing object (if
		 * the parent is the only reference to it) or (perhaps) have
		 * the parent bypass the object if the parent happens to shadow
		 * all the resident pages in the entire backing object.
		 *
		 * This is ignoring pager-backed pages such as swap pages.
		 * vm_object_backing_scan fails the shadowing test in this
		 * case.
		 */
		if (backing_object->ref_count == 1) {
			/*
			 * If there is exactly one reference to the backing
			 * object, we can collapse it into the parent.
			 */
			vm_object_backing_scan(object, OBSC_COLLAPSE_WAIT);

			/*
			 * Move the pager from backing_object to object.
			 */
			if (backing_object->type == OBJT_SWAP) {
				vm_object_pip_add(backing_object, 1);

				/*
				 * scrap the paging_offset junk and do a
				 * discrete copy.  This also removes major
				 * assumptions about how the swap-pager
				 * works from where it doesn't belong.  The
				 * new swapper is able to optimize the
				 * destroy-source case.
				 */
				vm_object_pip_add(object, 1);
				swap_pager_copy(
				    backing_object,
				    object,
				    OFF_TO_IDX(object->backing_object_offset), TRUE);
				vm_object_pip_wakeup(object);

				vm_object_pip_wakeup(backing_object);
			}
			/*
			 * Object now shadows whatever backing_object did.
			 * Note that the reference to
			 * backing_object->backing_object moves from within
			 * backing_object to within object.
			 */
			LIST_REMOVE(object, shadow_list);
			object->backing_object->shadow_count--;
			object->backing_object->generation++;
			if (backing_object->backing_object) {
				LIST_REMOVE(backing_object, shadow_list);
				backing_object->backing_object->shadow_count--;
				backing_object->backing_object->generation++;
			}
			object->backing_object = backing_object->backing_object;
			if (object->backing_object) {
				LIST_INSERT_HEAD(
				    &object->backing_object->shadow_head,
				    object,
				    shadow_list
				);
				object->backing_object->shadow_count++;
				object->backing_object->generation++;
			}

			object->backing_object_offset +=
			    backing_object->backing_object_offset;

			/*
			 * Discard backing_object.
			 *
			 * Since the backing object has no pages, no pager left,
			 * and no object references within it, all that is
			 * necessary is to dispose of it.
			 */
			KASSERT(backing_object->ref_count == 1,
			    ("backing_object %p was somehow re-referenced during collapse!",
			     backing_object));
			KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL,
			    ("backing_object %p somehow has left over pages during collapse!",
			     backing_object));
			TAILQ_REMOVE(
			    &vm_object_list,
			    backing_object,
			    object_list
			);
			vm_object_count--;

			zfree(obj_zone, backing_object);

			object_collapses++;
		} else {
			vm_object_t new_backing_object;

			/*
			 * If we do not entirely shadow the backing object,
			 * there is nothing we can do so we give up.
			 */
			if (vm_object_backing_scan(object, OBSC_TEST_ALL_SHADOWED) == 0) {
				break;
			}

			/*
			 * Make the parent shadow the next object in the
			 * chain.  Deallocating backing_object will not remove
			 * it, since its reference count is at least 2.
			 */
			LIST_REMOVE(object, shadow_list);
			backing_object->shadow_count--;
			backing_object->generation++;

			new_backing_object = backing_object->backing_object;
			if ((object->backing_object = new_backing_object) != NULL) {
				vm_object_reference(new_backing_object);
				LIST_INSERT_HEAD(
				    &new_backing_object->shadow_head,
				    object,
				    shadow_list
				);
				new_backing_object->shadow_count++;
				new_backing_object->generation++;
				object->backing_object_offset +=
				    backing_object->backing_object_offset;
			}

			/*
			 * Drop the reference count on backing_object.  Since
			 * its ref_count was at least 2, it will not vanish;
			 * so we don't need to call vm_object_deallocate, but
			 * we do anyway.
			 */
			vm_object_deallocate(backing_object);
			object_bypasses++;
		}

		/*
		 * Try again with this object's new backing object.
		 */
	}
}
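/*
 * Added commentary: the two arms above can be pictured as
 *
 *	collapse (ref_count == 1):	bypass (fully shadowed):
 *	  object -> backing -> X	  object -> backing -> X
 *	       becomes			       becomes
 *	  object -> X			  object -> X
 *	  (backing freed)		  (backing keeps its other refs)
 *
 * In the collapse case the backing object's pages and swap have already
 * been migrated into object by vm_object_backing_scan(), so the empty
 * shell can be freed outright; in the bypass case it is merely unhooked
 * from this shadow chain.
 */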
/*
 *	vm_object_page_remove: [internal]
 *
 *	Removes all physical pages in the specified
 *	object range from the object's list of pages.
 *
 *	The object must be locked.
 */
void
vm_object_page_remove(object, start, end, clean_only)
	vm_object_t object;
	vm_pindex_t start;
	vm_pindex_t end;
	boolean_t clean_only;
{
	vm_page_t p, next;
	unsigned int size;
	int all;

	if (object == NULL ||
	    object->resident_page_count == 0)
		return;

	all = ((end == 0) && (start == 0));

	/*
	 * Since physically-backed objects do not use managed pages, we can't
	 * remove pages from the object (we must instead remove the page
	 * references, and then destroy the object).
	 */
	KASSERT(object->type != OBJT_PHYS,
	    ("attempt to remove pages from a physical object"));

	vm_object_pip_add(object, 1);
again:
	size = end - start;
	if (all || size > object->resident_page_count / 4) {
		for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) {
			next = TAILQ_NEXT(p, listq);
			if (all || ((start <= p->pindex) && (p->pindex < end))) {
				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */
				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty)
						continue;
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
		}
	} else {
		while (size > 0) {
			if ((p = vm_page_lookup(object, start)) != 0) {

				if (p->wire_count != 0) {
					vm_page_protect(p, VM_PROT_NONE);
					if (!clean_only)
						p->valid = 0;
					start += 1;
					size -= 1;
					continue;
				}

				/*
				 * The busy flags are only cleared at
				 * interrupt -- minimize the spl transitions
				 */
				if (vm_page_sleep_busy(p, TRUE, "vmopar"))
					goto again;

				if (clean_only && p->valid) {
					vm_page_test_dirty(p);
					if (p->valid & p->dirty) {
						start += 1;
						size -= 1;
						continue;
					}
				}

				vm_page_busy(p);
				vm_page_protect(p, VM_PROT_NONE);
				vm_page_free(p);
			}
			start += 1;
			size -= 1;
		}
	}
	vm_object_pip_wakeup(object);
}
/*
 *	Routine:	vm_object_coalesce
 *	Function:	Coalesces two objects backing up adjoining
 *			regions of memory into a single object.
 *
 *	returns TRUE if objects were combined.
 *
 *	NOTE:	Only works at the moment if the second object is NULL -
 *		if it's not, which object do we lock first?
 *
 *	Parameters:
 *		prev_object	First object to coalesce
 *		prev_offset	Offset into prev_object
 *		next_object	Second object to coalesce
 *		next_offset	Offset into next_object
 *
 *		prev_size	Size of reference to prev_object
 *		next_size	Size of reference to next_object
 *
 *	Conditions:
 *	The object must *not* be locked.
 */
boolean_t
vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
	vm_object_t prev_object;
	vm_pindex_t prev_pindex;
	vm_size_t prev_size, next_size;
{
	vm_pindex_t next_pindex;

	if (prev_object == NULL) {
		return (TRUE);
	}

	if (prev_object->type != OBJT_DEFAULT &&
	    prev_object->type != OBJT_SWAP) {
		return (FALSE);
	}

	/*
	 * Try to collapse the object first
	 */
	vm_object_collapse(prev_object);

	/*
	 * Can't coalesce if:
	 * . more than one reference
	 * . paged out
	 * . shadows another object
	 * . has a copy elsewhere
	 * (any of which mean that the pages not mapped to
	 * prev_entry may be in use anyway)
	 */
	if (prev_object->backing_object != NULL) {
		return (FALSE);
	}

	prev_size >>= PAGE_SHIFT;
	next_size >>= PAGE_SHIFT;
	next_pindex = prev_pindex + prev_size;

	if ((prev_object->ref_count > 1) &&
	    (prev_object->size != next_pindex)) {
		return (FALSE);
	}

	/*
	 * Remove any pages that may still be in the object from a previous
	 * deallocation.
	 */
	if (next_pindex < prev_object->size) {
		vm_object_page_remove(prev_object,
		    next_pindex,
		    next_pindex + next_size, FALSE);
		if (prev_object->type == OBJT_SWAP)
			swap_pager_freespace(prev_object,
			    next_pindex, next_size);
	}

	/*
	 * Extend the object if necessary.
	 */
	if (next_pindex + next_size > prev_object->size)
		prev_object->size = next_pindex + next_size;

	return (TRUE);
}
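/*
 * Caller sketch (added; a hedged assumption about the vm_map layer): this
 * is used when extending an existing anonymous mapping, so that adjacent
 * allocations can share one object instead of creating a new one, e.g.
 *
 *	if (vm_object_coalesce(prev->object.vm_object,
 *	    OFF_TO_IDX(prev->offset), prev->end - prev->start, grow_amount))
 *		... extend the previous map entry in place ...
 *
 * Note that the sizes passed in are byte sizes (the function shifts them
 * down by PAGE_SHIFT itself), and only the next_object == NULL case is
 * implemented, so coalescing only ever extends prev_object.
 */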
void
vm_object_set_writeable_dirty(vm_object_t object)
{
	struct vnode *vp;

	vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
	if (object->type == OBJT_VNODE &&
	    (vp = (struct vnode *)object->handle) != NULL) {
		if ((vp->v_flag & VOBJDIRTY) == 0) {
			simple_lock(&vp->v_interlock);
			vp->v_flag |= VOBJDIRTY;
			simple_unlock(&vp->v_interlock);
		}
	}
}

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <sys/cons.h>

#include <ddb/ddb.h>

static int	_vm_object_in_map __P((vm_map_t map, vm_object_t object,
				       vm_map_entry_t entry));
static int	vm_object_in_map __P((vm_object_t object));

static int
_vm_object_in_map(map, object, entry)
	vm_map_t map;
	vm_object_t object;
	vm_map_entry_t entry;
{
	vm_map_t tmpm;
	vm_map_entry_t tmpe;
	vm_object_t obj;
	int entcount;

	if (map == 0)
		return 0;

	if (entry == 0) {
		tmpe = map->header.next;
		entcount = map->nentries;
		while (entcount-- && (tmpe != &map->header)) {
			if (_vm_object_in_map(map, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		tmpm = entry->object.sub_map;
		tmpe = tmpm->header.next;
		entcount = tmpm->nentries;
		while (entcount-- && tmpe != &tmpm->header) {
			if (_vm_object_in_map(tmpm, object, tmpe)) {
				return 1;
			}
			tmpe = tmpe->next;
		}
	} else if ((obj = entry->object.vm_object) != NULL) {
		for (; obj; obj = obj->backing_object)
			if (obj == object) {
				return 1;
			}
	}
	return 0;
}

static int
vm_object_in_map(object)
	vm_object_t object;
{
	struct proc *p;

	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (!p->p_vmspace /* || (p->p_flag & (P_SYSTEM|P_WEXIT)) */)
			continue;
		if (_vm_object_in_map(&p->p_vmspace->vm_map, object, 0))
			return 1;
	}
	if (_vm_object_in_map(kernel_map, object, 0))
		return 1;
	if (_vm_object_in_map(kmem_map, object, 0))
		return 1;
	if (_vm_object_in_map(pager_map, object, 0))
		return 1;
	if (_vm_object_in_map(buffer_map, object, 0))
		return 1;
	if (_vm_object_in_map(mb_map, object, 0))
		return 1;
	return 0;
}

DB_SHOW_COMMAND(vmochk, vm_object_check)
{
	vm_object_t object;

	/*
	 * make sure that internal objs are in a map somewhere
	 * and none have zero ref counts.
	 */
	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		if (object->handle == NULL &&
		    (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
			if (object->ref_count == 0) {
				db_printf("vmochk: internal obj has zero ref count: %ld\n",
				    (long)object->size);
			}
			if (!vm_object_in_map(object)) {
				db_printf(
				    "vmochk: internal obj is not in a map: "
				    "ref: %d, size: %lu: 0x%lx, backing_object: %p\n",
				    object->ref_count, (u_long)object->size,
				    (u_long)object->size,
				    (void *)object->backing_object);
			}
		}
	}
}
/*
 *	vm_object_print:	[ debug ]
 */
DB_SHOW_COMMAND(object, vm_object_print_static)
{
	/* XXX convert args. */
	vm_object_t object = (vm_object_t)addr;
	boolean_t full = have_addr;

	vm_page_t p;

	/* XXX count is an (unused) arg.  Avoid shadowing it. */
#define	count	was_count

	int count;

	if (object == NULL)
		return;

	db_iprintf(
	    "Object %p: type=%d, size=0x%lx, res=%d, ref=%d, flags=0x%x\n",
	    object, (int)object->type, (u_long)object->size,
	    object->resident_page_count, object->ref_count, object->flags);
	/*
	 * XXX no %qd in kernel.  Truncate object->backing_object_offset.
	 */
	db_iprintf(" sref=%d, backing_object(%d)=(%p)+0x%lx\n",
	    object->shadow_count,
	    object->backing_object ? object->backing_object->ref_count : 0,
	    object->backing_object, (long)object->backing_object_offset);

	if (!full)
		return;

	db_indent += 2;
	count = 0;
	for (p = TAILQ_FIRST(&object->memq); p != NULL; p = TAILQ_NEXT(p, listq)) {
		if (count == 0)
			db_iprintf("memory:=");
		else if (count == 6) {
			db_printf("\n");
			db_iprintf(" ...");
			count = 0;
		} else
			db_printf(",");
		count++;

		db_printf("(off=0x%lx,page=0x%lx)",
		    (u_long) p->pindex, (u_long) VM_PAGE_TO_PHYS(p));
	}
	if (count != 0)
		db_printf("\n");
	db_indent -= 2;
}

/* XXX. */
#undef count

/* XXX need this non-static entry for calling from vm_map_print. */
void
vm_object_print(addr, have_addr, count, modif)
	/* db_expr_t */ long addr;
	boolean_t have_addr;
	/* db_expr_t */ long count;
	char *modif;
{
	vm_object_print_static(addr, have_addr, count, modif);
}
DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
{
	vm_object_t object;
	int nl = 0;
	int c;

	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		vm_pindex_t idx, fidx;
		vm_pindex_t osize;
		vm_offset_t pa = -1, padiff;
		int rcount;
		vm_page_t m;

		db_printf("new object: %p\n", (void *)object);
		if (nl > 18) {
			c = cngetc();
			if (c != ' ')
				return;
			nl = 0;
		}
		nl++;
		rcount = 0;
		fidx = 0;
		osize = object->size;
		if (osize > 128)
			osize = 128;
		for (idx = 0; idx < osize; idx++) {
			m = vm_page_lookup(object, idx);
			if (m == NULL) {
				if (rcount) {
					db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
					    (long)fidx, rcount, (long)pa);
					if (nl > 18) {
						c = cngetc();
						if (c != ' ')
							return;
						nl = 0;
					}
					nl++;
					rcount = 0;
				}
				continue;
			}

			if (rcount &&
			    (VM_PAGE_TO_PHYS(m) == pa + rcount * PAGE_SIZE)) {
				++rcount;
				continue;
			}
			if (rcount) {
				padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
				padiff >>= PAGE_SHIFT;
				padiff &= PQ_L2_MASK;
				if (padiff == 0) {
					pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
					++rcount;
					continue;
				}
				db_printf(" index(%ld)run(%d)pa(0x%lx)",
				    (long)fidx, rcount, (long)pa);
				db_printf("pd(%ld)\n", (long)padiff);
				if (nl > 18) {
					c = cngetc();
					if (c != ' ')
						return;
					nl = 0;
				}
				nl++;
			}
			fidx = idx;
			pa = VM_PAGE_TO_PHYS(m);
			rcount = 1;
		}
		if (rcount) {
			db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
			    (long)fidx, rcount, (long)pa);
			if (nl > 18) {
				c = cngetc();
				if (c != ' ')
					return;
				nl = 0;
			}
			nl++;
		}
	}
}
#endif /* DDB */