1 /* $NetBSD: uvm_pglist.c,v 1.25 2002/11/02 07:40:49 perry Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * uvm_pglist.c: pglist functions 42 */ 43 44 #include <sys/cdefs.h> 45 __KERNEL_RCSID(0, "$NetBSD: uvm_pglist.c,v 1.25 2002/11/02 07:40:49 perry Exp $"); 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/malloc.h> 50 #include <sys/proc.h> 51 52 #include <uvm/uvm.h> 53 54 #ifdef VM_PAGE_ALLOC_MEMORY_STATS 55 #define STAT_INCR(v) (v)++ 56 #define STAT_DECR(v) do { \ 57 if ((v) == 0) \ 58 printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ 59 else \ 60 (v)--; \ 61 } while (/*CONSTCOND*/ 0) 62 u_long uvm_pglistalloc_npages; 63 #else 64 #define STAT_INCR(v) 65 #define STAT_DECR(v) 66 #endif 67 68 /* 69 * uvm_pglistalloc: allocate a list of pages 70 * 71 * => allocated pages are placed at the tail of rlist. rlist is 72 * assumed to be properly initialized by caller. 73 * => returns 0 on success or errno on failure 74 * => XXX: implementation allocates only a single segment, also 75 * might be able to better advantage of vm_physeg[]. 76 * => doesn't take into account clean non-busy pages on inactive list 77 * that could be used(?) 78 * => params: 79 * size the size of the allocation, rounded to page size. 80 * low the low address of the allowed allocation range. 81 * high the high address of the allowed allocation range. 82 * alignment memory must be aligned to this power-of-two boundary. 83 * boundary no segment in the allocation may cross this 84 * power-of-two boundary (relative to zero). 85 */ 86 87 static void uvm_pglist_add(struct vm_page *, struct pglist *); 88 static int uvm_pglistalloc_c_ps(struct vm_physseg *, int, paddr_t, paddr_t, 89 paddr_t, paddr_t, struct pglist *); 90 static int uvm_pglistalloc_contig(int, paddr_t, paddr_t, paddr_t, paddr_t, 91 struct pglist *); 92 static int uvm_pglistalloc_s_ps(struct vm_physseg *, int, paddr_t, paddr_t, 93 struct pglist *); 94 static int uvm_pglistalloc_simple(int, paddr_t, paddr_t, 95 struct pglist *, int); 96 97 static void 98 uvm_pglist_add(pg, rlist) 99 struct vm_page *pg; 100 struct pglist *rlist; 101 { 102 int free_list, color, pgflidx; 103 #ifdef DEBUG 104 struct vm_page *tp; 105 #endif 106 107 #if PGFL_NQUEUES != 2 108 #error uvm_pglistalloc needs to be updated 109 #endif 110 111 free_list = uvm_page_lookup_freelist(pg); 112 color = VM_PGCOLOR_BUCKET(pg); 113 pgflidx = (pg->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN; 114 #ifdef DEBUG 115 for (tp = TAILQ_FIRST(&uvm.page_free[ 116 free_list].pgfl_buckets[color].pgfl_queues[pgflidx]); 117 tp != NULL; 118 tp = TAILQ_NEXT(tp, pageq)) { 119 if (tp == pg) 120 break; 121 } 122 if (tp == NULL) 123 panic("uvm_pglistalloc: page not on freelist"); 124 #endif 125 TAILQ_REMOVE(&uvm.page_free[free_list].pgfl_buckets[ 126 color].pgfl_queues[pgflidx], pg, pageq); 127 uvmexp.free--; 128 if (pg->flags & PG_ZERO) 129 uvmexp.zeropages--; 130 pg->flags = PG_CLEAN; 131 pg->pqflags = 0; 132 pg->uobject = NULL; 133 pg->uanon = NULL; 134 TAILQ_INSERT_TAIL(rlist, pg, pageq); 135 STAT_INCR(uvm_pglistalloc_npages); 136 } 137 138 static int 139 uvm_pglistalloc_c_ps(ps, num, low, high, alignment, boundary, rlist) 140 struct vm_physseg *ps; 141 int num; 142 paddr_t low, high, alignment, boundary; 143 struct pglist *rlist; 144 { 145 int try, limit, tryidx, end, idx; 146 struct vm_page *pgs; 147 int pagemask; 148 #ifdef DEBUG 149 paddr_t idxpa, lastidxpa; 150 int cidx; 151 #endif 152 #ifdef PGALLOC_VERBOSE 153 printf("pgalloc: contig %d pgs from psi %d\n", num, ps - vm_physmem); 154 #endif 155 156 try = roundup(max(atop(low), ps->avail_start), atop(alignment)); 157 limit = min(atop(high), ps->avail_end); 158 pagemask = ~((boundary >> PAGE_SHIFT) - 1); 159 160 for (;;) { 161 if (try + num > limit) { 162 /* 163 * We've run past the allowable range. 164 */ 165 return (0); /* FAIL */ 166 } 167 if (boundary != 0 && 168 ((try ^ (try + num - 1)) & pagemask) != 0) { 169 /* 170 * Region crosses boundary. Jump to the boundary 171 * just crossed and ensure alignment. 172 */ 173 try = (try + num - 1) & pagemask; 174 try = roundup(try, atop(alignment)); 175 continue; 176 } 177 #ifdef DEBUG 178 /* 179 * Make sure this is a managed physical page. 180 */ 181 182 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 183 panic("pgalloc contig: botch1"); 184 if (cidx != try - ps->start) 185 panic("pgalloc contig: botch2"); 186 if (vm_physseg_find(try + num - 1, &cidx) != ps - vm_physmem) 187 panic("pgalloc contig: botch3"); 188 if (cidx != try - ps->start + num - 1) 189 panic("pgalloc contig: botch4"); 190 #endif 191 tryidx = try - ps->start; 192 end = tryidx + num; 193 pgs = ps->pgs; 194 195 /* 196 * Found a suitable starting page. See if the range is free. 197 */ 198 for (idx = tryidx; idx < end; idx++) { 199 if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) 200 break; 201 202 #ifdef DEBUG 203 idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); 204 if (idx > tryidx) { 205 lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); 206 if ((lastidxpa + PAGE_SIZE) != idxpa) { 207 /* 208 * Region not contiguous. 209 */ 210 panic("pgalloc contig: botch5"); 211 } 212 if (boundary != 0 && 213 ((lastidxpa ^ idxpa) & ~(boundary - 1)) 214 != 0) { 215 /* 216 * Region crosses boundary. 217 */ 218 panic("pgalloc contig: botch6"); 219 } 220 } 221 #endif 222 } 223 if (idx == end) 224 break; 225 226 try += atop(alignment); 227 } 228 229 /* 230 * we have a chunk of memory that conforms to the requested constraints. 231 */ 232 idx = tryidx; 233 while (idx < end) 234 uvm_pglist_add(&pgs[idx++], rlist); 235 236 #ifdef PGALLOC_VERBOSE 237 printf("got %d pgs\n", num); 238 #endif 239 return (num); /* number of pages allocated */ 240 } 241 242 static int 243 uvm_pglistalloc_contig(num, low, high, alignment, boundary, rlist) 244 int num; 245 paddr_t low, high, alignment, boundary; 246 struct pglist *rlist; 247 { 248 int fl, psi; 249 struct vm_physseg *ps; 250 int s, error; 251 252 /* Default to "lose". */ 253 error = ENOMEM; 254 255 /* 256 * Block all memory allocation and lock the free list. 257 */ 258 s = uvm_lock_fpageq(); 259 260 /* Are there even any free pages? */ 261 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 262 goto out; 263 264 for (fl = 0; fl < VM_NFREELIST; fl++) { 265 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 266 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 267 #else 268 for (psi = 0 ; psi < vm_nphysseg ; psi++) 269 #endif 270 { 271 ps = &vm_physmem[psi]; 272 273 if (ps->free_list != fl) 274 continue; 275 276 num -= uvm_pglistalloc_c_ps(ps, num, low, high, 277 alignment, boundary, rlist); 278 if (num == 0) { 279 #ifdef PGALLOC_VERBOSE 280 printf("pgalloc: %lx-%lx\n", 281 TAILQ_FIRST(rlist)->phys_addr, 282 TAILQ_LAST(rlist, pglist)->phys_addr); 283 #endif 284 error = 0; 285 goto out; 286 } 287 } 288 } 289 290 out: 291 /* 292 * check to see if we need to generate some free pages waking 293 * the pagedaemon. 294 */ 295 296 UVM_KICK_PDAEMON(); 297 uvm_unlock_fpageq(s); 298 return (error); 299 } 300 301 static int 302 uvm_pglistalloc_s_ps(ps, num, low, high, rlist) 303 struct vm_physseg *ps; 304 int num; 305 paddr_t low, high; 306 struct pglist *rlist; 307 { 308 int todo, limit, try; 309 struct vm_page *pg; 310 #ifdef DEBUG 311 int cidx; 312 #endif 313 #ifdef PGALLOC_VERBOSE 314 printf("pgalloc: simple %d pgs from psi %d\n", num, ps - vm_physmem); 315 #endif 316 317 todo = num; 318 limit = min(atop(high), ps->avail_end); 319 320 for (try = max(atop(low), ps->avail_start); 321 try < limit; try ++) { 322 #ifdef DEBUG 323 if (vm_physseg_find(try, &cidx) != ps - vm_physmem) 324 panic("pgalloc simple: botch1"); 325 if (cidx != (try - ps->start)) 326 panic("pgalloc simple: botch2"); 327 #endif 328 pg = &ps->pgs[try - ps->start]; 329 if (VM_PAGE_IS_FREE(pg) == 0) 330 continue; 331 332 uvm_pglist_add(pg, rlist); 333 if (--todo == 0) 334 break; 335 } 336 337 #ifdef PGALLOC_VERBOSE 338 printf("got %d pgs\n", num - todo); 339 #endif 340 return (num - todo); /* number of pages allocated */ 341 } 342 343 static int 344 uvm_pglistalloc_simple(num, low, high, rlist, waitok) 345 int num; 346 paddr_t low, high; 347 struct pglist *rlist; 348 int waitok; 349 { 350 int fl, psi, s, error; 351 struct vm_physseg *ps; 352 353 /* Default to "lose". */ 354 error = ENOMEM; 355 356 again: 357 /* 358 * Block all memory allocation and lock the free list. 359 */ 360 s = uvm_lock_fpageq(); 361 362 /* Are there even any free pages? */ 363 if (uvmexp.free <= (uvmexp.reserve_pagedaemon + uvmexp.reserve_kernel)) 364 goto out; 365 366 for (fl = 0; fl < VM_NFREELIST; fl++) { 367 #if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) 368 for (psi = vm_nphysseg - 1 ; psi >= 0 ; psi--) 369 #else 370 for (psi = 0 ; psi < vm_nphysseg ; psi++) 371 #endif 372 { 373 ps = &vm_physmem[psi]; 374 375 if (ps->free_list != fl) 376 continue; 377 378 num -= uvm_pglistalloc_s_ps(ps, num, low, high, rlist); 379 if (num == 0) { 380 error = 0; 381 goto out; 382 } 383 } 384 385 } 386 387 out: 388 /* 389 * check to see if we need to generate some free pages waking 390 * the pagedaemon. 391 */ 392 393 UVM_KICK_PDAEMON(); 394 uvm_unlock_fpageq(s); 395 if (error) { 396 if (waitok) { 397 /* XXX perhaps some time limitation? */ 398 #ifdef DEBUG 399 printf("pglistalloc waiting\n"); 400 #endif 401 uvm_wait("pglalloc"); 402 goto again; 403 } else 404 uvm_pglistfree(rlist); 405 } 406 #ifdef PGALLOC_VERBOSE 407 if (!error) 408 printf("pgalloc: %lx..%lx\n", 409 TAILQ_FIRST(rlist)->phys_addr, 410 TAILQ_LAST(rlist, pglist)->phys_addr); 411 #endif 412 return (error); 413 } 414 415 int 416 uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) 417 psize_t size; 418 paddr_t low, high, alignment, boundary; 419 struct pglist *rlist; 420 int nsegs, waitok; 421 { 422 int num, res; 423 424 KASSERT((alignment & (alignment - 1)) == 0); 425 KASSERT((boundary & (boundary - 1)) == 0); 426 427 /* 428 * Our allocations are always page granularity, so our alignment 429 * must be, too. 430 */ 431 if (alignment < PAGE_SIZE) 432 alignment = PAGE_SIZE; 433 if (boundary != 0 && boundary < size) 434 return (EINVAL); 435 num = atop(round_page(size)); 436 low = roundup(low, alignment); 437 438 TAILQ_INIT(rlist); 439 440 if ((nsegs < size >> PAGE_SHIFT) || (alignment != PAGE_SIZE) || 441 (boundary != 0)) 442 res = uvm_pglistalloc_contig(num, low, high, alignment, 443 boundary, rlist); 444 else 445 res = uvm_pglistalloc_simple(num, low, high, rlist, waitok); 446 447 return (res); 448 } 449 450 /* 451 * uvm_pglistfree: free a list of pages 452 * 453 * => pages should already be unmapped 454 */ 455 456 void 457 uvm_pglistfree(list) 458 struct pglist *list; 459 { 460 struct vm_page *pg; 461 int s; 462 463 /* 464 * Lock the free list and free each page. 465 */ 466 467 s = uvm_lock_fpageq(); 468 while ((pg = TAILQ_FIRST(list)) != NULL) { 469 KASSERT((pg->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) == 0); 470 TAILQ_REMOVE(list, pg, pageq); 471 pg->pqflags = PQ_FREE; 472 TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)]. 473 pgfl_buckets[VM_PGCOLOR_BUCKET(pg)]. 474 pgfl_queues[PGFL_UNKNOWN], pg, pageq); 475 uvmexp.free++; 476 if (uvmexp.zeropages < UVM_PAGEZERO_TARGET) 477 uvm.page_idle_zero = vm_page_zero_enable; 478 STAT_DECR(uvm_pglistalloc_npages); 479 } 480 uvm_unlock_fpageq(s); 481 } 482