xref: /illumos-gate/usr/src/uts/sun4v/os/memseg.c (revision 4c06356b)
1 /*
2  *
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/cmn_err.h>
29 #include <sys/vm.h>
30 #include <sys/mman.h>
31 #include <vm/vm_dep.h>
32 #include <vm/seg_kmem.h>
33 #include <vm/seg_kpm.h>
34 #include <sys/mem_config.h>
35 #include <sys/sysmacros.h>
36 
37 extern pgcnt_t pp_dummy_npages;
38 extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
39 
40 extern kmutex_t memseg_lists_lock;
41 extern struct memseg *memseg_va_avail;
42 extern struct memseg *memseg_alloc();
43 
44 extern page_t *ppvm_base;
45 extern pgcnt_t ppvm_size;
46 
47 static vnode_t pp_vn, rsv_vn;
48 static pgcnt_t rsv_metapgs;
49 static int meta_rsv_enable;
50 static int sun4v_memseg_debug;
51 
52 extern struct memseg *memseg_reuse(pgcnt_t);
53 extern void remap_to_dummy(caddr_t, pgcnt_t);
54 
/*
 * The page_t memory for incoming pages is allocated from existing memory
 * which can create a potential situation where memory addition fails
 * because of a shortage of existing memory.  To mitigate this situation
 * some memory is always reserved ahead of time for page_t allocation.
 * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation.  That 256MB of added memory could in turn
 * theoretically allow an addition of 16GB.
 */
#define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */

/*
 * Debug-only tracing, enabled at run time through sun4v_memseg_debug.
 * The DEBUG form is wrapped in do { } while (0) so that it behaves as a
 * single statement and cannot capture the 'else' of an enclosing 'if'.
 */
#ifdef	DEBUG
#define	MEMSEG_DEBUG(args...)			\
	do {					\
		if (sun4v_memseg_debug)		\
			printf(args);		\
	} while (0)
#else
#define	MEMSEG_DEBUG(...)
#endif
71 
72 /*
73  * The page_t's for the incoming memory are allocated from
74  * existing pages.
75  */
76 /*ARGSUSED*/
77 int
78 memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
79 {
80 	page_t		*pp, *opp, *epp, *pgpp;
81 	pgcnt_t		metapgs;
82 	int		i, rsv;
83 	struct seg	kseg;
84 	caddr_t		vaddr;
85 	u_offset_t	off;
86 
87 	/*
88 	 * Verify incoming memory is within supported DR range.
89 	 */
90 	if ((base + npgs) * sizeof (page_t) > ppvm_size)
91 		return (KPHYSM_ENOTSUP);
92 
93 	opp = pp = ppvm_base + base;
94 	epp = pp + npgs;
95 	metapgs = btopr(npgs * sizeof (page_t));
96 
97 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
98 	    page_find(&pp_vn, (u_offset_t)pp)) {
99 		/*
100 		 * Another memseg has page_t's in the same
101 		 * page which 'pp' resides.  This would happen
102 		 * if PAGESIZE is not an integral multiple of
103 		 * sizeof (page_t) and therefore 'pp'
104 		 * does not start on a page boundry.
105 		 *
106 		 * Since the other memseg's pages_t's still
107 		 * map valid pages, skip allocation of this page.
108 		 * Advance 'pp' to the next page which should
109 		 * belong only to the incoming memseg.
110 		 *
111 		 * If the last page_t in the current page
112 		 * crosses a page boundary, this should still
113 		 * work.  The first part of the page_t is
114 		 * already allocated.  The second part of
115 		 * the page_t will be allocated below.
116 		 */
117 		ASSERT(PAGESIZE % sizeof (page_t));
118 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
119 		metapgs--;
120 	}
121 
122 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
123 	    page_find(&pp_vn, (u_offset_t)epp)) {
124 		/*
125 		 * Another memseg has page_t's in the same
126 		 * page which 'epp' resides.  This would happen
127 		 * if PAGESIZE is not an integral multiple of
128 		 * sizeof (page_t) and therefore 'epp'
129 		 * does not start on a page boundry.
130 		 *
131 		 * Since the other memseg's pages_t's still
132 		 * map valid pages, skip allocation of this page.
133 		 */
134 		ASSERT(PAGESIZE % sizeof (page_t));
135 		metapgs--;
136 	}
137 
138 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
139 
140 	/*
141 	 * Back metadata space with physical pages.
142 	 */
143 	kseg.s_as = &kas;
144 	vaddr = (caddr_t)pp;
145 
146 	for (i = 0; i < metapgs; i++)
147 		if (page_find(&pp_vn, (u_offset_t)(vaddr + i * PAGESIZE)))
148 			panic("page_find(0x%p, %p)\n",
149 			    (void *)&pp_vn, (void *)(vaddr + i * PAGESIZE));
150 
151 	/*
152 	 * Allocate the metadata pages; these are the pages that will
153 	 * contain the page_t's for the incoming memory.
154 	 *
155 	 * If a normal allocation fails, use the reserved metapgs for
156 	 * a small allocation; otherwise retry with PG_WAIT.
157 	 */
158 	rsv = off = 0;
159 	if (metapgs <= rsv_metapgs) {
160 		MEMSEG_DEBUG("memseg_get: use rsv 0x%lx metapgs", metapgs);
161 		ASSERT(meta_rsv_enable);
162 		rsv = 1;
163 	} else if ((pgpp = page_create_va(&pp_vn, (u_offset_t)pp, ptob(metapgs),
164 	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
165 		cmn_err(CE_WARN, "memseg_get: can't get 0x%ld metapgs",
166 		    metapgs);
167 		return (KPHYSM_ERESOURCE);
168 	}
169 	if (rsv) {
170 		/*
171 		 * The reseve pages must be hashed out of the reserve vnode
172 		 * and rehashed by <pp_vn,vaddr>.  The resreved pages also
173 		 * must be replenished immedidately at the end of the add
174 		 * processing.
175 		 */
176 		for (i = 0; i < metapgs; i++) {
177 			pgpp = page_find(&rsv_vn, off);
178 			ASSERT(pgpp);
179 			page_hashout(pgpp, 0);
180 			hat_devload(kas.a_hat, vaddr, PAGESIZE,
181 			    page_pptonum(pgpp), PROT_READ | PROT_WRITE,
182 			    HAT_LOAD | HAT_LOAD_REMAP | HAT_LOAD_NOCONSIST);
183 			ASSERT(!page_find(&pp_vn, (u_offset_t)vaddr));
184 			if (!page_hashin(pgpp, &pp_vn, (u_offset_t)vaddr, 0))
185 				panic("memseg_get: page_hashin(0x%p, 0x%p)",
186 				    (void *)pgpp, (void *)vaddr);
187 			off += PAGESIZE;
188 			vaddr += PAGESIZE;
189 			rsv_metapgs--;
190 		}
191 	} else {
192 		for (i = 0; i < metapgs; i++) {
193 			hat_devload(kas.a_hat, vaddr, PAGESIZE,
194 			    page_pptonum(pgpp), PROT_READ | PROT_WRITE,
195 			    HAT_LOAD | HAT_LOAD_REMAP | HAT_LOAD_NOCONSIST);
196 			pgpp = pgpp->p_next;
197 			vaddr += PAGESIZE;
198 		}
199 	}
200 
201 	ASSERT(ptp);
202 	ASSERT(metap);
203 
204 	*ptp = (void *)opp;
205 	*metap = metapgs;
206 
207 	return (KPHYSM_OK);
208 }
209 
210 void
211 memseg_free_meta(void *ptp, pgcnt_t metapgs)
212 {
213 	int i;
214 	page_t *pp;
215 	u_offset_t off;
216 
217 	if (!metapgs)
218 		return;
219 
220 	off = (u_offset_t)ptp;
221 
222 	ASSERT(off);
223 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
224 
225 	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
226 	    (uint64_t)off, metapgs);
227 	/*
228 	 * Free pages allocated during add.
229 	 */
230 	for (i = 0; i < metapgs; i++) {
231 		pp = page_find(&pp_vn, off);
232 		ASSERT(pp);
233 		ASSERT(pp->p_szc == 0);
234 		page_io_unlock(pp);
235 		page_destroy(pp, 0);
236 		off += PAGESIZE;
237 	}
238 }
239 
240 pfn_t
241 memseg_get_metapfn(void *ptp, pgcnt_t metapg)
242 {
243 	page_t *pp;
244 	u_offset_t off;
245 
246 	off = (u_offset_t)ptp + ptob(metapg);
247 
248 	ASSERT(off);
249 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
250 
251 	pp = page_find(&pp_vn, off);
252 	ASSERT(pp);
253 	ASSERT(pp->p_szc == 0);
254 	ASSERT(pp->p_pagenum != PFN_INVALID);
255 
256 	return (pp->p_pagenum);
257 }
258 
259 /*
260  * Remap a memseg's page_t's to dummy pages.  Skip the low/high
261  * ends of the range if they are already in use.
262  */
263 void
264 memseg_remap_meta(struct memseg *seg)
265 {
266 	int i;
267 	u_offset_t off;
268 	page_t *pp;
269 #if 0
270 	page_t *epp;
271 #endif
272 	pgcnt_t metapgs;
273 
274 	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
275 	ASSERT(metapgs);
276 	pp = seg->pages;
277 	seg->pages_end = seg->pages_base;
278 #if 0
279 	epp = seg->epages;
280 
281 	/*
282 	 * This code cannot be tested as the kernel does not compile
283 	 * when page_t size is changed.  It is left here as a starting
284 	 * point if the unaligned page_t size needs to be supported.
285 	 */
286 
287 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
288 	    page_find(&pp_vn, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
289 		/*
290 		 * Another memseg has page_t's in the same
291 		 * page which 'pp' resides.  This would happen
292 		 * if PAGESIZE is not an integral multiple of
293 		 * sizeof (page_t) and therefore 'seg->pages'
294 		 * does not start on a page boundry.
295 		 *
296 		 * Since the other memseg's pages_t's still
297 		 * map valid pages, skip remap of this page.
298 		 * Advance 'pp' to the next page which should
299 		 * belong only to the outgoing memseg.
300 		 *
301 		 * If the last page_t in the current page
302 		 * crosses a page boundary, this should still
303 		 * work.  The first part of the page_t is
304 		 * valid since memseg_lock_delete_all() has
305 		 * been called.  The second part of the page_t
306 		 * will be remapped to the corresponding
307 		 * dummy page below.
308 		 */
309 		ASSERT(PAGESIZE % sizeof (page_t));
310 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
311 		metapgs--;
312 	}
313 
314 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
315 	    page_find(&pp_vn, (u_offset_t)epp) && !page_deleted(epp)) {
316 		/*
317 		 * Another memseg has page_t's in the same
318 		 * page which 'epp' resides.  This would happen
319 		 * if PAGESIZE is not an integral multiple of
320 		 * sizeof (page_t) and therefore 'seg->epages'
321 		 * does not start on a page boundry.
322 		 *
323 		 * Since the other memseg's pages_t's still
324 		 * map valid pages, skip remap of this page.
325 		 */
326 		ASSERT(PAGESIZE % sizeof (page_t));
327 		metapgs--;
328 	}
329 #endif
330 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
331 
332 	remap_to_dummy((caddr_t)pp, metapgs);
333 
334 	off = (u_offset_t)pp;
335 
336 	MEMSEG_DEBUG("memseg_remap: off=0x%lx metapgs=0x%lx\n", (uint64_t)off,
337 	    metapgs);
338 	/*
339 	 * Free pages allocated during add.
340 	 */
341 	for (i = 0; i < metapgs; i++) {
342 		pp = page_find(&pp_vn, off);
343 		ASSERT(pp);
344 		ASSERT(pp->p_szc == 0);
345 		page_io_unlock(pp);
346 		page_destroy(pp, 0);
347 		off += PAGESIZE;
348 	}
349 }
350 
351 static void
352 rsv_alloc()
353 {
354 	int i;
355 	page_t *pp;
356 	pgcnt_t metapgs;
357 	u_offset_t off;
358 	struct seg kseg;
359 
360 	kseg.s_as = &kas;
361 
362 	/*
363 	 * Reserve enough page_t pages for an add request of
364 	 * RSV_SIZE bytes.
365 	 */
366 	metapgs = btopr(btop(RSV_SIZE) * sizeof (page_t)) - rsv_metapgs;
367 
368 	for (i = off = 0; i < metapgs; i++, off += PAGESIZE) {
369 		(void) page_create_va(&rsv_vn, off, PAGESIZE,
370 		    PG_NORELOC | PG_WAIT, &kseg, 0);
371 		pp = page_find(&rsv_vn, off);
372 		ASSERT(pp);
373 		ASSERT(PAGE_EXCL(pp));
374 		page_iolock_init(pp);
375 		rsv_metapgs++;
376 	}
377 }
378 
379 void
380 i_dr_mem_init(size_t *hint)
381 {
382 	if (meta_rsv_enable) {
383 		rsv_alloc();
384 		if (hint)
385 			*hint = RSV_SIZE;
386 	}
387 }
388 
389 void
390 i_dr_mem_fini()
391 {
392 	int i;
393 	page_t *pp;
394 	u_offset_t off;
395 
396 	for (i = off = 0; i < rsv_metapgs; i++, off += PAGESIZE) {
397 		if (pp = page_find(&rsv_vn, off)) {
398 			ASSERT(PAGE_EXCL(pp));
399 			page_destroy(pp, 0);
400 		}
401 		ASSERT(!page_find(&rsv_vn, off));
402 	}
403 	rsv_metapgs = 0;
404 }
405 
406 void
407 i_dr_mem_update()
408 {
409 	rsv_alloc();
410 }
411