xref: /illumos-gate/usr/src/uts/sun4v/os/ppage.c (revision 09295472)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machsystm.h>
32 #include <sys/t_lock.h>
33 #include <sys/vmem.h>
34 #include <sys/mman.h>
35 #include <sys/vm.h>
36 #include <sys/cpu.h>
37 #include <sys/cmn_err.h>
38 #include <sys/cpuvar.h>
39 #include <sys/atomic.h>
40 #include <vm/as.h>
41 #include <vm/hat.h>
42 #include <vm/as.h>
43 #include <vm/page.h>
44 #include <vm/seg.h>
45 #include <vm/seg_kmem.h>
46 #include <vm/hat_sfmmu.h>
47 #include <sys/debug.h>
48 #include <sys/cpu_module.h>
49 
50 /*
51  * A quick way to generate a cache consistent address to map in a page.
52  * users: ppcopy, pagezero, /proc, dev/mem
53  *
54  * The ppmapin/ppmapout routines provide a quick way of generating a cache
55  * consistent address by reserving a given amount of kernel address space.
56  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
57  * into x number of sets, where x is the number of colors for the virtual
58  * cache. The number of colors is how many times a page can be mapped
59  * simultaneously in the cache.  For direct-mapped caches this translates to
60  * the number of pages in the cache.
61  * Each set will be assigned a group of virtual pages from the reserved memory
62  * depending on its virtual color.
63  * When trying to assign a virtual address we will find out the color for the
64  * physical page in question (if applicable).  Then we will try to find an
65  * available virtual page from the set of the appropriate color.
66  */
67 
/*
 * Convert a (color, set) pair into an index into ppmap_vaddrs[].  Entries
 * are grouped by color, with nsets consecutive entries per color.
 *
 * Both arguments are parenthesized so that expression arguments (for
 * example "c + 1") expand with the intended precedence.  The macro reads
 * the file-scope variable nsets, so it is only meaningful after
 * ppmapinit() has set it.
 */
#define	clsettoarray(color, set) (((color) * nsets) + (set))
69 
/*
 * Slot count tunable.  ppmapinit() asserts that it never exceeds
 * MAXPP_SLOTS, the compile-time size of ppmap_va[].ppmap_slots[].
 */
70 int pp_slots = 4;		/* small default, tuned by cpu module */
71 
72 /* tuned by cpu module, default is "safe" */
73 int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
74 
/*
 * Table of reserved mapping addresses, filled in by ppmapinit() and indexed
 * with clsettoarray(color, set).  A non-NULL entry is a free address;
 * ppmapin() claims an entry by atomically swapping it to NULL and
 * ppmapout() stores the address back when the mapping is torn down.
 */
75 static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
76 static int	nsets;			/* number of sets */
/* Number of colors; this is the bound of the color loops, 1 without a VAC. */
77 static int	ppmap_pages;		/* generate align mask */
/* ppmapout() shifts an address right by this amount to recover its set. */
78 static int	ppmap_shift;		/* set selector */
79 
/*
 * Optional counters, compiled in only when PPDEBUG is defined:
 *   ppalloc_noslot - ppmapin() calls that found no free reserved address
 *                    and fell back to allocating from heap_arena
 *   align_hits[]   - per-color count of non-NULL table entries that
 *                    ppmapin() tried to claim
 *   pp_allocs      - total number of ppmapin() calls
 *
 * NOTE(review): align_hits[] has MAXCOLORS entries but is indexed by a
 * color that ranges up to ppmap_pages - 1; confirm ppmap_pages cannot
 * exceed MAXCOLORS on any platform built with PPDEBUG.
 */
80 #ifdef PPDEBUG
81 #define		MAXCOLORS	16	/* for debug only */
82 static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
83 static int	align_hits[MAXCOLORS];
84 static int	pp_allocs;		/* # of ppmapin requests */
85 #endif /* PPDEBUG */
86 
87 /*
88  * There are only 64 TLB entries on spitfire, 16 on cheetah
89  * (fully-associative TLB) so we allow the cpu module to tune the
90  * number to use here via pp_slots.
 *
 * The array below holds one entry per CPU (NCPU entries), each with room
 * for MAXPP_SLOTS slot addresses; ppmapinit() asserts that
 * pp_slots <= MAXPP_SLOTS.
 *
 * NOTE(review): nothing in the visible part of this file reads or writes
 * ppmap_va[]; confirm whether it is still needed.
91  */
92 static struct ppmap_va {
93 	caddr_t	ppmap_slots[MAXPP_SLOTS];
94 } ppmap_va[NCPU];
95 
96 void
97 ppmapinit(void)
98 {
99 	int color, nset, setsize;
100 	caddr_t va;
101 
102 	ASSERT(pp_slots <= MAXPP_SLOTS);
103 
104 	va = (caddr_t)PPMAPBASE;
105 	if (cache & CACHE_VAC) {
106 		int a;
107 
108 		ppmap_pages = mmu_btop(shm_alignment);
109 		nsets = PPMAPSIZE / shm_alignment;
110 		setsize = shm_alignment;
111 		ppmap_shift = MMU_PAGESHIFT;
112 		a = ppmap_pages;
113 		while (a >>= 1)
114 			ppmap_shift++;
115 	} else {
116 		/*
117 		 * If we do not have a virtual indexed cache we simply
118 		 * have only one set containing all pages.
119 		 */
120 		ppmap_pages = 1;
121 		nsets = mmu_btop(PPMAPSIZE);
122 		setsize = MMU_PAGESIZE;
123 		ppmap_shift = MMU_PAGESHIFT;
124 	}
125 	for (color = 0; color < ppmap_pages; color++) {
126 		for (nset = 0; nset < nsets; nset++) {
127 			ppmap_vaddrs[clsettoarray(color, nset)] =
128 			    (caddr_t)((uintptr_t)va + (nset * setsize));
129 		}
130 		va += MMU_PAGESIZE;
131 	}
132 }
133 
134 /*
135  * Allocate a cache consistent virtual address to map a page, pp,
136  * with protection, vprot; and map it in the MMU, using the most
137  * efficient means possible.  The argument avoid is a virtual address
138  * hint which when masked yields an offset into a virtual cache
139  * that should be avoided when allocating an address to map in a
140  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
141  *
142  * machine dependent, depends on virtual address space layout,
143  * understands that all kernel addresses have bit 31 set.
144  *
145  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
146  * that found in other architectures.  In other architectures the hint
147  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
148  * This was used to avoid virtual cache trashing in the bcopy.  Unfortunately
149  * in the case of a COW,  this later on caused a cache aliasing conflict.  In
150  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
151  * to worry about virtual cache trashing.  Actually, by using the hint to choose
152  * the right color we can almost guarantee a cache conflict will not occur.
153  */
154 
155 caddr_t
156 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
157 {
158 	int color, nset, index, start;
159 	caddr_t va;
160 
161 #ifdef PPDEBUG
162 	pp_allocs++;
163 #endif /* PPDEBUG */
164 	if (cache & CACHE_VAC) {
165 		color = sfmmu_get_ppvcolor(pp);
166 		if (color == -1) {
167 			if ((intptr_t)hint != -1L) {
168 				color = addr_to_vcolor(hint);
169 			} else {
170 				color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
171 			}
172 		}
173 
174 	} else {
175 		/*
176 		 * For physical caches, we can pick any address we want.
177 		 */
178 		color = 0;
179 	}
180 
181 	start = color;
182 	do {
183 		for (nset = 0; nset < nsets; nset++) {
184 			index = clsettoarray(color, nset);
185 			va = ppmap_vaddrs[index];
186 			if (va != NULL) {
187 #ifdef PPDEBUG
188 				align_hits[color]++;
189 #endif /* PPDEBUG */
190 				if (casptr(&ppmap_vaddrs[index],
191 				    va, NULL) == va) {
192 					hat_memload(kas.a_hat, va, pp,
193 						vprot | HAT_NOSYNC,
194 						HAT_LOAD_LOCK);
195 					return (va);
196 				}
197 			}
198 		}
199 		/*
200 		 * first pick didn't succeed, try another
201 		 */
202 		if (++color == ppmap_pages)
203 			color = 0;
204 	} while (color != start);
205 
206 #ifdef PPDEBUG
207 	ppalloc_noslot++;
208 #endif /* PPDEBUG */
209 
210 	/*
211 	 * No free slots; get a random one from the kernel heap area.
212 	 */
213 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
214 
215 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
216 
217 	return (va);
218 
219 }
220 
221 void
222 ppmapout(caddr_t va)
223 {
224 	int color, nset, index;
225 
226 	if (va >= kernelheap && va < ekernelheap) {
227 		/*
228 		 * Space came from kernelmap, flush the page and
229 		 * return the space.
230 		 */
231 		hat_unload(kas.a_hat, va, PAGESIZE,
232 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
233 		vmem_free(heap_arena, va, PAGESIZE);
234 	} else {
235 		/*
236 		 * Space came from ppmap_vaddrs[], give it back.
237 		 */
238 		color = addr_to_vcolor(va);
239 		ASSERT((cache & CACHE_VAC)? (color < ppmap_pages) : 1);
240 
241 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
242 		index = clsettoarray(color, nset);
243 		hat_unload(kas.a_hat, va, PAGESIZE,
244 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
245 
246 		ASSERT(ppmap_vaddrs[index] == NULL);
247 		ppmap_vaddrs[index] = va;
248 	}
249 }
250 
/*
 * Event counters, kept only in DEBUG builds.  PP_STAT_ADD(stat) increments
 * the named counter under DEBUG and expands to nothing otherwise, so call
 * sites need no #ifdef of their own.  The increment is a plain "++" with
 * no atomic operation or lock.
 *
 * ppzero counts pagezero() calls; ppzero_short counts those where len is
 * not MMU_PAGESIZE or use_hw_bzero is clear.  pload and ploadfail are not
 * referenced in the visible part of this file.
 */
251 #ifdef DEBUG
252 #define	PP_STAT_ADD(stat)	(stat)++
253 uint_t pload, ploadfail;
254 uint_t ppzero, ppzero_short;
255 #else
256 #define	PP_STAT_ADD(stat)
257 #endif /* DEBUG */
258 
/*
 * Flush the translation for va (in the ksfmmup context) with
 * vtag_flushpage() and then mark the slot *pslot free by storing NULL.
 * The caller must pass the slot that currently holds va; this is
 * asserted.  The flush is done before the slot is released.
 *
 * NOTE(review): no caller is visible in this part of the file.
 */
259 static void
260 pp_unload_tlb(caddr_t *pslot, caddr_t va)
261 {
262 	ASSERT(*pslot == va);
263 
264 	vtag_flushpage(va, (uint64_t)ksfmmup);
265 	*pslot = NULL;				/* release the slot */
266 }
267 
268 /*
269  * Routine to copy kernel pages during relocation.  It will copy one
270  * PAGESIZE page to another PAGESIZE page.  This function may be called
271  * above LOCK_LEVEL so it should not grab any locks.
272  */
273 void
274 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
275 {
276 	uint64_t fm_pa, to_pa;
277 	size_t nbytes;
278 
279 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
280 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
281 
282 	nbytes = MMU_PAGESIZE;
283 
284 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
285 		hw_pa_bcopy32(fm_pa, to_pa);
286 }
287 
288 /*
289  * Copy the data from the physical page represented by "frompp" to
290  * that represented by "topp".
291  *
292  * Try to use per cpu mapping first, if that fails then call pp_mapin
293  * to load it.
294  */
295 void
296 ppcopy(page_t *fm_pp, page_t *to_pp)
297 {
298 	caddr_t fm_va;
299 	caddr_t to_va;
300 	boolean_t fast;
301 
302 	ASSERT(PAGE_LOCKED(fm_pp));
303 	ASSERT(PAGE_LOCKED(to_pp));
304 
305 	/*
306 	 * Try to map using KPM.  If it fails, fall back to
307 	 * ppmapin/ppmapout.
308 	 */
309 	if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
310 	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
311 		if (fm_va != NULL)
312 			hat_kpm_mapout(fm_pp, NULL, fm_va);
313 		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
314 		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
315 		fast = B_FALSE;
316 	} else
317 		fast = B_TRUE;
318 
319 	bcopy(fm_va, to_va, PAGESIZE);
320 
321 	/* Unmap */
322 	if (fast) {
323 		hat_kpm_mapout(fm_pp, NULL, fm_va);
324 		hat_kpm_mapout(to_pp, NULL, to_va);
325 	} else {
326 		ppmapout(fm_va);
327 		ppmapout(to_va);
328 	}
329 }
330 
331 /*
332  * Zero the physical page from off to off + len given by `pp'
333  * without changing the reference and modified bits of page.
334  *
335  * Again, we'll try per cpu mapping first.
336  */
337 
338 void
339 pagezero(page_t *pp, uint_t off, uint_t len)
340 {
341 	caddr_t va;
342 	extern int hwblkclr(void *, size_t);
343 	extern int use_hw_bzero;
344 	boolean_t fast;
345 
346 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
347 	ASSERT(PAGE_LOCKED(pp));
348 
349 	PP_STAT_ADD(ppzero);
350 
351 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
352 		PP_STAT_ADD(ppzero_short);
353 	}
354 
355 	kpreempt_disable();
356 
357 	/*
358 	 * Try to use KPM.  If that fails, fall back to
359 	 * ppmapin/ppmapout.
360 	 */
361 	fast = B_TRUE;
362 	va = hat_kpm_mapin(pp, NULL);
363 	if (va == NULL) {
364 		fast = B_FALSE;
365 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
366 	}
367 
368 	if (!use_hw_bzero) {
369 		bzero(va + off, len);
370 		sync_icache(va + off, len);
371 	} else if (hwblkclr(va + off, len)) {
372 		/*
373 		 * We may not have used block commit asi.
374 		 * So flush the I-$ manually
375 		 */
376 		sync_icache(va + off, len);
377 	} else {
378 		/*
379 		 * We have used blk commit, and flushed the I-$.
380 		 * However we still may have an instruction in the
381 		 * pipeline. Only a flush will invalidate that.
382 		 */
383 		doflush(va);
384 	}
385 
386 	if (fast) {
387 		hat_kpm_mapout(pp, NULL, va);
388 	} else {
389 		ppmapout(va);
390 	}
391 	kpreempt_enable();
392 }
393