xref: /illumos-gate/usr/src/uts/sun4v/os/ppage.c (revision dd4eeefd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/archsystm.h>
31 #include <sys/machsystm.h>
32 #include <sys/t_lock.h>
33 #include <sys/vmem.h>
34 #include <sys/mman.h>
35 #include <sys/vm.h>
36 #include <sys/cpu.h>
37 #include <sys/cmn_err.h>
38 #include <sys/cpuvar.h>
39 #include <sys/atomic.h>
40 #include <vm/as.h>
41 #include <vm/hat.h>
42 #include <vm/as.h>
43 #include <vm/page.h>
44 #include <vm/seg.h>
45 #include <vm/seg_kmem.h>
46 #include <vm/hat_sfmmu.h>
47 #include <sys/debug.h>
48 #include <sys/cpu_module.h>
49 
50 /*
51  * A quick way to generate a cache consistent address to map in a page.
52  * users: ppcopy, pagezero, /proc, dev/mem
53  *
54  * The ppmapin/ppmapout routines provide a quick way of generating a cache
55  * consistent address by reserving a given amount of kernel address space.
56  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
57  * into x number of sets, where x is the number of colors for the virtual
58  * cache. The number of colors is how many times a page can be mapped
59  * simultaneously in the cache.  For direct map caches this translates to
60  * the number of pages in the cache.
61  * Each set will be assigned a group of virtual pages from the reserved memory
62  * depending on its virtual color.
63  * When trying to assign a virtual address we will find out the color for the
64  * physical page in question (if applicable).  Then we will try to find an
65  * available virtual page from the set of the appropriate color.
66  */
67 
68 int pp_slots = 4;		/* small default, tuned by cpu module */
69 
70 /* tuned by cpu module, default is "safe" */
71 int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
72 
73 static caddr_t	ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
74 static int	nsets;			/* number of sets */
75 static int	ppmap_shift;		/* set selector */
76 
77 #ifdef PPDEBUG
78 #define		MAXCOLORS	16	/* for debug only */
79 static int	ppalloc_noslot = 0;	/* # of allocations from kernelmap */
80 static int	align_hits;
81 static int	pp_allocs;		/* # of ppmapin requests */
82 #endif /* PPDEBUG */
83 
84 /*
85  * There are only 64 TLB entries on spitfire, 16 on cheetah
86  * (fully-associative TLB) so we allow the cpu module to tune the
87  * number to use here via pp_slots.
88  */
89 static struct ppmap_va {
90 	caddr_t	ppmap_slots[MAXPP_SLOTS];
91 } ppmap_va[NCPU];
92 
93 /* prevent compilation with VAC defined */
94 #ifdef VAC
95 #error "sun4v ppmapin and ppmapout do not support VAC"
96 #endif
97 
98 void
99 ppmapinit(void)
100 {
101 	int nset;
102 	caddr_t va;
103 
104 	ASSERT(pp_slots <= MAXPP_SLOTS);
105 
106 	va = (caddr_t)PPMAPBASE;
107 
108 	/*
109 	 * sun4v does not have a virtual indexed cache and simply
110 	 * has only one set containing all pages.
111 	 */
112 	nsets = mmu_btop(PPMAPSIZE);
113 	ppmap_shift = MMU_PAGESHIFT;
114 
115 	for (nset = 0; nset < nsets; nset++) {
116 		ppmap_vaddrs[nset] =
117 		    (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
118 	}
119 }
120 
121 /*
122  * Allocate a cache consistent virtual address to map a page, pp,
123  * with protection, vprot; and map it in the MMU, using the most
124  * efficient means possible.  The argument avoid is a virtual address
125  * hint which when masked yields an offset into a virtual cache
126  * that should be avoided when allocating an address to map in a
127  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
128  *
129  * machine dependent, depends on virtual address space layout,
130  * understands that all kernel addresses have bit 31 set.
131  *
132  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
133  * that found in other architectures.  In other architectures the hint
134  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
135  * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
136  * in the case of a COW, this later on caused a cache aliasing conflict.  In
137  * sun4, the bcopy routine uses the block ld/st instructions so we don't have
138  * to worry about virtual cache thrashing.  Actually, by using the hint to choose
139  * the right color we can almost guarantee a cache conflict will not occur.
140  */
141 
142 /*ARGSUSED2*/
143 caddr_t
144 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
145 {
146 	int nset;
147 	caddr_t va;
148 
149 #ifdef PPDEBUG
150 	pp_allocs++;
151 #endif /* PPDEBUG */
152 
153 	/*
154 	 * For sun4v caches are physical caches, we can pick any address
155 	 * we want.
156 	 */
157 	for (nset = 0; nset < nsets; nset++) {
158 		va = ppmap_vaddrs[nset];
159 		if (va != NULL) {
160 #ifdef PPDEBUG
161 			align_hits++;
162 #endif /* PPDEBUG */
163 			if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
164 				hat_memload(kas.a_hat, va, pp,
165 					vprot | HAT_NOSYNC,
166 					HAT_LOAD_LOCK);
167 				return (va);
168 			}
169 		}
170 	}
171 
172 #ifdef PPDEBUG
173 	ppalloc_noslot++;
174 #endif /* PPDEBUG */
175 
176 	/*
177 	 * No free slots; get a random one from the kernel heap area.
178 	 */
179 	va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
180 
181 	hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
182 
183 	return (va);
184 
185 }
186 
187 void
188 ppmapout(caddr_t va)
189 {
190 	int nset;
191 
192 	if (va >= kernelheap && va < ekernelheap) {
193 		/*
194 		 * Space came from kernelmap, flush the page and
195 		 * return the space.
196 		 */
197 		hat_unload(kas.a_hat, va, PAGESIZE,
198 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
199 		vmem_free(heap_arena, va, PAGESIZE);
200 	} else {
201 		/*
202 		 * Space came from ppmap_vaddrs[], give it back.
203 		 */
204 		nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
205 		hat_unload(kas.a_hat, va, PAGESIZE,
206 		    (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
207 
208 		ASSERT(ppmap_vaddrs[nset] == NULL);
209 		ppmap_vaddrs[nset] = va;
210 	}
211 }
212 
/*
 * Statistics counters, maintained on DEBUG kernels only.  On non-DEBUG
 * kernels PP_STAT_ADD() expands to nothing and the counters do not exist.
 */
#if defined(DEBUG)
#define	PP_STAT_ADD(stat)	((stat)++)
uint_t pload;
uint_t ploadfail;
uint_t ppzero;
uint_t ppzero_short;
#else
#define	PP_STAT_ADD(stat)
#endif /* DEBUG */
220 
221 static void
222 pp_unload_tlb(caddr_t *pslot, caddr_t va)
223 {
224 	ASSERT(*pslot == va);
225 
226 	vtag_flushpage(va, (uint64_t)ksfmmup);
227 	*pslot = NULL;				/* release the slot */
228 }
229 
230 /*
231  * Routine to copy kernel pages during relocation.  It will copy one
232  * PAGESIZE page to another PAGESIZE page.  This function may be called
233  * above LOCK_LEVEL so it should not grab any locks.
234  */
235 void
236 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
237 {
238 	uint64_t fm_pa, to_pa;
239 	size_t nbytes;
240 
241 	fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
242 	to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
243 
244 	nbytes = MMU_PAGESIZE;
245 
246 	for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
247 		hw_pa_bcopy32(fm_pa, to_pa);
248 }
249 
250 /*
251  * Copy the data from the physical page represented by "frompp" to
252  * that represented by "topp".
253  *
254  * Try to use per cpu mapping first, if that fails then call pp_mapin
255  * to load it.
256  * Returns one on success or zero on some sort of fault while doing the copy.
257  */
258 int
259 ppcopy(page_t *fm_pp, page_t *to_pp)
260 {
261 	caddr_t fm_va;
262 	caddr_t to_va;
263 	boolean_t fast;
264 	label_t ljb;
265 	int ret = 1;
266 
267 	ASSERT(PAGE_LOCKED(fm_pp));
268 	ASSERT(PAGE_LOCKED(to_pp));
269 
270 	/*
271 	 * Try to map using KPM.  If it fails, fall back to
272 	 * ppmapin/ppmapout.
273 	 */
274 	if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
275 	    (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
276 		if (fm_va != NULL)
277 			hat_kpm_mapout(fm_pp, NULL, fm_va);
278 		fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
279 		to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
280 		fast = B_FALSE;
281 	} else
282 		fast = B_TRUE;
283 
284 	if (on_fault(&ljb)) {
285 		ret = 0;
286 		goto faulted;
287 	}
288 	bcopy(fm_va, to_va, PAGESIZE);
289 	no_fault();
290 faulted:
291 
292 	/* Unmap */
293 	if (fast) {
294 		hat_kpm_mapout(fm_pp, NULL, fm_va);
295 		hat_kpm_mapout(to_pp, NULL, to_va);
296 	} else {
297 		ppmapout(fm_va);
298 		ppmapout(to_va);
299 	}
300 	return (ret);
301 }
302 
303 /*
304  * Zero the physical page from off to off + len given by `pp'
305  * without changing the reference and modified bits of page.
306  *
307  * Again, we'll try per cpu mapping first.
308  */
309 
310 void
311 pagezero(page_t *pp, uint_t off, uint_t len)
312 {
313 	caddr_t va;
314 	extern int hwblkclr(void *, size_t);
315 	extern int use_hw_bzero;
316 	boolean_t fast;
317 
318 	ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
319 	ASSERT(PAGE_LOCKED(pp));
320 
321 	PP_STAT_ADD(ppzero);
322 
323 	if (len != MMU_PAGESIZE || !use_hw_bzero) {
324 		PP_STAT_ADD(ppzero_short);
325 	}
326 
327 	kpreempt_disable();
328 
329 	/*
330 	 * Try to use KPM.  If that fails, fall back to
331 	 * ppmapin/ppmapout.
332 	 */
333 	fast = B_TRUE;
334 	va = hat_kpm_mapin(pp, NULL);
335 	if (va == NULL) {
336 		fast = B_FALSE;
337 		va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
338 	}
339 
340 	if (!use_hw_bzero) {
341 		bzero(va + off, len);
342 		sync_icache(va + off, len);
343 	} else if (hwblkclr(va + off, len)) {
344 		/*
345 		 * We may not have used block commit asi.
346 		 * So flush the I-$ manually
347 		 */
348 		sync_icache(va + off, len);
349 	} else {
350 		/*
351 		 * We have used blk commit, and flushed the I-$.
352 		 * However we still may have an instruction in the
353 		 * pipeline. Only a flush will invalidate that.
354 		 */
355 		doflush(va);
356 	}
357 
358 	if (fast) {
359 		hat_kpm_mapout(pp, NULL, va);
360 	} else {
361 		ppmapout(va);
362 	}
363 	kpreempt_enable();
364 }
365