xref: /illumos-gate/usr/src/uts/sun4u/vm/mach_kpm.c (revision 602ca9ea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Kernel Physical Mapping (segkpm) hat interface routines for sun4u.
30  */
31 
32 #include <sys/types.h>
33 #include <vm/hat.h>
34 #include <vm/hat_sfmmu.h>
35 #include <vm/page.h>
36 #include <sys/sysmacros.h>
37 #include <sys/cmn_err.h>
38 #include <sys/machsystm.h>
39 #include <vm/seg_kpm.h>
40 #include <sys/cpu_module.h>
41 #include <vm/mach_kpm.h>
42 
43 /* kpm prototypes */
44 static caddr_t	sfmmu_kpm_mapin(page_t *);
45 static void	sfmmu_kpm_mapout(page_t *, caddr_t);
46 static int	sfmmu_kpme_lookup(struct kpme *, page_t *);
47 static void	sfmmu_kpme_add(struct kpme *, page_t *);
48 static void	sfmmu_kpme_sub(struct kpme *, page_t *);
49 static caddr_t	sfmmu_kpm_getvaddr(page_t *, int *);
50 static int	sfmmu_kpm_fault(caddr_t, struct memseg *, page_t *);
51 static int	sfmmu_kpm_fault_small(caddr_t, struct memseg *, page_t *);
52 static void	sfmmu_kpm_vac_conflict(page_t *, caddr_t);
53 void	sfmmu_kpm_pageunload(page_t *);
54 void	sfmmu_kpm_vac_unload(page_t *, caddr_t);
55 static void	sfmmu_kpm_demap_large(caddr_t);
56 static void	sfmmu_kpm_demap_small(caddr_t);
57 static void	sfmmu_kpm_demap_tlbs(caddr_t);
58 void	sfmmu_kpm_hme_unload(page_t *);
59 kpm_hlk_t *sfmmu_kpm_kpmp_enter(page_t *, pgcnt_t);
60 void	sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp);
61 void	sfmmu_kpm_page_cache(page_t *, int, int);
62 
63 /*
64  * Kernel Physical Mapping (kpm) facility
65  */
66 
/*
 * Machine specific kpm initialization hook.
 * The body is intentionally empty in this file. The definition uses
 * a (void) parameter list so that it provides a real prototype.
 */
void
mach_kpm_init(void)
{
}
70 
71 /* -- hat_kpm interface section -- */
72 
73 /*
74  * Mapin a locked page and return the vaddr.
75  * When a kpme is provided by the caller it is added to
76  * the page p_kpmelist. The page to be mapped in must
77  * be at least read locked (p_selock).
78  */
79 caddr_t
80 hat_kpm_mapin(struct page *pp, struct kpme *kpme)
81 {
82 	kmutex_t	*pml;
83 	caddr_t		vaddr;
84 
85 	if (kpm_enable == 0) {
86 		cmn_err(CE_WARN, "hat_kpm_mapin: kpm_enable not set");
87 		return ((caddr_t)NULL);
88 	}
89 
90 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
91 		cmn_err(CE_WARN, "hat_kpm_mapin: pp zero or not locked");
92 		return ((caddr_t)NULL);
93 	}
94 
95 	pml = sfmmu_mlist_enter(pp);
96 	ASSERT(pp->p_kpmref >= 0);
97 
98 	vaddr = (pp->p_kpmref == 0) ?
99 		sfmmu_kpm_mapin(pp) : hat_kpm_page2va(pp, 1);
100 
101 	if (kpme != NULL) {
102 		/*
103 		 * Tolerate multiple mapins for the same kpme to avoid
104 		 * the need for an extra serialization.
105 		 */
106 		if ((sfmmu_kpme_lookup(kpme, pp)) == 0)
107 			sfmmu_kpme_add(kpme, pp);
108 
109 		ASSERT(pp->p_kpmref > 0);
110 
111 	} else {
112 		pp->p_kpmref++;
113 	}
114 
115 	sfmmu_mlist_exit(pml);
116 	return (vaddr);
117 }
118 
119 /*
120  * Mapout a locked page.
121  * When a kpme is provided by the caller it is removed from
122  * the page p_kpmelist. The page to be mapped out must be at
123  * least read locked (p_selock).
124  * Note: The seg_kpm layer provides a mapout interface for the
125  * case that a kpme is used and the underlying page is unlocked.
126  * This can be used instead of calling this function directly.
127  */
128 void
129 hat_kpm_mapout(struct page *pp, struct kpme *kpme, caddr_t vaddr)
130 {
131 	kmutex_t	*pml;
132 
133 	if (kpm_enable == 0) {
134 		cmn_err(CE_WARN, "hat_kpm_mapout: kpm_enable not set");
135 		return;
136 	}
137 
138 	if (IS_KPM_ADDR(vaddr) == 0) {
139 		cmn_err(CE_WARN, "hat_kpm_mapout: no kpm address");
140 		return;
141 	}
142 
143 	if (pp == NULL || PAGE_LOCKED(pp) == 0) {
144 		cmn_err(CE_WARN, "hat_kpm_mapout: page zero or not locked");
145 		return;
146 	}
147 
148 	if (kpme != NULL) {
149 		ASSERT(pp == kpme->kpe_page);
150 		pp = kpme->kpe_page;
151 		pml = sfmmu_mlist_enter(pp);
152 
153 		if (sfmmu_kpme_lookup(kpme, pp) == 0)
154 			panic("hat_kpm_mapout: kpme not found pp=%p",
155 				(void *)pp);
156 
157 		ASSERT(pp->p_kpmref > 0);
158 		sfmmu_kpme_sub(kpme, pp);
159 
160 	} else {
161 		pml = sfmmu_mlist_enter(pp);
162 		pp->p_kpmref--;
163 	}
164 
165 	ASSERT(pp->p_kpmref >= 0);
166 	if (pp->p_kpmref == 0)
167 		sfmmu_kpm_mapout(pp, vaddr);
168 
169 	sfmmu_mlist_exit(pml);
170 }
171 
172 /*
173  * Return the kpm virtual address for the page at pp.
174  * If checkswap is non zero and the page is backed by a
175  * swap vnode the physical address is used rather than
176  * p_offset to determine the kpm region.
177  * Note: The function has to be used w/ extreme care. The
178  * stability of the page identity is in the responsibility
179  * of the caller.
180  */
181 /*ARGSUSED*/
182 caddr_t
183 hat_kpm_page2va(struct page *pp, int checkswap)
184 {
185 	int		vcolor, vcolor_pa;
186 	uintptr_t	paddr, vaddr;
187 
188 	ASSERT(kpm_enable);
189 
190 	paddr = ptob(pp->p_pagenum);
191 	vcolor_pa = addr_to_vcolor(paddr);
192 
193 	if (checkswap && pp->p_vnode && IS_SWAPFSVP(pp->p_vnode))
194 		vcolor = (PP_ISNC(pp)) ? vcolor_pa : PP_GET_VCOLOR(pp);
195 	else
196 		vcolor = addr_to_vcolor(pp->p_offset);
197 
198 	vaddr = (uintptr_t)kpm_vbase + paddr;
199 
200 	if (vcolor_pa != vcolor) {
201 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
202 		vaddr += (vcolor_pa > vcolor) ?
203 			((uintptr_t)vcolor_pa << kpm_size_shift) :
204 			((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
205 	}
206 
207 	return ((caddr_t)vaddr);
208 }
209 
210 /*
211  * Return the page for the kpm virtual address vaddr.
212  * Caller is responsible for the kpm mapping and lock
213  * state of the page.
214  */
215 page_t *
216 hat_kpm_vaddr2page(caddr_t vaddr)
217 {
218 	uintptr_t	paddr;
219 	pfn_t		pfn;
220 
221 	ASSERT(IS_KPM_ADDR(vaddr));
222 
223 	SFMMU_KPM_VTOP(vaddr, paddr);
224 	pfn = (pfn_t)btop(paddr);
225 
226 	return (page_numtopp_nolock(pfn));
227 }
228 
/*
 * PP2KPMPG(pp, kp): page to kpm_page.
 *
 * Looks up the memseg of page pp (no locks taken) and stores the
 * address of the corresponding kpm_page_t within that memseg's
 * kpm_pages array into kp. Both arguments are parenthesized on use
 * so that arbitrary expressions may be passed; pp is evaluated
 * exactly once. The macro's locals carry a pk_ prefix to keep them
 * from capturing names used in the caller's arguments.
 */
#define	PP2KPMPG(pp, kp) {						\
	struct memseg	*pk_mseg;					\
	pgcnt_t		pk_inx;						\
	pfn_t		pk_pfn;						\
									\
	pk_pfn = (pp)->p_pagenum;					\
	pk_mseg = page_numtomemseg_nolock(pk_pfn);			\
	ASSERT(pk_mseg);						\
	pk_inx = ptokpmp(kpmptop(ptokpmp(pk_pfn)) -			\
	    pk_mseg->kpm_pbase);					\
	ASSERT(pk_inx < pk_mseg->kpm_nkpmpgs);				\
	(kp) = &pk_mseg->kpm_pages[pk_inx];				\
}
242 
/*
 * PP2KPMSPG(pp, ksp): page to kpm_spage.
 *
 * Looks up the memseg of page pp (no locks taken) and stores the
 * address of the corresponding kpm_spage_t within that memseg's
 * kpm_spages array into ksp. Both arguments are parenthesized on use
 * so that arbitrary expressions may be passed; pp is evaluated
 * exactly once. The macro's locals carry a pk_ prefix to keep them
 * from capturing names used in the caller's arguments.
 */
#define	PP2KPMSPG(pp, ksp) {						\
	struct memseg	*pk_mseg;					\
	pgcnt_t		pk_inx;						\
	pfn_t		pk_pfn;						\
									\
	pk_pfn = (pp)->p_pagenum;					\
	pk_mseg = page_numtomemseg_nolock(pk_pfn);			\
	ASSERT(pk_mseg);						\
	pk_inx = pk_pfn - pk_mseg->kpm_pbase;				\
	(ksp) = &pk_mseg->kpm_spages[pk_inx];				\
}
255 
256 /*
257  * hat_kpm_fault is called from segkpm_fault when a kpm tsbmiss occurred
258  * which could not be resolved by the trap level tsbmiss handler for the
259  * following reasons:
260  * . The vaddr is in VAC alias range (always PAGESIZE mapping size).
261  * . The kpm (s)page range of vaddr is in a VAC alias prevention state.
262  * . tsbmiss handling at trap level is not desired (DEBUG kernel only,
263  *   kpm_tsbmtl == 0).
264  */
265 int
266 hat_kpm_fault(struct hat *hat, caddr_t vaddr)
267 {
268 	int		error;
269 	uintptr_t	paddr;
270 	pfn_t		pfn;
271 	struct memseg	*mseg;
272 	page_t	*pp;
273 
274 	if (kpm_enable == 0) {
275 		cmn_err(CE_WARN, "hat_kpm_fault: kpm_enable not set");
276 		return (ENOTSUP);
277 	}
278 
279 	ASSERT(hat == ksfmmup);
280 	ASSERT(IS_KPM_ADDR(vaddr));
281 
282 	SFMMU_KPM_VTOP(vaddr, paddr);
283 	pfn = (pfn_t)btop(paddr);
284 	mseg = page_numtomemseg_nolock(pfn);
285 	if (mseg == NULL)
286 		return (EFAULT);
287 
288 	pp = &mseg->pages[(pgcnt_t)(pfn - mseg->pages_base)];
289 	ASSERT((pfn_t)pp->p_pagenum == pfn);
290 
291 	if (!PAGE_LOCKED(pp))
292 		return (EFAULT);
293 
294 	if (kpm_smallpages == 0)
295 		error = sfmmu_kpm_fault(vaddr, mseg, pp);
296 	else
297 		error = sfmmu_kpm_fault_small(vaddr, mseg, pp);
298 
299 	return (error);
300 }
301 
302 /*
303  * memseg_hash[] was cleared, need to clear memseg_phash[] too.
304  */
305 void
306 hat_kpm_mseghash_clear(int nentries)
307 {
308 	pgcnt_t i;
309 
310 	if (kpm_enable == 0)
311 		return;
312 
313 	for (i = 0; i < nentries; i++)
314 		memseg_phash[i] = MSEG_NULLPTR_PA;
315 }
316 
317 /*
318  * Update memseg_phash[inx] when memseg_hash[inx] was changed.
319  */
320 void
321 hat_kpm_mseghash_update(pgcnt_t inx, struct memseg *msp)
322 {
323 	if (kpm_enable == 0)
324 		return;
325 
326 	memseg_phash[inx] = (msp) ? va_to_pa(msp) : MSEG_NULLPTR_PA;
327 }
328 
329 /*
330  * Update kpm memseg members from basic memseg info.
331  */
332 void
333 hat_kpm_addmem_mseg_update(struct memseg *msp, pgcnt_t nkpmpgs,
334 	offset_t kpm_pages_off)
335 {
336 	if (kpm_enable == 0)
337 		return;
338 
339 	msp->kpm_pages = (kpm_page_t *)((caddr_t)msp->pages + kpm_pages_off);
340 	msp->kpm_nkpmpgs = nkpmpgs;
341 	msp->kpm_pbase = kpmptop(ptokpmp(msp->pages_base));
342 	msp->pagespa = va_to_pa(msp->pages);
343 	msp->epagespa = va_to_pa(msp->epages);
344 	msp->kpm_pagespa = va_to_pa(msp->kpm_pages);
345 }
346 
347 /*
348  * Setup nextpa when a memseg is inserted.
349  * Assumes that the memsegslock is already held.
350  */
351 void
352 hat_kpm_addmem_mseg_insert(struct memseg *msp)
353 {
354 	if (kpm_enable == 0)
355 		return;
356 
357 	ASSERT(memsegs_lock_held());
358 	msp->nextpa = (memsegs) ? va_to_pa(memsegs) : MSEG_NULLPTR_PA;
359 }
360 
361 /*
362  * Setup memsegspa when a memseg is (head) inserted.
363  * Called before memsegs is updated to complete a
364  * memseg insert operation.
365  * Assumes that the memsegslock is already held.
366  */
367 void
368 hat_kpm_addmem_memsegs_update(struct memseg *msp)
369 {
370 	if (kpm_enable == 0)
371 		return;
372 
373 	ASSERT(memsegs_lock_held());
374 	ASSERT(memsegs);
375 	memsegspa = va_to_pa(msp);
376 }
377 
378 /*
379  * Return end of metadata for an already setup memseg.
380  *
381  * Note: kpm_pages and kpm_spages are aliases and the underlying
382  * member of struct memseg is a union, therefore they always have
383  * the same address within a memseg. They must be differentiated
384  * when pointer arithmetic is used with them.
385  */
386 caddr_t
387 hat_kpm_mseg_reuse(struct memseg *msp)
388 {
389 	caddr_t end;
390 
391 	if (kpm_smallpages == 0)
392 		end = (caddr_t)(msp->kpm_pages + msp->kpm_nkpmpgs);
393 	else
394 		end = (caddr_t)(msp->kpm_spages + msp->kpm_nkpmpgs);
395 
396 	return (end);
397 }
398 
399 /*
400  * Update memsegspa (when first memseg in list
401  * is deleted) or nextpa  when a memseg deleted.
402  * Assumes that the memsegslock is already held.
403  */
404 void
405 hat_kpm_delmem_mseg_update(struct memseg *msp, struct memseg **mspp)
406 {
407 	struct memseg *lmsp;
408 
409 	if (kpm_enable == 0)
410 		return;
411 
412 	ASSERT(memsegs_lock_held());
413 
414 	if (mspp == &memsegs) {
415 		memsegspa = (msp->next) ?
416 				va_to_pa(msp->next) : MSEG_NULLPTR_PA;
417 	} else {
418 		lmsp = (struct memseg *)
419 			((uint64_t)mspp - offsetof(struct memseg, next));
420 		lmsp->nextpa = (msp->next) ?
421 				va_to_pa(msp->next) : MSEG_NULLPTR_PA;
422 	}
423 }
424 
425 /*
426  * Update kpm members for all memseg's involved in a split operation
427  * and do the atomic update of the physical memseg chain.
428  *
429  * Note: kpm_pages and kpm_spages are aliases and the underlying member
430  * of struct memseg is a union, therefore they always have the same
431  * address within a memseg. With that the direct assignments and
432  * va_to_pa conversions below don't have to be distinguished wrt. to
433  * kpm_smallpages. They must be differentiated when pointer arithmetic
434  * is used with them.
435  *
436  * Assumes that the memsegslock is already held.
437  */
438 void
439 hat_kpm_split_mseg_update(struct memseg *msp, struct memseg **mspp,
440 	struct memseg *lo, struct memseg *mid, struct memseg *hi)
441 {
442 	pgcnt_t start, end, kbase, kstart, num;
443 	struct memseg *lmsp;
444 
445 	if (kpm_enable == 0)
446 		return;
447 
448 	ASSERT(memsegs_lock_held());
449 	ASSERT(msp && mid && msp->kpm_pages);
450 
451 	kbase = ptokpmp(msp->kpm_pbase);
452 
453 	if (lo) {
454 		num = lo->pages_end - lo->pages_base;
455 		start = kpmptop(ptokpmp(lo->pages_base));
456 		/* align end to kpm page size granularity */
457 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
458 		lo->kpm_pbase = start;
459 		lo->kpm_nkpmpgs = ptokpmp(end - start);
460 		lo->kpm_pages = msp->kpm_pages;
461 		lo->kpm_pagespa = va_to_pa(lo->kpm_pages);
462 		lo->pagespa = va_to_pa(lo->pages);
463 		lo->epagespa = va_to_pa(lo->epages);
464 		lo->nextpa = va_to_pa(lo->next);
465 	}
466 
467 	/* mid */
468 	num = mid->pages_end - mid->pages_base;
469 	kstart = ptokpmp(mid->pages_base);
470 	start = kpmptop(kstart);
471 	/* align end to kpm page size granularity */
472 	end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
473 	mid->kpm_pbase = start;
474 	mid->kpm_nkpmpgs = ptokpmp(end - start);
475 	if (kpm_smallpages == 0) {
476 		mid->kpm_pages = msp->kpm_pages + (kstart - kbase);
477 	} else {
478 		mid->kpm_spages = msp->kpm_spages + (kstart - kbase);
479 	}
480 	mid->kpm_pagespa = va_to_pa(mid->kpm_pages);
481 	mid->pagespa = va_to_pa(mid->pages);
482 	mid->epagespa = va_to_pa(mid->epages);
483 	mid->nextpa = (mid->next) ?  va_to_pa(mid->next) : MSEG_NULLPTR_PA;
484 
485 	if (hi) {
486 		num = hi->pages_end - hi->pages_base;
487 		kstart = ptokpmp(hi->pages_base);
488 		start = kpmptop(kstart);
489 		/* align end to kpm page size granularity */
490 		end = kpmptop(ptokpmp(start + num - 1)) + kpmpnpgs;
491 		hi->kpm_pbase = start;
492 		hi->kpm_nkpmpgs = ptokpmp(end - start);
493 		if (kpm_smallpages == 0) {
494 			hi->kpm_pages = msp->kpm_pages + (kstart - kbase);
495 		} else {
496 			hi->kpm_spages = msp->kpm_spages + (kstart - kbase);
497 		}
498 		hi->kpm_pagespa = va_to_pa(hi->kpm_pages);
499 		hi->pagespa = va_to_pa(hi->pages);
500 		hi->epagespa = va_to_pa(hi->epages);
501 		hi->nextpa = (hi->next) ? va_to_pa(hi->next) : MSEG_NULLPTR_PA;
502 	}
503 
504 	/*
505 	 * Atomic update of the physical memseg chain
506 	 */
507 	if (mspp == &memsegs) {
508 		memsegspa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
509 	} else {
510 		lmsp = (struct memseg *)
511 			((uint64_t)mspp - offsetof(struct memseg, next));
512 		lmsp->nextpa = (lo) ? va_to_pa(lo) : va_to_pa(mid);
513 	}
514 }
515 
516 /*
517  * Walk the memsegs chain, applying func to each memseg span and vcolor.
518  */
519 void
520 hat_kpm_walk(void (*func)(void *, void *, size_t), void *arg)
521 {
522 	pfn_t	pbase, pend;
523 	int	vcolor;
524 	void	*base;
525 	size_t	size;
526 	struct memseg *msp;
527 	extern uint_t vac_colors;
528 
529 	for (msp = memsegs; msp; msp = msp->next) {
530 		pbase = msp->pages_base;
531 		pend = msp->pages_end;
532 		for (vcolor = 0; vcolor < vac_colors; vcolor++) {
533 			base = ptob(pbase) + kpm_vbase + kpm_size * vcolor;
534 			size = ptob(pend - pbase);
535 			func(arg, base, size);
536 		}
537 	}
538 }
539 
540 
541 /* -- sfmmu_kpm internal section -- */
542 
543 /*
544  * Return the page frame number if a valid segkpm mapping exists
545  * for vaddr, otherwise return PFN_INVALID. No locks are grabbed.
546  * Should only be used by other sfmmu routines.
547  */
548 pfn_t
549 sfmmu_kpm_vatopfn(caddr_t vaddr)
550 {
551 	uintptr_t	paddr;
552 	pfn_t		pfn;
553 	page_t	*pp;
554 
555 	ASSERT(kpm_enable && IS_KPM_ADDR(vaddr));
556 
557 	SFMMU_KPM_VTOP(vaddr, paddr);
558 	pfn = (pfn_t)btop(paddr);
559 	pp = page_numtopp_nolock(pfn);
560 	if (pp && pp->p_kpmref)
561 		return (pfn);
562 	else
563 		return ((pfn_t)PFN_INVALID);
564 }
565 
566 /*
567  * Lookup a kpme in the p_kpmelist.
568  */
569 static int
570 sfmmu_kpme_lookup(struct kpme *kpme, page_t *pp)
571 {
572 	struct kpme	*p;
573 
574 	for (p = pp->p_kpmelist; p; p = p->kpe_next) {
575 		if (p == kpme)
576 			return (1);
577 	}
578 	return (0);
579 }
580 
581 /*
582  * Insert a kpme into the p_kpmelist and increment
583  * the per page kpm reference count.
584  */
585 static void
586 sfmmu_kpme_add(struct kpme *kpme, page_t *pp)
587 {
588 	ASSERT(pp->p_kpmref >= 0);
589 
590 	/* head insert */
591 	kpme->kpe_prev = NULL;
592 	kpme->kpe_next = pp->p_kpmelist;
593 
594 	if (pp->p_kpmelist)
595 		pp->p_kpmelist->kpe_prev = kpme;
596 
597 	pp->p_kpmelist = kpme;
598 	kpme->kpe_page = pp;
599 	pp->p_kpmref++;
600 }
601 
602 /*
603  * Remove a kpme from the p_kpmelist and decrement
604  * the per page kpm reference count.
605  */
606 static void
607 sfmmu_kpme_sub(struct kpme *kpme, page_t *pp)
608 {
609 	ASSERT(pp->p_kpmref > 0);
610 
611 	if (kpme->kpe_prev) {
612 		ASSERT(pp->p_kpmelist != kpme);
613 		ASSERT(kpme->kpe_prev->kpe_page == pp);
614 		kpme->kpe_prev->kpe_next = kpme->kpe_next;
615 	} else {
616 		ASSERT(pp->p_kpmelist == kpme);
617 		pp->p_kpmelist = kpme->kpe_next;
618 	}
619 
620 	if (kpme->kpe_next) {
621 		ASSERT(kpme->kpe_next->kpe_page == pp);
622 		kpme->kpe_next->kpe_prev = kpme->kpe_prev;
623 	}
624 
625 	kpme->kpe_next = kpme->kpe_prev = NULL;
626 	kpme->kpe_page = NULL;
627 	pp->p_kpmref--;
628 }
629 
/*
 * Mapin a single page. It is called every time a page changes its state
 * from kpm-unmapped to kpm-mapped. It may not be called when only a new
 * kpm instance does a mapin and wants to share the mapping.
 * Assumes that the mlist mutex is already grabbed.
 *
 * Returns the kpm virtual address determined by sfmmu_kpm_getvaddr().
 *
 * Two separate paths exist, selected by kpm_smallpages:
 * . Large page mode: the bookkeeping is kept per kpm_page and is
 *   updated while holding the hashed khl_mutex. kp_refcnt counts
 *   mappings covered by the large mapping, kp_refcnts counts small
 *   (PAGESIZE) mappings within the regular range, kp_refcnta counts
 *   mappings in the VAC alias range and kp_refcntc counts conflict
 *   pages, with the value -1 serving as the "go" indication for the
 *   trap level tsbmiss handler.
 * . Small page mode: the per page kp_mapped state of the kpm_spage
 *   is switched via sfmmu_kpm_stsbmtl(); finding a non-zero previous
 *   value there is treated as fatal.
 */
static caddr_t
sfmmu_kpm_mapin(page_t *pp)
{
	kpm_page_t	*kp;
	kpm_hlk_t	*kpmp;
	caddr_t		vaddr;
	int		kpm_vac_range;
	pfn_t		pfn;
	tte_t		tte;
	kmutex_t	*pmtx;
	int		uncached;
	kpm_spage_t	*ksp;
	kpm_shlk_t	*kpmsp;
	int		oldval;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(pp->p_kpmref == 0);

	/* kpm_vac_range is non-zero when vaddr is outside the regular range */
	vaddr = sfmmu_kpm_getvaddr(pp, &kpm_vac_range);

	ASSERT(IS_KPM_ADDR(vaddr));
	uncached = PP_ISNC(pp);
	pfn = pp->p_pagenum;

	if (kpm_smallpages)
		goto smallpages_mapin;

	PP2KPMPG(pp, kp);

	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	ASSERT(PP_ISKPMC(pp) == 0);
	ASSERT(PP_ISKPMS(pp) == 0);

	if (uncached) {
		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
		if (kpm_vac_range == 0) {
			if (kp->kp_refcnts == 0) {
				/*
				 * Must remove large page mapping if it exists.
				 * Pages in uncached state can only be mapped
				 * small (PAGESIZE) within the regular kpm
				 * range.
				 */
				if (kp->kp_refcntc == -1) {
					/* remove go indication */
					sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
						&kpmp->khl_lock, KPMTSBM_STOP);
				}
				if (kp->kp_refcnt > 0 && kp->kp_refcntc == 0)
					sfmmu_kpm_demap_large(vaddr);
			}
			ASSERT(kp->kp_refcntc >= 0);
			kp->kp_refcntc++;
		}
		/* mark the page as a kpm conflict page */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMC(pp);
		sfmmu_page_exit(pmtx);
	}

	if ((kp->kp_refcntc > 0 || kp->kp_refcnts > 0) && kpm_vac_range == 0) {
		/*
		 * Have to do a small (PAGESIZE) mapin within this kpm_page
		 * range since it is marked to be in VAC conflict mode or
		 * when there are still other small mappings around.
		 */

		/* tte assembly */
		if (uncached == 0)
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
		else
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);

		/* tsb dropin */
		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

		/* mark the page as mapped small */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMS(pp);
		sfmmu_page_exit(pmtx);

		kp->kp_refcnts++;
		ASSERT(kp->kp_refcnts > 0);
		goto exit;
	}

	if (kpm_vac_range == 0) {
		/*
		 * Fast path / regular case, no VAC conflict handling
		 * in progress within this kpm_page range.
		 */
		if (kp->kp_refcnt == 0) {
			/* first mapping within this kpm_page */

			/* tte assembly */
			KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);

			/* tsb dropin */
			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);

			/* Set go flag for TL tsbmiss handler */
			if (kp->kp_refcntc == 0)
				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
						&kpmp->khl_lock, KPMTSBM_START);

			ASSERT(kp->kp_refcntc == -1);
		}
		kp->kp_refcnt++;
		ASSERT(kp->kp_refcnt);

	} else {
		/*
		 * The page is not setup according to the common VAC
		 * prevention rules for the regular and kpm mapping layer
		 * E.g. the page layer was not able to deliver a right
		 * vcolor'ed page for a given vaddr corresponding to
		 * the wanted p_offset. It has to be mapped in small in
		 * within the corresponding kpm vac range in order to
		 * prevent VAC alias conflicts.
		 */

		/* tte assembly */
		if (uncached == 0) {
			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
		} else {
			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
		}

		/* tsb dropin */
		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

		kp->kp_refcnta++;
		if (kp->kp_refcntc == -1) {
			ASSERT(kp->kp_refcnt > 0);

			/* remove go indication */
			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
					KPMTSBM_STOP);
		}
		ASSERT(kp->kp_refcntc >= 0);
	}
exit:
	/* common exit of the large page mode paths */
	mutex_exit(&kpmp->khl_mutex);
	return (vaddr);

smallpages_mapin:
	/* small page mode: no kpm_page bookkeeping, khl_mutex not taken */
	if (uncached == 0) {
		/* tte assembly */
		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
	} else {
		/* ASSERT(pp->p_share); XXX use hat_page_getshare */
		pmtx = sfmmu_page_enter(pp);
		PP_SETKPMC(pp);
		sfmmu_page_exit(pmtx);
		/* tte assembly */
		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
	}

	/* tsb dropin */
	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);

	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	/* record the new mapped state; the previous state must be 0 */
	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped, &kpmsp->kshl_lock,
				(uncached) ? KPM_MAPPEDSC : KPM_MAPPEDS);

	if (oldval != 0)
		panic("sfmmu_kpm_mapin: stale smallpages mapping");

	return (vaddr);
}
807 
/*
 * Mapout a single page. It is called every time a page changes its state
 * from kpm-mapped to kpm-unmapped. It may not be called when only a kpm
 * instance calls mapout and there are still other instances mapping the
 * page. Assumes that the mlist mutex is already grabbed.
 *
 * Note: In normal mode (no VAC conflict prevention pending) TLBs are
 * not flushed. This is the core segkpm behavior to avoid xcalls. It is
 * no problem because a translation from a segkpm virtual address to a
 * physical address is always the same. The only downside is a slightly
 * increased window of vulnerability for misbehaving _kernel_ modules.
 *
 * As in sfmmu_kpm_mapin() there are two separate paths selected by
 * kpm_smallpages: the large page mode path updates the kpm_page
 * reference counts while holding the hashed khl_mutex, the small page
 * mode path resets the kp_mapped state of the kpm_spage. Inconsistent
 * counters or states lead to a panic.
 */
static void
sfmmu_kpm_mapout(page_t *pp, caddr_t vaddr)
{
	kpm_page_t	*kp;
	kpm_hlk_t	*kpmp;
	int		alias_range;
	kmutex_t	*pmtx;
	kpm_spage_t	*ksp;
	kpm_shlk_t	*kpmsp;
	int		oldval;

	ASSERT(sfmmu_mlist_held(pp));
	ASSERT(pp->p_kpmref == 0);

	alias_range = IS_KPM_ALIAS_RANGE(vaddr);

	if (kpm_smallpages)
		goto smallpages_mapout;

	PP2KPMPG(pp, kp);
	kpmp = KPMP_HASH(kp);
	mutex_enter(&kpmp->khl_mutex);

	if (alias_range) {
		/* mapping lives in the VAC alias range: kp_refcnta applies */
		ASSERT(PP_ISKPMS(pp) == 0);
		if (kp->kp_refcnta <= 0) {
			panic("sfmmu_kpm_mapout: bad refcnta kp=%p",
				(void *)kp);
		}

		if (PP_ISTNC(pp))  {
			if (PP_ISKPMC(pp) == 0) {
				/*
				 * Uncached kpm mappings must always have
				 * forced "small page" mode.
				 */
				panic("sfmmu_kpm_mapout: uncached page not "
					"kpm marked");
			}
			sfmmu_kpm_demap_small(vaddr);

			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);

			/*
			 * Check if we can resume cached mode. This might
			 * be the case if the kpm mapping was the only
			 * mapping in conflict with other non rule
			 * compliant mappings. The page is no more marked
			 * as kpm mapped, so the conv_tnc path will not
			 * change kpm state.
			 */
			conv_tnc(pp, TTE8K);

		} else if (PP_ISKPMC(pp) == 0) {
			/* remove TSB entry only */
			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);

		} else {
			/* already demapped */
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);
		}
		kp->kp_refcnta--;
		goto exit;
	}

	if (kp->kp_refcntc <= 0 && kp->kp_refcnts == 0) {
		/*
		 * Fast path / regular case.
		 */
		ASSERT(kp->kp_refcntc >= -1);
		ASSERT(!(pp->p_nrm & (P_KPMC | P_KPMS | P_TNC | P_PNC)));

		if (kp->kp_refcnt <= 0)
			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);

		if (--kp->kp_refcnt == 0) {
			/* last mapping within this kpm_page is going away */

			/* remove go indication */
			if (kp->kp_refcntc == -1) {
				sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
					&kpmp->khl_lock, KPMTSBM_STOP);
			}
			ASSERT(kp->kp_refcntc == 0);

			/* remove TSB entry */
			sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
#ifdef	DEBUG
			if (kpm_tlb_flush)
				sfmmu_kpm_demap_tlbs(vaddr);
#endif
		}

	} else {
		/*
		 * The VAC alias path.
		 * We come here if the kpm vaddr is not in any alias_range
		 * and we are unmapping a page within the regular kpm_page
		 * range. The kpm_page either holds conflict pages and/or
		 * is in "small page" mode. If the page is not marked
		 * P_KPMS it couldn't have a valid PAGESIZE sized TSB
		 * entry. Dcache flushing is done lazy and follows the
		 * rules of the regular virtual page coloring scheme.
		 *
		 * Per page states and required actions:
		 *   P_KPMC: remove a kpm mapping that is conflicting.
		 *   P_KPMS: remove a small kpm mapping within a kpm_page.
		 *   P_TNC:  check if we can re-cache the page.
		 *   P_PNC:  we cannot re-cache, sorry.
		 * Per kpm_page:
		 *   kp_refcntc > 0: page is part of a kpm_page with conflicts.
		 *   kp_refcnts > 0: rm a small mapped page within a kpm_page.
		 */

		if (PP_ISKPMS(pp)) {
			if (kp->kp_refcnts < 1) {
				panic("sfmmu_kpm_mapout: bad refcnts kp=%p",
					(void *)kp);
			}
			sfmmu_kpm_demap_small(vaddr);

			/*
			 * Check if we can resume cached mode. This might
			 * be the case if the kpm mapping was the only
			 * mapping in conflict with other non rule
			 * compliant mappings. The page is no more marked
			 * as kpm mapped, so the conv_tnc path will not
			 * change kpm state.
			 */
			if (PP_ISTNC(pp))  {
				if (!PP_ISKPMC(pp)) {
					/*
					 * Uncached kpm mappings must always
					 * have forced "small page" mode.
					 */
					panic("sfmmu_kpm_mapout: uncached "
						"page not kpm marked");
				}
				conv_tnc(pp, TTE8K);
			}
			/*
			 * Move the reference from the small count to
			 * kp_refcnt; it is dropped by the common
			 * kp_refcnt decrement at the end of this path.
			 */
			kp->kp_refcnts--;
			kp->kp_refcnt++;
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMS(pp);
			sfmmu_page_exit(pmtx);
		}

		if (PP_ISKPMC(pp)) {
			if (kp->kp_refcntc < 1) {
				panic("sfmmu_kpm_mapout: bad refcntc kp=%p",
					(void *)kp);
			}
			pmtx = sfmmu_page_enter(pp);
			PP_CLRKPMC(pp);
			sfmmu_page_exit(pmtx);
			kp->kp_refcntc--;
		}

		/* kp_refcnt is checked before it is decremented */
		if (kp->kp_refcnt-- < 1)
			panic("sfmmu_kpm_mapout: bad refcnt kp=%p", (void *)kp);
	}
exit:
	/* common exit of the large page mode paths */
	mutex_exit(&kpmp->khl_mutex);
	return;

smallpages_mapout:
	/* small page mode: no kpm_page bookkeeping, khl_mutex not taken */
	PP2KPMSPG(pp, ksp);
	kpmsp = KPMP_SHASH(ksp);

	if (PP_ISKPMC(pp) == 0) {
		/* reset the mapped state and check the previous one */
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
					&kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDS) {
			/*
			 * When we're called after sfmmu_kpm_hme_unload,
			 * KPM_MAPPEDSC is valid too.
			 */
			if (oldval != KPM_MAPPEDSC)
				panic("sfmmu_kpm_mapout: incorrect mapping");
		}

		/* remove TSB entry */
		sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
#ifdef	DEBUG
		if (kpm_tlb_flush)
			sfmmu_kpm_demap_tlbs(vaddr);
#endif

	} else if (PP_ISTNC(pp)) {
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
					&kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDSC || PP_ISKPMC(pp) == 0)
			panic("sfmmu_kpm_mapout: inconsistent TNC mapping");

		sfmmu_kpm_demap_small(vaddr);

		pmtx = sfmmu_page_enter(pp);
		PP_CLRKPMC(pp);
		sfmmu_page_exit(pmtx);

		/*
		 * Check if we can resume cached mode. This might be
		 * the case if the kpm mapping was the only mapping
		 * in conflict with other non rule compliant mappings.
		 * The page is no more marked as kpm mapped, so the
		 * conv_tnc path will not change the kpm state.
		 */
		conv_tnc(pp, TTE8K);

	} else {
		/* P_KPMC set but not P_TNC: only the state is cleaned up */
		oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
					&kpmsp->kshl_lock, 0);

		if (oldval != KPM_MAPPEDSC)
			panic("sfmmu_kpm_mapout: inconsistent mapping");

		pmtx = sfmmu_page_enter(pp);
		PP_CLRKPMC(pp);
		sfmmu_page_exit(pmtx);
	}
}
1045 
/* Absolute value; note that the argument is evaluated twice. */
#define	abs(x)	((x) >= 0 ? (x) : -(x))
1047 
1048 /*
1049  * Determine appropriate kpm mapping address and handle any kpm/hme
1050  * conflicts. Page mapping list and its vcolor parts must be protected.
1051  */
1052 static caddr_t
1053 sfmmu_kpm_getvaddr(page_t *pp, int *kpm_vac_rangep)
1054 {
1055 	int		vcolor, vcolor_pa;
1056 	caddr_t		vaddr;
1057 	uintptr_t	paddr;
1058 
1059 
1060 	ASSERT(sfmmu_mlist_held(pp));
1061 
1062 	paddr = ptob(pp->p_pagenum);
1063 	vcolor_pa = addr_to_vcolor(paddr);
1064 
1065 	if (pp->p_vnode && IS_SWAPFSVP(pp->p_vnode)) {
1066 		vcolor = (PP_NEWPAGE(pp) || PP_ISNC(pp)) ?
1067 		    vcolor_pa : PP_GET_VCOLOR(pp);
1068 	} else {
1069 		vcolor = addr_to_vcolor(pp->p_offset);
1070 	}
1071 
1072 	vaddr = kpm_vbase + paddr;
1073 	*kpm_vac_rangep = 0;
1074 
1075 	if (vcolor_pa != vcolor) {
1076 		*kpm_vac_rangep = abs(vcolor - vcolor_pa);
1077 		vaddr += ((uintptr_t)(vcolor - vcolor_pa) << MMU_PAGESHIFT);
1078 		vaddr += (vcolor_pa > vcolor) ?
1079 			((uintptr_t)vcolor_pa << kpm_size_shift) :
1080 			((uintptr_t)(vcolor - vcolor_pa) << kpm_size_shift);
1081 
1082 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1083 	}
1084 
1085 	if (PP_ISNC(pp))
1086 		return (vaddr);
1087 
1088 	if (PP_NEWPAGE(pp)) {
1089 		PP_SET_VCOLOR(pp, vcolor);
1090 		return (vaddr);
1091 	}
1092 
1093 	if (PP_GET_VCOLOR(pp) == vcolor)
1094 		return (vaddr);
1095 
1096 	ASSERT(!PP_ISMAPPED_KPM(pp));
1097 	sfmmu_kpm_vac_conflict(pp, vaddr);
1098 
1099 	return (vaddr);
1100 }
1101 
/*
 * VAC conflict state bit values.
 * The following defines are used to make the handling of the
 * various input states more concise. For that the kpm states
 * per kpm_page and per page are combined in a summary state.
 * Each single state has a corresponding bit value in the
 * summary state. These defines only apply for kpm large page
 * mappings. Within comments the abbreviations "kc, c, ks, s"
 * are used as short form of the actual state, e.g. "kc" for
 * "kp_refcntc > 0", etc.
 */
#define	KPM_KC	(1 << 3)	/* kpm_page: kp_refcntc > 0 */
#define	KPM_C	(1 << 2)	/* page: P_KPMC set */
#define	KPM_KS	(1 << 1)	/* kpm_page: kp_refcnts > 0 */
#define	KPM_S	(1 << 0)	/* page: P_KPMS set */
1117 
1118 /*
1119  * Summary states used in sfmmu_kpm_fault (KPM_TSBM_*).
1120  * See also more detailed comments within in the sfmmu_kpm_fault switch.
1121  * Abbreviations used:
1122  * CONFL: VAC conflict(s) within a kpm_page.
1123  * MAPS:  Mapped small: Page mapped in using a regular page size kpm mapping.
1124  * RASM:  Re-assembling of a large page mapping possible.
1125  * RPLS:  Replace: TSB miss due to TSB replacement only.
1126  * BRKO:  Breakup Other: A large kpm mapping has to be broken because another
1127  *        page within the kpm_page is already involved in a VAC conflict.
1128  * BRKT:  Breakup This: A large kpm mapping has to be broken, this page is
1129  *        is involved in a VAC conflict.
1130  */
1131 #define	KPM_TSBM_CONFL_GONE	(0)
1132 #define	KPM_TSBM_MAPS_RASM	(KPM_KS)
1133 #define	KPM_TSBM_RPLS_RASM	(KPM_KS | KPM_S)
1134 #define	KPM_TSBM_MAPS_BRKO	(KPM_KC)
1135 #define	KPM_TSBM_MAPS		(KPM_KC | KPM_KS)
1136 #define	KPM_TSBM_RPLS		(KPM_KC | KPM_KS | KPM_S)
1137 #define	KPM_TSBM_MAPS_BRKT	(KPM_KC | KPM_C)
1138 #define	KPM_TSBM_MAPS_CONFL	(KPM_KC | KPM_C | KPM_KS)
1139 #define	KPM_TSBM_RPLS_CONFL	(KPM_KC | KPM_C | KPM_KS | KPM_S)
1140 
1141 /*
1142  * kpm fault handler for mappings with large page size.
1143  */
1144 int
1145 sfmmu_kpm_fault(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1146 {
1147 	int		error;
1148 	pgcnt_t		inx;
1149 	kpm_page_t	*kp;
1150 	tte_t		tte;
1151 	pfn_t		pfn = pp->p_pagenum;
1152 	kpm_hlk_t	*kpmp;
1153 	kmutex_t	*pml;
1154 	int		alias_range;
1155 	int		uncached = 0;
1156 	kmutex_t	*pmtx;
1157 	int		badstate;
1158 	uint_t		tsbmcase;
1159 
1160 	alias_range = IS_KPM_ALIAS_RANGE(vaddr);
1161 
1162 	inx = ptokpmp(kpmptop(ptokpmp(pfn)) - mseg->kpm_pbase);
1163 	if (inx >= mseg->kpm_nkpmpgs) {
1164 		cmn_err(CE_PANIC, "sfmmu_kpm_fault: kpm overflow in memseg "
1165 			"0x%p  pp 0x%p", (void *)mseg, (void *)pp);
1166 	}
1167 
1168 	kp = &mseg->kpm_pages[inx];
1169 	kpmp = KPMP_HASH(kp);
1170 
1171 	pml = sfmmu_mlist_enter(pp);
1172 
1173 	if (!PP_ISMAPPED_KPM(pp)) {
1174 		sfmmu_mlist_exit(pml);
1175 		return (EFAULT);
1176 	}
1177 
1178 	mutex_enter(&kpmp->khl_mutex);
1179 
1180 	if (alias_range) {
1181 		ASSERT(!PP_ISMAPPED_LARGE(pp));
1182 		if (kp->kp_refcnta > 0) {
1183 			if (PP_ISKPMC(pp)) {
1184 				pmtx = sfmmu_page_enter(pp);
1185 				PP_CLRKPMC(pp);
1186 				sfmmu_page_exit(pmtx);
1187 			}
1188 			/*
1189 			 * Check for vcolor conflicts. Return here
1190 			 * w/ either no conflict (fast path), removed hme
1191 			 * mapping chains (unload conflict) or uncached
1192 			 * (uncache conflict). VACaches are cleaned and
1193 			 * p_vcolor and PP_TNC are set accordingly for the
1194 			 * conflict cases.  Drop kpmp for uncache conflict
1195 			 * cases since it will be grabbed within
1196 			 * sfmmu_kpm_page_cache in case of an uncache
1197 			 * conflict.
1198 			 */
1199 			mutex_exit(&kpmp->khl_mutex);
1200 			sfmmu_kpm_vac_conflict(pp, vaddr);
1201 			mutex_enter(&kpmp->khl_mutex);
1202 
1203 			if (PP_ISNC(pp)) {
1204 				uncached = 1;
1205 				pmtx = sfmmu_page_enter(pp);
1206 				PP_SETKPMC(pp);
1207 				sfmmu_page_exit(pmtx);
1208 			}
1209 			goto smallexit;
1210 
1211 		} else {
1212 			/*
1213 			 * We got a tsbmiss on a not active kpm_page range.
1214 			 * Let segkpm_fault decide how to panic.
1215 			 */
1216 			error = EFAULT;
1217 		}
1218 		goto exit;
1219 	}
1220 
1221 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1222 	if (kp->kp_refcntc == -1) {
1223 		/*
1224 		 * We should come here only if trap level tsb miss
1225 		 * handler is disabled.
1226 		 */
1227 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1228 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1229 
1230 		if (badstate == 0)
1231 			goto largeexit;
1232 	}
1233 
1234 	if (badstate || kp->kp_refcntc < 0)
1235 		goto badstate_exit;
1236 
1237 	/*
1238 	 * Combine the per kpm_page and per page kpm VAC states to
1239 	 * a summary state in order to make the kpm fault handling
1240 	 * more concise.
1241 	 */
1242 	tsbmcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1243 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1244 			(PP_ISKPMC(pp) ? KPM_C : 0) |
1245 			(PP_ISKPMS(pp) ? KPM_S : 0));
1246 
1247 	switch (tsbmcase) {
1248 	case KPM_TSBM_CONFL_GONE:		/* - - - - */
1249 		/*
1250 		 * That's fine, we either have no more vac conflict in
1251 		 * this kpm page or someone raced in and has solved the
1252 		 * vac conflict for us -- call sfmmu_kpm_vac_conflict
1253 		 * to take care for correcting the vcolor and flushing
1254 		 * the dcache if required.
1255 		 */
1256 		mutex_exit(&kpmp->khl_mutex);
1257 		sfmmu_kpm_vac_conflict(pp, vaddr);
1258 		mutex_enter(&kpmp->khl_mutex);
1259 
1260 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1261 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1262 			panic("sfmmu_kpm_fault: inconsistent CONFL_GONE "
1263 				"state, pp=%p", (void *)pp);
1264 		}
1265 		goto largeexit;
1266 
1267 	case KPM_TSBM_MAPS_RASM:		/* - - ks - */
1268 		/*
1269 		 * All conflicts in this kpm page are gone but there are
1270 		 * already small mappings around, so we also map this
1271 		 * page small. This could be the trigger case for a
1272 		 * small mapping reaper, if this is really needed.
1273 		 * For now fall thru to the KPM_TSBM_MAPS handling.
1274 		 */
1275 
1276 	case KPM_TSBM_MAPS:			/* kc - ks - */
1277 		/*
1278 		 * Large page mapping is already broken, this page is not
1279 		 * conflicting, so map it small. Call sfmmu_kpm_vac_conflict
1280 		 * to take care for correcting the vcolor and flushing
1281 		 * the dcache if required.
1282 		 */
1283 		mutex_exit(&kpmp->khl_mutex);
1284 		sfmmu_kpm_vac_conflict(pp, vaddr);
1285 		mutex_enter(&kpmp->khl_mutex);
1286 
1287 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1288 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1289 			panic("sfmmu_kpm_fault:  inconsistent MAPS state, "
1290 				"pp=%p", (void *)pp);
1291 		}
1292 		kp->kp_refcnt--;
1293 		kp->kp_refcnts++;
1294 		pmtx = sfmmu_page_enter(pp);
1295 		PP_SETKPMS(pp);
1296 		sfmmu_page_exit(pmtx);
1297 		goto smallexit;
1298 
1299 	case KPM_TSBM_RPLS_RASM:		/* - - ks s */
1300 		/*
1301 		 * All conflicts in this kpm page are gone but this page
1302 		 * is mapped small. This could be the trigger case for a
1303 		 * small mapping reaper, if this is really needed.
1304 		 * For now we drop it in small again. Fall thru to the
1305 		 * KPM_TSBM_RPLS handling.
1306 		 */
1307 
1308 	case KPM_TSBM_RPLS:			/* kc - ks s */
1309 		/*
1310 		 * Large page mapping is already broken, this page is not
1311 		 * conflicting but already mapped small, so drop it in
1312 		 * small again.
1313 		 */
1314 		if (PP_ISNC(pp) ||
1315 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1316 			panic("sfmmu_kpm_fault:  inconsistent RPLS state, "
1317 				"pp=%p", (void *)pp);
1318 		}
1319 		goto smallexit;
1320 
1321 	case KPM_TSBM_MAPS_BRKO:		/* kc - - - */
1322 		/*
1323 		 * The kpm page where we live in is marked conflicting
1324 		 * but this page is not conflicting. So we have to map it
1325 		 * in small. Call sfmmu_kpm_vac_conflict to take care for
1326 		 * correcting the vcolor and flushing the dcache if required.
1327 		 */
1328 		mutex_exit(&kpmp->khl_mutex);
1329 		sfmmu_kpm_vac_conflict(pp, vaddr);
1330 		mutex_enter(&kpmp->khl_mutex);
1331 
1332 		if (PP_ISNC(pp) || kp->kp_refcnt <= 0 ||
1333 		    addr_to_vcolor(vaddr) != PP_GET_VCOLOR(pp)) {
1334 			panic("sfmmu_kpm_fault:  inconsistent MAPS_BRKO state, "
1335 				"pp=%p", (void *)pp);
1336 		}
1337 		kp->kp_refcnt--;
1338 		kp->kp_refcnts++;
1339 		pmtx = sfmmu_page_enter(pp);
1340 		PP_SETKPMS(pp);
1341 		sfmmu_page_exit(pmtx);
1342 		goto smallexit;
1343 
1344 	case KPM_TSBM_MAPS_BRKT:		/* kc c - - */
1345 	case KPM_TSBM_MAPS_CONFL:		/* kc c ks - */
1346 		if (!PP_ISMAPPED(pp)) {
1347 			/*
1348 			 * We got a tsbmiss on kpm large page range that is
1349 			 * marked to contain vac conflicting pages introduced
1350 			 * by hme mappings. The hme mappings are all gone and
1351 			 * must have bypassed the kpm alias prevention logic.
1352 			 */
1353 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1354 				(void *)pp);
1355 		}
1356 
1357 		/*
1358 		 * Check for vcolor conflicts. Return here w/ either no
1359 		 * conflict (fast path), removed hme mapping chains
1360 		 * (unload conflict) or uncached (uncache conflict).
1361 		 * Dcache is cleaned and p_vcolor and P_TNC are set
1362 		 * accordingly. Drop kpmp for uncache conflict cases
1363 		 * since it will be grabbed within sfmmu_kpm_page_cache
1364 		 * in case of an uncache conflict.
1365 		 */
1366 		mutex_exit(&kpmp->khl_mutex);
1367 		sfmmu_kpm_vac_conflict(pp, vaddr);
1368 		mutex_enter(&kpmp->khl_mutex);
1369 
1370 		if (kp->kp_refcnt <= 0)
1371 			panic("sfmmu_kpm_fault: bad refcnt kp=%p", (void *)kp);
1372 
1373 		if (PP_ISNC(pp)) {
1374 			uncached = 1;
1375 		} else {
1376 			/*
1377 			 * When an unload conflict is solved and there are
1378 			 * no other small mappings around, we can resume
1379 			 * largepage mode. Otherwise we have to map or drop
1380 			 * in small. This could be a trigger for a small
1381 			 * mapping reaper when this was the last conflict
1382 			 * within the kpm page and when there are only
1383 			 * other small mappings around.
1384 			 */
1385 			ASSERT(addr_to_vcolor(vaddr) == PP_GET_VCOLOR(pp));
1386 			ASSERT(kp->kp_refcntc > 0);
1387 			kp->kp_refcntc--;
1388 			pmtx = sfmmu_page_enter(pp);
1389 			PP_CLRKPMC(pp);
1390 			sfmmu_page_exit(pmtx);
1391 			ASSERT(PP_ISKPMS(pp) == 0);
1392 			if (kp->kp_refcntc == 0 && kp->kp_refcnts == 0)
1393 				goto largeexit;
1394 		}
1395 
1396 		kp->kp_refcnt--;
1397 		kp->kp_refcnts++;
1398 		pmtx = sfmmu_page_enter(pp);
1399 		PP_SETKPMS(pp);
1400 		sfmmu_page_exit(pmtx);
1401 		goto smallexit;
1402 
1403 	case KPM_TSBM_RPLS_CONFL:		/* kc c ks s */
1404 		if (!PP_ISMAPPED(pp)) {
1405 			/*
1406 			 * We got a tsbmiss on kpm large page range that is
1407 			 * marked to contain vac conflicting pages introduced
1408 			 * by hme mappings. They are all gone and must have
1409 			 * somehow bypassed the kpm alias prevention logic.
1410 			 */
1411 			panic("sfmmu_kpm_fault: stale VAC conflict, pp=%p",
1412 				(void *)pp);
1413 		}
1414 
1415 		/*
1416 		 * This state is only possible for an uncached mapping.
1417 		 */
1418 		if (!PP_ISNC(pp)) {
1419 			panic("sfmmu_kpm_fault: page not uncached, pp=%p",
1420 				(void *)pp);
1421 		}
1422 		uncached = 1;
1423 		goto smallexit;
1424 
1425 	default:
1426 badstate_exit:
1427 		panic("sfmmu_kpm_fault: inconsistent VAC state, vaddr=%p kp=%p "
1428 			"pp=%p", (void *)vaddr, (void *)kp, (void *)pp);
1429 	}
1430 
1431 smallexit:
1432 	/* tte assembly */
1433 	if (uncached == 0)
1434 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1435 	else
1436 		KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1437 
1438 	/* tsb dropin */
1439 	sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1440 
1441 	error = 0;
1442 	goto exit;
1443 
1444 largeexit:
1445 	if (kp->kp_refcnt > 0) {
1446 
1447 		/* tte assembly */
1448 		KPM_TTE_VCACHED(tte.ll, pfn, TTE4M);
1449 
1450 		/* tsb dropin */
1451 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT4M);
1452 
1453 		if (kp->kp_refcntc == 0) {
1454 			/* Set "go" flag for TL tsbmiss handler */
1455 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc, &kpmp->khl_lock,
1456 					KPMTSBM_START);
1457 		}
1458 		ASSERT(kp->kp_refcntc == -1);
1459 		error = 0;
1460 
1461 	} else
1462 		error = EFAULT;
1463 exit:
1464 	mutex_exit(&kpmp->khl_mutex);
1465 	sfmmu_mlist_exit(pml);
1466 	return (error);
1467 }
1468 
1469 /*
1470  * kpm fault handler for mappings with small page size.
1471  */
1472 int
1473 sfmmu_kpm_fault_small(caddr_t vaddr, struct memseg *mseg, page_t *pp)
1474 {
1475 	int		error = 0;
1476 	pgcnt_t		inx;
1477 	kpm_spage_t	*ksp;
1478 	kpm_shlk_t	*kpmsp;
1479 	kmutex_t	*pml;
1480 	pfn_t		pfn = pp->p_pagenum;
1481 	tte_t		tte;
1482 	kmutex_t	*pmtx;
1483 	int		oldval;
1484 
1485 	inx = pfn - mseg->kpm_pbase;
1486 	ksp = &mseg->kpm_spages[inx];
1487 	kpmsp = KPMP_SHASH(ksp);
1488 
1489 	pml = sfmmu_mlist_enter(pp);
1490 
1491 	if (!PP_ISMAPPED_KPM(pp)) {
1492 		sfmmu_mlist_exit(pml);
1493 		return (EFAULT);
1494 	}
1495 
1496 	/*
1497 	 * kp_mapped lookup protected by mlist mutex
1498 	 */
1499 	if (ksp->kp_mapped == KPM_MAPPEDS) {
1500 		/*
1501 		 * Fast path tsbmiss
1502 		 */
1503 		ASSERT(!PP_ISKPMC(pp));
1504 		ASSERT(!PP_ISNC(pp));
1505 
1506 		/* tte assembly */
1507 		KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1508 
1509 		/* tsb dropin */
1510 		sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1511 
1512 	} else if (ksp->kp_mapped == KPM_MAPPEDSC) {
1513 		/*
1514 		 * Got here due to existing or gone kpm/hme VAC conflict.
1515 		 * Recheck for vcolor conflicts. Return here w/ either
1516 		 * no conflict, removed hme mapping chain (unload
1517 		 * conflict) or uncached (uncache conflict). VACaches
1518 		 * are cleaned and p_vcolor and PP_TNC are set accordingly
1519 		 * for the conflict cases.
1520 		 */
1521 		sfmmu_kpm_vac_conflict(pp, vaddr);
1522 
1523 		if (PP_ISNC(pp)) {
1524 			/* ASSERT(pp->p_share); XXX use hat_page_getshare */
1525 
1526 			/* tte assembly */
1527 			KPM_TTE_VUNCACHED(tte.ll, pfn, TTE8K);
1528 
1529 			/* tsb dropin */
1530 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1531 
1532 		} else {
1533 			if (PP_ISKPMC(pp)) {
1534 				pmtx = sfmmu_page_enter(pp);
1535 				PP_CLRKPMC(pp);
1536 				sfmmu_page_exit(pmtx);
1537 			}
1538 
1539 			/* tte assembly */
1540 			KPM_TTE_VCACHED(tte.ll, pfn, TTE8K);
1541 
1542 			/* tsb dropin */
1543 			sfmmu_kpm_load_tsb(vaddr, &tte, MMU_PAGESHIFT);
1544 
1545 			oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1546 					&kpmsp->kshl_lock, KPM_MAPPEDS);
1547 
1548 			if (oldval != KPM_MAPPEDSC)
1549 				panic("sfmmu_kpm_fault_small: "
1550 					"stale smallpages mapping");
1551 		}
1552 
1553 	} else {
1554 		/*
1555 		 * We got a tsbmiss on a not active kpm_page range.
1556 		 * Let decide segkpm_fault how to panic.
1557 		 */
1558 		error = EFAULT;
1559 	}
1560 
1561 	sfmmu_mlist_exit(pml);
1562 	return (error);
1563 }
1564 
1565 /*
1566  * Check/handle potential hme/kpm mapping conflicts
1567  */
1568 static void
1569 sfmmu_kpm_vac_conflict(page_t *pp, caddr_t vaddr)
1570 {
1571 	int		vcolor;
1572 	struct sf_hment	*sfhmep;
1573 	struct hat	*tmphat;
1574 	struct sf_hment	*tmphme = NULL;
1575 	struct hme_blk	*hmeblkp;
1576 	tte_t		tte;
1577 
1578 	ASSERT(sfmmu_mlist_held(pp));
1579 
1580 	if (PP_ISNC(pp))
1581 		return;
1582 
1583 	vcolor = addr_to_vcolor(vaddr);
1584 	if (PP_GET_VCOLOR(pp) == vcolor)
1585 		return;
1586 
1587 	/*
1588 	 * There could be no vcolor conflict between a large cached
1589 	 * hme page and a non alias range kpm page (neither large nor
1590 	 * small mapped). So if a hme conflict already exists between
1591 	 * a constituent page of a large hme mapping and a shared small
1592 	 * conflicting hme mapping, both mappings must be already
1593 	 * uncached at this point.
1594 	 */
1595 	ASSERT(!PP_ISMAPPED_LARGE(pp));
1596 
1597 	if (!PP_ISMAPPED(pp)) {
1598 		/*
1599 		 * Previous hme user of page had a different color
1600 		 * but since there are no current users
1601 		 * we just flush the cache and change the color.
1602 		 */
1603 		SFMMU_STAT(sf_pgcolor_conflict);
1604 		sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1605 		PP_SET_VCOLOR(pp, vcolor);
1606 		return;
1607 	}
1608 
1609 	/*
1610 	 * If we get here we have a vac conflict with a current hme
1611 	 * mapping. This must have been established by forcing a wrong
1612 	 * colored mapping, e.g. by using mmap(2) with MAP_FIXED.
1613 	 */
1614 
1615 	/*
1616 	 * Check if any mapping is in same as or if it is locked
1617 	 * since in that case we need to uncache.
1618 	 */
1619 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1620 		tmphme = sfhmep->hme_next;
1621 		if (IS_PAHME(sfhmep))
1622 			continue;
1623 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1624 		if (hmeblkp->hblk_xhat_bit)
1625 			continue;
1626 		tmphat = hblktosfmmu(hmeblkp);
1627 		sfmmu_copytte(&sfhmep->hme_tte, &tte);
1628 		ASSERT(TTE_IS_VALID(&tte));
1629 		if ((tmphat == ksfmmup) || hmeblkp->hblk_lckcnt) {
1630 			/*
1631 			 * We have an uncache conflict
1632 			 */
1633 			SFMMU_STAT(sf_uncache_conflict);
1634 			sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
1635 			return;
1636 		}
1637 	}
1638 
1639 	/*
1640 	 * We have an unload conflict
1641 	 */
1642 	SFMMU_STAT(sf_unload_conflict);
1643 
1644 	for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
1645 		tmphme = sfhmep->hme_next;
1646 		if (IS_PAHME(sfhmep))
1647 			continue;
1648 		hmeblkp = sfmmu_hmetohblk(sfhmep);
1649 		if (hmeblkp->hblk_xhat_bit)
1650 			continue;
1651 		(void) sfmmu_pageunload(pp, sfhmep, TTE8K);
1652 	}
1653 
1654 	/*
1655 	 * Unloads only does tlb flushes so we need to flush the
1656 	 * dcache vcolor here.
1657 	 */
1658 	sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
1659 	PP_SET_VCOLOR(pp, vcolor);
1660 }
1661 
1662 /*
1663  * Remove all kpm mappings using kpme's for pp and check that
1664  * all kpm mappings (w/ and w/o kpme's) are gone.
1665  */
1666 void
1667 sfmmu_kpm_pageunload(page_t *pp)
1668 {
1669 	caddr_t		vaddr;
1670 	struct kpme	*kpme, *nkpme;
1671 
1672 	ASSERT(pp != NULL);
1673 	ASSERT(pp->p_kpmref);
1674 	ASSERT(sfmmu_mlist_held(pp));
1675 
1676 	vaddr = hat_kpm_page2va(pp, 1);
1677 
1678 	for (kpme = pp->p_kpmelist; kpme; kpme = nkpme) {
1679 		ASSERT(kpme->kpe_page == pp);
1680 
1681 		if (pp->p_kpmref == 0)
1682 			panic("sfmmu_kpm_pageunload: stale p_kpmref pp=%p "
1683 				"kpme=%p", (void *)pp, (void *)kpme);
1684 
1685 		nkpme = kpme->kpe_next;
1686 
1687 		/* Add instance callback here here if needed later */
1688 		sfmmu_kpme_sub(kpme, pp);
1689 	}
1690 
1691 	/*
1692 	 * Also correct after mixed kpme/nonkpme mappings. If nonkpme
1693 	 * segkpm clients have unlocked the page and forgot to mapout
1694 	 * we panic here.
1695 	 */
1696 	if (pp->p_kpmref != 0)
1697 		panic("sfmmu_kpm_pageunload: bad refcnt pp=%p", (void *)pp);
1698 
1699 	sfmmu_kpm_mapout(pp, vaddr);
1700 }
1701 
1702 /*
1703  * Remove a large kpm mapping from kernel TSB and all TLB's.
1704  */
1705 static void
1706 sfmmu_kpm_demap_large(caddr_t vaddr)
1707 {
1708 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT4M);
1709 	sfmmu_kpm_demap_tlbs(vaddr);
1710 }
1711 
1712 /*
1713  * Remove a small kpm mapping from kernel TSB and all TLB's.
1714  */
1715 static void
1716 sfmmu_kpm_demap_small(caddr_t vaddr)
1717 {
1718 	sfmmu_kpm_unload_tsb(vaddr, MMU_PAGESHIFT);
1719 	sfmmu_kpm_demap_tlbs(vaddr);
1720 }
1721 
1722 /*
1723  * Demap a kpm mapping in all TLB's.
1724  */
1725 static void
1726 sfmmu_kpm_demap_tlbs(caddr_t vaddr)
1727 {
1728 	cpuset_t cpuset;
1729 
1730 	kpreempt_disable();
1731 	cpuset = ksfmmup->sfmmu_cpusran;
1732 	CPUSET_AND(cpuset, cpu_ready_set);
1733 	CPUSET_DEL(cpuset, CPU->cpu_id);
1734 	SFMMU_XCALL_STATS(ksfmmup);
1735 
1736 	xt_some(cpuset, vtag_flushpage_tl1, (uint64_t)vaddr,
1737 	    (uint64_t)ksfmmup);
1738 	vtag_flushpage(vaddr, (uint64_t)ksfmmup);
1739 
1740 	kpreempt_enable();
1741 }
1742 
/*
 * Summary states handled by sfmmu_kpm_vac_unload (KPM_VUL_*). The
 * switch in sfmmu_kpm_vac_unload carries the detailed description of
 * every state.
 * Abbreviations:
 * BIG:   Large page kpm mapping in use.
 * CONFL: VAC conflict(s) within a kpm_page.
 * INCR:  The count of conflicts within a kpm_page is going to be
 *        incremented.
 * DECR:  The count of conflicts within a kpm_page is going to be
 *        decremented.
 * UNMAP_SMALL: A small (regular page size) mapping is going to be
 *        unmapped.
 * TNC:   Temporary non cached: a kpm mapped page is mapped in TNC state.
 */
#define	KPM_VUL_BIG		(0)
#define	KPM_VUL_CONFL_INCR1	(KPM_KS)
#define	KPM_VUL_UNMAP_SMALL1	(KPM_KS | KPM_S)
#define	KPM_VUL_CONFL_INCR2	(KPM_KC)
#define	KPM_VUL_CONFL_INCR3	(KPM_KC | KPM_KS)
#define	KPM_VUL_UNMAP_SMALL2	(KPM_KC | KPM_KS | KPM_S)
#define	KPM_VUL_CONFL_DECR1	(KPM_KC | KPM_C)
#define	KPM_VUL_CONFL_DECR2	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_VUL_TNC		(KPM_KC | KPM_C | KPM_KS | KPM_S)
1763 
1764 /*
1765  * Handle VAC unload conflicts introduced by hme mappings or vice
1766  * versa when a hme conflict mapping is replaced by a non conflict
1767  * one. Perform actions and state transitions according to the
1768  * various page and kpm_page entry states. VACache flushes are in
1769  * the responsibiliy of the caller. We still hold the mlist lock.
1770  */
1771 void
1772 sfmmu_kpm_vac_unload(page_t *pp, caddr_t vaddr)
1773 {
1774 	kpm_page_t	*kp;
1775 	kpm_hlk_t	*kpmp;
1776 	caddr_t		kpmvaddr = hat_kpm_page2va(pp, 1);
1777 	int		newcolor;
1778 	kmutex_t	*pmtx;
1779 	uint_t		vacunlcase;
1780 	int		badstate = 0;
1781 	kpm_spage_t	*ksp;
1782 	kpm_shlk_t	*kpmsp;
1783 
1784 	ASSERT(PAGE_LOCKED(pp));
1785 	ASSERT(sfmmu_mlist_held(pp));
1786 	ASSERT(!PP_ISNC(pp));
1787 
1788 	newcolor = addr_to_vcolor(kpmvaddr) != addr_to_vcolor(vaddr);
1789 	if (kpm_smallpages)
1790 		goto smallpages_vac_unload;
1791 
1792 	PP2KPMPG(pp, kp);
1793 	kpmp = KPMP_HASH(kp);
1794 	mutex_enter(&kpmp->khl_mutex);
1795 
1796 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
1797 		if (kp->kp_refcnta < 1) {
1798 			panic("sfmmu_kpm_vac_unload: bad refcnta kpm_page=%p\n",
1799 				(void *)kp);
1800 		}
1801 
1802 		if (PP_ISKPMC(pp) == 0) {
1803 			if (newcolor == 0)
1804 				goto exit;
1805 			sfmmu_kpm_demap_small(kpmvaddr);
1806 			pmtx = sfmmu_page_enter(pp);
1807 			PP_SETKPMC(pp);
1808 			sfmmu_page_exit(pmtx);
1809 
1810 		} else if (newcolor == 0) {
1811 			pmtx = sfmmu_page_enter(pp);
1812 			PP_CLRKPMC(pp);
1813 			sfmmu_page_exit(pmtx);
1814 
1815 		} else {
1816 			badstate++;
1817 		}
1818 
1819 		goto exit;
1820 	}
1821 
1822 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
1823 	if (kp->kp_refcntc == -1) {
1824 		/*
1825 		 * We should come here only if trap level tsb miss
1826 		 * handler is disabled.
1827 		 */
1828 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
1829 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
1830 	} else {
1831 		badstate |= (kp->kp_refcntc < 0);
1832 	}
1833 
1834 	if (badstate)
1835 		goto exit;
1836 
1837 	if (PP_ISKPMC(pp) == 0 && newcolor == 0) {
1838 		ASSERT(PP_ISKPMS(pp) == 0);
1839 		goto exit;
1840 	}
1841 
1842 	/*
1843 	 * Combine the per kpm_page and per page kpm VAC states
1844 	 * to a summary state in order to make the vac unload
1845 	 * handling more concise.
1846 	 */
1847 	vacunlcase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
1848 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
1849 			(PP_ISKPMC(pp) ? KPM_C : 0) |
1850 			(PP_ISKPMS(pp) ? KPM_S : 0));
1851 
1852 	switch (vacunlcase) {
1853 	case KPM_VUL_BIG:				/* - - - - */
1854 		/*
1855 		 * Have to breakup the large page mapping to be
1856 		 * able to handle the conflicting hme vaddr.
1857 		 */
1858 		if (kp->kp_refcntc == -1) {
1859 			/* remove go indication */
1860 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
1861 					&kpmp->khl_lock, KPMTSBM_STOP);
1862 		}
1863 		sfmmu_kpm_demap_large(kpmvaddr);
1864 
1865 		ASSERT(kp->kp_refcntc == 0);
1866 		kp->kp_refcntc++;
1867 		pmtx = sfmmu_page_enter(pp);
1868 		PP_SETKPMC(pp);
1869 		sfmmu_page_exit(pmtx);
1870 		break;
1871 
1872 	case KPM_VUL_UNMAP_SMALL1:			/* -  - ks s */
1873 	case KPM_VUL_UNMAP_SMALL2:			/* kc - ks s */
1874 		/*
1875 		 * New conflict w/ an active kpm page, actually mapped
1876 		 * in by small TSB/TLB entries. Remove the mapping and
1877 		 * update states.
1878 		 */
1879 		ASSERT(newcolor);
1880 		sfmmu_kpm_demap_small(kpmvaddr);
1881 		kp->kp_refcnts--;
1882 		kp->kp_refcnt++;
1883 		kp->kp_refcntc++;
1884 		pmtx = sfmmu_page_enter(pp);
1885 		PP_CLRKPMS(pp);
1886 		PP_SETKPMC(pp);
1887 		sfmmu_page_exit(pmtx);
1888 		break;
1889 
1890 	case KPM_VUL_CONFL_INCR1:			/* -  - ks - */
1891 	case KPM_VUL_CONFL_INCR2:			/* kc - -  - */
1892 	case KPM_VUL_CONFL_INCR3:			/* kc - ks - */
1893 		/*
1894 		 * New conflict on a active kpm mapped page not yet in
1895 		 * TSB/TLB. Mark page and increment the kpm_page conflict
1896 		 * count.
1897 		 */
1898 		ASSERT(newcolor);
1899 		kp->kp_refcntc++;
1900 		pmtx = sfmmu_page_enter(pp);
1901 		PP_SETKPMC(pp);
1902 		sfmmu_page_exit(pmtx);
1903 		break;
1904 
1905 	case KPM_VUL_CONFL_DECR1:			/* kc c -  - */
1906 	case KPM_VUL_CONFL_DECR2:			/* kc c ks - */
1907 		/*
1908 		 * A conflicting hme mapping is removed for an active
1909 		 * kpm page not yet in TSB/TLB. Unmark page and decrement
1910 		 * the kpm_page conflict count.
1911 		 */
1912 		ASSERT(newcolor == 0);
1913 		kp->kp_refcntc--;
1914 		pmtx = sfmmu_page_enter(pp);
1915 		PP_CLRKPMC(pp);
1916 		sfmmu_page_exit(pmtx);
1917 		break;
1918 
1919 	case KPM_VUL_TNC:				/* kc c ks s */
1920 		cmn_err(CE_NOTE, "sfmmu_kpm_vac_unload: "
1921 			"page not in NC state");
1922 		/* FALLTHRU */
1923 
1924 	default:
1925 		badstate++;
1926 	}
1927 exit:
1928 	if (badstate) {
1929 		panic("sfmmu_kpm_vac_unload: inconsistent VAC state, "
1930 			"kpmvaddr=%p kp=%p pp=%p",
1931 			(void *)kpmvaddr, (void *)kp, (void *)pp);
1932 	}
1933 	mutex_exit(&kpmp->khl_mutex);
1934 
1935 	return;
1936 
1937 smallpages_vac_unload:
1938 	if (newcolor == 0)
1939 		return;
1940 
1941 	PP2KPMSPG(pp, ksp);
1942 	kpmsp = KPMP_SHASH(ksp);
1943 
1944 	if (PP_ISKPMC(pp) == 0) {
1945 		if (ksp->kp_mapped == KPM_MAPPEDS) {
1946 			/*
1947 			 * Stop TL tsbmiss handling
1948 			 */
1949 			(void) sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
1950 					&kpmsp->kshl_lock, KPM_MAPPEDSC);
1951 
1952 			sfmmu_kpm_demap_small(kpmvaddr);
1953 
1954 		} else if (ksp->kp_mapped != KPM_MAPPEDSC) {
1955 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1956 		}
1957 
1958 		pmtx = sfmmu_page_enter(pp);
1959 		PP_SETKPMC(pp);
1960 		sfmmu_page_exit(pmtx);
1961 
1962 	} else {
1963 		if (ksp->kp_mapped != KPM_MAPPEDSC)
1964 			panic("sfmmu_kpm_vac_unload: inconsistent mapping");
1965 	}
1966 }
1967 
1968 /*
1969  * Page is marked to be in VAC conflict to an existing kpm mapping
1970  * or is kpm mapped using only the regular pagesize. Called from
1971  * sfmmu_hblk_unload when a mlist is completely removed.
1972  */
1973 void
1974 sfmmu_kpm_hme_unload(page_t *pp)
1975 {
1976 	/* tte assembly */
1977 	kpm_page_t	*kp;
1978 	kpm_hlk_t	*kpmp;
1979 	caddr_t		vaddr;
1980 	kmutex_t	*pmtx;
1981 	uint_t		flags;
1982 	kpm_spage_t	*ksp;
1983 
1984 	ASSERT(sfmmu_mlist_held(pp));
1985 	ASSERT(PP_ISMAPPED_KPM(pp));
1986 
1987 	flags = pp->p_nrm & (P_KPMC | P_KPMS);
1988 	if (kpm_smallpages)
1989 		goto smallpages_hme_unload;
1990 
1991 	if (flags == (P_KPMC | P_KPMS)) {
1992 		panic("sfmmu_kpm_hme_unload: page should be uncached");
1993 
1994 	} else if (flags == P_KPMS) {
1995 		/*
1996 		 * Page mapped small but not involved in VAC conflict
1997 		 */
1998 		return;
1999 	}
2000 
2001 	vaddr = hat_kpm_page2va(pp, 1);
2002 
2003 	PP2KPMPG(pp, kp);
2004 	kpmp = KPMP_HASH(kp);
2005 	mutex_enter(&kpmp->khl_mutex);
2006 
2007 	if (IS_KPM_ALIAS_RANGE(vaddr)) {
2008 		if (kp->kp_refcnta < 1) {
2009 			panic("sfmmu_kpm_hme_unload: bad refcnta kpm_page=%p\n",
2010 				(void *)kp);
2011 		}
2012 	} else {
2013 		if (kp->kp_refcntc < 1) {
2014 			panic("sfmmu_kpm_hme_unload: bad refcntc kpm_page=%p\n",
2015 				(void *)kp);
2016 		}
2017 		kp->kp_refcntc--;
2018 	}
2019 
2020 	pmtx = sfmmu_page_enter(pp);
2021 	PP_CLRKPMC(pp);
2022 	sfmmu_page_exit(pmtx);
2023 
2024 	mutex_exit(&kpmp->khl_mutex);
2025 	return;
2026 
2027 smallpages_hme_unload:
2028 	if (flags != P_KPMC)
2029 		panic("sfmmu_kpm_hme_unload: page should be uncached");
2030 
2031 	vaddr = hat_kpm_page2va(pp, 1);
2032 	PP2KPMSPG(pp, ksp);
2033 
2034 	if (ksp->kp_mapped != KPM_MAPPEDSC)
2035 		panic("sfmmu_kpm_hme_unload: inconsistent mapping");
2036 
2037 	/*
2038 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2039 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2040 	 * There we can start over again.
2041 	 */
2042 
2043 	pmtx = sfmmu_page_enter(pp);
2044 	PP_CLRKPMC(pp);
2045 	sfmmu_page_exit(pmtx);
2046 }
2047 
2048 /*
2049  * Special hooks for sfmmu_page_cache_array() when changing the
2050  * cacheability of a page. It is used to obey the hat_kpm lock
2051  * ordering (mlist -> kpmp -> spl, and back).
2052  */
2053 kpm_hlk_t *
2054 sfmmu_kpm_kpmp_enter(page_t *pp, pgcnt_t npages)
2055 {
2056 	kpm_page_t	*kp;
2057 	kpm_hlk_t	*kpmp;
2058 
2059 	ASSERT(sfmmu_mlist_held(pp));
2060 
2061 	if (kpm_smallpages || PP_ISMAPPED_KPM(pp) == 0)
2062 		return (NULL);
2063 
2064 	ASSERT(npages <= kpmpnpgs);
2065 
2066 	PP2KPMPG(pp, kp);
2067 	kpmp = KPMP_HASH(kp);
2068 	mutex_enter(&kpmp->khl_mutex);
2069 
2070 	return (kpmp);
2071 }
2072 
2073 void
2074 sfmmu_kpm_kpmp_exit(kpm_hlk_t *kpmp)
2075 {
2076 	if (kpm_smallpages || kpmp == NULL)
2077 		return;
2078 
2079 	mutex_exit(&kpmp->khl_mutex);
2080 }
2081 
/*
 * Summary states handled by sfmmu_kpm_page_cache (KPM_*). The switch
 * in sfmmu_kpm_page_cache carries the detailed description of every
 * state.
 * Abbreviations:
 * UNC:     Input state for an uncache request.
 *   BIG:     Large page kpm mapping in use.
 *   SMALL:   The page has a small kpm mapping within a kpm_page range.
 *   NODEMAP: No demap needed.
 *   NOP:     No operation needed on this input state.
 * CACHE:   Input state for a re-cache request.
 *   MAPS:    The page is in TNC and kpm VAC conflict state and is kpm
 *            mapped small.
 *   NOMAP:   The page is in TNC and kpm VAC conflict state, but is not
 *            small kpm mapped.
 *   NOMAPO:  The page is in TNC and kpm VAC conflict state, but is not
 *            small kpm mapped. There are also other small kpm mappings
 *            within this kpm_page.
 */
#define	KPM_UNC_BIG		(0)
#define	KPM_UNC_NODEMAP1	(KPM_KS)
#define	KPM_UNC_SMALL1		(KPM_KS | KPM_S)
#define	KPM_UNC_NODEMAP2	(KPM_KC)
#define	KPM_UNC_NODEMAP3	(KPM_KC | KPM_KS)
#define	KPM_UNC_SMALL2		(KPM_KC | KPM_KS | KPM_S)
#define	KPM_UNC_NOP1		(KPM_KC | KPM_C)
#define	KPM_UNC_NOP2		(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_NOMAP		(KPM_KC | KPM_C)
#define	KPM_CACHE_NOMAPO	(KPM_KC | KPM_C | KPM_KS)
#define	KPM_CACHE_MAPS		(KPM_KC | KPM_C | KPM_KS | KPM_S)
2110 
2111 /*
2112  * This function is called when the virtual cacheability of a page
2113  * is changed and the page has an active kpm mapping. The mlist mutex,
2114  * the spl hash lock and the kpmp mutex (if needed) are already grabbed.
 *
 * Arguments:
 *   pp               page whose cacheability changes; it must be kpm
 *                    mapped (asserted).
 *   flags            HAT_TMPNC or HAT_CACHE; any other value panics.
 *   cache_flush_tag  if CACHE_FLUSH is passed together with HAT_TMPNC,
 *                    the page's kpm vcolor is flushed before any
 *                    state is changed.
 *
 * Nothing is returned; every inconsistency found in the kpm bookkeeping
 * ends in panic().
2115  */
2116 /*ARGSUSED2*/
2117 void
2118 sfmmu_kpm_page_cache(page_t *pp, int flags, int cache_flush_tag)
2119 {
2120 	kpm_page_t	*kp;
2121 	kpm_hlk_t	*kpmp;
2122 	caddr_t		kpmvaddr;
2123 	int		badstate = 0;
2124 	uint_t		pgcacase;
2125 	kpm_spage_t	*ksp;
2126 	kpm_shlk_t	*kpmsp;
2127 	int		oldval;
2128
	/* Caller contract: page is kpm mapped, mlist and spl locks held. */
2129 	ASSERT(PP_ISMAPPED_KPM(pp));
2130 	ASSERT(sfmmu_mlist_held(pp));
2131 	ASSERT(sfmmu_page_spl_held(pp));
2132
	/* Only these two cacheability transitions are handled here. */
2133 	if (flags != HAT_TMPNC && flags != HAT_CACHE)
2134 		panic("sfmmu_kpm_page_cache: bad flags");
2135
	/*
	 * kpm virtual address of pp; it supplies the vcolor for the
	 * flush below and is the address handed to every demap call.
	 */
2136 	kpmvaddr = hat_kpm_page2va(pp, 1);
2137
	/*
	 * For HAT_TMPNC with an explicit CACHE_FLUSH request, flush the
	 * page at its kpm vcolor: vac_flushpage_tl1 is passed via
	 * xt_some() to every CPU in cpu_ready_set except the current
	 * one, and vac_flushpage() is then called on the current CPU.
	 */
2138 	if (flags == HAT_TMPNC && cache_flush_tag == CACHE_FLUSH) {
2139 		pfn_t pfn = pp->p_pagenum;
2140 		int vcolor = addr_to_vcolor(kpmvaddr);
2141 		cpuset_t cpuset = cpu_ready_set;
2142
2143 		/* Flush vcolor in DCache */
2144 		CPUSET_DEL(cpuset, CPU->cpu_id);
2145 		SFMMU_XCALL_STATS(ksfmmup);
2146 		xt_some(cpuset, vac_flushpage_tl1, pfn, vcolor);
2147 		vac_flushpage(pfn, vcolor);
2148 	}
2149
	/*
	 * With kpm_smallpages set, the per small page (kpm_spage_t)
	 * bookkeeping at the bottom of this function is used instead of
	 * the kpm_page_t state handling that follows.
	 */
2150 	if (kpm_smallpages)
2151 		goto smallpages_page_cache;
2152
	/*
	 * Look up the kpm_page of pp and its hash lock entry. The
	 * caller must already hold that khl_mutex.
	 */
2153 	PP2KPMPG(pp, kp);
2154 	kpmp = KPMP_HASH(kp);
2155 	ASSERT(MUTEX_HELD(&kpmp->khl_mutex));
2156
	/*
	 * kpm addresses in the alias range: kp_refcnta must be at least
	 * one. The mapping is demapped and only the per page KPMC flag
	 * is set (HAT_TMPNC) or cleared (HAT_CACHE); kp_refcnt,
	 * kp_refcntc and kp_refcnts are not touched on this path.
	 */
2157 	if (IS_KPM_ALIAS_RANGE(kpmvaddr)) {
2158 		if (kp->kp_refcnta < 1) {
2159 			panic("sfmmu_kpm_page_cache: bad refcnta "
2160 				"kpm_page=%p\n", (void *)kp);
2161 		}
2162 		sfmmu_kpm_demap_small(kpmvaddr);
2163 		if (flags == HAT_TMPNC) {
2164 			PP_SETKPMC(pp);
2165 			ASSERT(!PP_ISKPMS(pp));
2166 		} else {
2167 			ASSERT(PP_ISKPMC(pp));
2168 			PP_CLRKPMC(pp);
2169 		}
2170 		goto exit;
2171 	}
2172
	/*
	 * Sanity check the kpm_page counters before interpreting them.
	 * A negative kp_refcnt or kp_refcnts is always inconsistent.
	 * kp_refcntc == -1 is a special value (the "go indication" that
	 * is removed with KPMTSBM_STOP further down); in that state
	 * kp_refcnt must be nonzero, kp_refcnts must not be positive and
	 * the page must not be flagged KPMC, KPMS or NC. In all other
	 * cases kp_refcntc must not be negative.
	 */
2173 	badstate = (kp->kp_refcnt < 0 || kp->kp_refcnts < 0);
2174 	if (kp->kp_refcntc == -1) {
2175 		/*
2176 		 * We should come here only if trap level tsb miss
2177 		 * handler is disabled.
2178 		 */
2179 		badstate |= (kp->kp_refcnt == 0 || kp->kp_refcnts > 0 ||
2180 			PP_ISKPMC(pp) || PP_ISKPMS(pp) || PP_ISNC(pp));
2181 	} else {
2182 		badstate |= (kp->kp_refcntc < 0);
2183 	}
2184
	/* The exit path panics when badstate is set. */
2185 	if (badstate)
2186 		goto exit;
2187
2188 	/*
2189 	 * Combine the per kpm_page and per page kpm VAC states to
2190 	 * a summary state in order to make the VAC cache/uncache
2191 	 * handling more concise.
2192 	 */
2193 	pgcacase = (((kp->kp_refcntc > 0) ? KPM_KC : 0) |
2194 			((kp->kp_refcnts > 0) ? KPM_KS : 0) |
2195 			(PP_ISKPMC(pp) ? KPM_C : 0) |
2196 			(PP_ISKPMS(pp) ? KPM_S : 0));
2197
	/*
	 * HAT_CACHE: only the three summary states that have both kc
	 * and c set are accepted; anything else is a bad state.
	 */
2198 	if (flags == HAT_CACHE) {
2199 		switch (pgcacase) {
2200 		case KPM_CACHE_MAPS:			/* kc c ks s */
			/*
			 * The page is small mapped: demap it and move
			 * its count from kp_refcnts to kp_refcnt, then
			 * fall through to drop the conflict itself.
			 */
2201 			sfmmu_kpm_demap_small(kpmvaddr);
2202 			if (kp->kp_refcnts < 1) {
2203 				panic("sfmmu_kpm_page_cache: bad refcnts "
2204 				"kpm_page=%p\n", (void *)kp);
2205 			}
2206 			kp->kp_refcnts--;
2207 			kp->kp_refcnt++;
2208 			PP_CLRKPMS(pp);
2209 			/* FALLTHRU */
2210
2211 		case KPM_CACHE_NOMAP:			/* kc c -  - */
2212 		case KPM_CACHE_NOMAPO:			/* kc c ks - */
			/* One conflict less in this kpm_page and on pp. */
2213 			kp->kp_refcntc--;
2214 			PP_CLRKPMC(pp);
2215 			break;
2216
2217 		default:
2218 			badstate++;
2219 		}
2220 		goto exit;
2221 	}
2222
	/* From here on flags == HAT_TMPNC. */
2223 	switch (pgcacase) {
2224 	case KPM_UNC_BIG:				/* - - - - */
2225 		if (kp->kp_refcnt < 1) {
2226 			panic("sfmmu_kpm_page_cache: bad refcnt "
2227 				"kpm_page=%p\n", (void *)kp);
2228 		}
2229
2230 		/*
2231 		 * Have to breakup the large page mapping in preparation
2232 		 * to the upcoming TNC mode handled by small mappings.
2233 		 * The demap can already be done due to another conflict
2234 		 * within the kpm_page.
2235 		 */
2236 		if (kp->kp_refcntc == -1) {
2237 			/* remove go indication */
2238 			sfmmu_kpm_tsbmtl(&kp->kp_refcntc,
2239 				&kpmp->khl_lock, KPMTSBM_STOP);
2240 		}
		/* kp_refcntc is expected to be 0 here; count the conflict. */
2241 		ASSERT(kp->kp_refcntc == 0);
2242 		sfmmu_kpm_demap_large(kpmvaddr);
2243 		kp->kp_refcntc++;
2244 		PP_SETKPMC(pp);
2245 		break;
2246
2247 	case KPM_UNC_SMALL1:				/* -  - ks s */
2248 	case KPM_UNC_SMALL2:				/* kc - ks s */
2249 		/*
2250 		 * Have to demap an already small kpm mapping in preparation
2251 		 * to the upcoming TNC mode. The demap can already be done
2252 		 * due to another conflict within the kpm_page.
2253 		 */
2254 		sfmmu_kpm_demap_small(kpmvaddr);
		/*
		 * Count the new conflict and move this page's count
		 * from kp_refcnts to kp_refcnt; the page flag changes
		 * from KPMS to KPMC accordingly.
		 */
2255 		kp->kp_refcntc++;
2256 		kp->kp_refcnts--;
2257 		kp->kp_refcnt++;
2258 		PP_CLRKPMS(pp);
2259 		PP_SETKPMC(pp);
2260 		break;
2261
2262 	case KPM_UNC_NODEMAP1:				/* -  - ks - */
2263 		/* fallthru */
2264
2265 	case KPM_UNC_NODEMAP2:				/* kc - -  - */
2266 	case KPM_UNC_NODEMAP3:				/* kc - ks - */
		/* No demap is issued here; only record the conflict. */
2267 		kp->kp_refcntc++;
2268 		PP_SETKPMC(pp);
2269 		break;
2270
2271 	case KPM_UNC_NOP1:				/* kc c -  - */
2272 	case KPM_UNC_NOP2:				/* kc c ks - */
		/* pp is already flagged KPMC; nothing to update. */
2273 		break;
2274
2275 	default:
2276 		badstate++;
2277 	}
	/*
	 * Common exit of the kpm_page_t path (never reached from the
	 * kpm_smallpages path, so kp is always set here).
	 */
2278 exit:
2279 	if (badstate) {
2280 		panic("sfmmu_kpm_page_cache: inconsistent VAC state "
2281 			"kpmvaddr=%p kp=%p pp=%p", (void *)kpmvaddr,
2282 			(void *)kp, (void *)pp);
2283 	}
2284 	return;
2285
	/*
	 * kpm_smallpages path: look up the kpm_spage of pp and its hash
	 * lock entry, then hand kp_mapped, that lock and KPM_MAPPEDSC to
	 * sfmmu_kpm_stsbmtl(). Its return value is treated as the
	 * previous mapping state, which must have been KPM_MAPPEDS or
	 * KPM_MAPPEDSC. The mapping is then demapped and the per page
	 * KPMC flag set (HAT_TMPNC) or cleared (HAT_CACHE).
	 */
2286 smallpages_page_cache:
2287 	PP2KPMSPG(pp, ksp);
2288 	kpmsp = KPMP_SHASH(ksp);
2289
2290 	oldval = sfmmu_kpm_stsbmtl(&ksp->kp_mapped,
2291 				&kpmsp->kshl_lock, KPM_MAPPEDSC);
2292
2293 	if (!(oldval == KPM_MAPPEDS || oldval == KPM_MAPPEDSC))
2294 		panic("smallpages_page_cache: inconsistent mapping");
2295
2296 	sfmmu_kpm_demap_small(kpmvaddr);
2297
2298 	if (flags == HAT_TMPNC) {
2299 		PP_SETKPMC(pp);
2300 		ASSERT(!PP_ISKPMS(pp));
2301
2302 	} else {
2303 		ASSERT(PP_ISKPMC(pp));
2304 		PP_CLRKPMC(pp);
2305 	}
2306
2307 	/*
2308 	 * Keep KPM_MAPPEDSC until the next kpm tsbmiss where it
2309 	 * prevents TL tsbmiss handling and force a hat_kpm_fault.
2310 	 * There we can start over again.
2311 	 */
2312 }
2313