xref: /dragonfly/sys/platform/pc64/x86_64/amd64_mem.c (revision 37de577a)
/*-
 * Copyright (c) 1999 Michael Smith <msmith@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>

#include <sys/thread2.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>

/*
 * amd64 memory range operations
 *
 * This code will probably be impenetrable without reference to the
 * Intel Pentium Pro documentation or the x86-64 programmer's manual vol. 2.
 */

static char *mem_owner_bios = "BIOS";

#define	MR686_FIXMTRR	(1<<0)

#define	mrwithin(mr, a)							\
	(((a) >= (mr)->mr_base) && ((a) < ((mr)->mr_base + (mr)->mr_len)))
#define	mroverlap(mra, mrb)						\
	(mrwithin(mra, mrb->mr_base) || mrwithin(mrb, mra->mr_base))

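/*
 * mrvalid() accepts a range only if its base is 4KB-aligned, its length is
 * a power of two no smaller than 4KB, and the base is aligned to the
 * length, as MTRRs require.  For example, base 0xd0000000 with length
 * 0x08000000 (128MB) passes, but the same base with length 0x0c000000
 * fails because the length is not a power of two.
 */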
#define	mrvalid(base, len)						\
	((!(base & ((1 << 12) - 1))) &&	/* base is multiple of 4k */	\
	    ((len) >= (1 << 12)) &&	/* length is >= 4k */		\
	    powerof2((len)) &&		/* ... and power of two */	\
	    !((base) & ((len) - 1)))	/* range is not discontiguous */

#define	mrcopyflags(curr, new)						\
	(((curr) & ~MDF_ATTRMASK) | ((new) & MDF_ATTRMASK))

static int mtrrs_disabled;
TUNABLE_INT("machdep.disable_mtrrs", &mtrrs_disabled);
SYSCTL_INT(_machdep, OID_AUTO, disable_mtrrs, CTLFLAG_RD,
    &mtrrs_disabled, 0, "Disable amd64 MTRRs.");

static void	amd64_mrinit(struct mem_range_softc *sc);
static int	amd64_mrset(struct mem_range_softc *sc,
		    struct mem_range_desc *mrd, int *arg);
static void	amd64_mrAPinit(struct mem_range_softc *sc);
static void	amd64_mrreinit(struct mem_range_softc *sc);

static struct mem_range_ops amd64_mrops = {
	amd64_mrinit,
	amd64_mrset,
	amd64_mrAPinit,
	amd64_mrreinit
};

/* XXX for AP startup hook */
static u_int64_t mtrrcap, mtrrdef;

/* The bitmask for the PhysBase and PhysMask fields of the variable MTRRs. */
static u_int64_t mtrr_physmask;

static struct mem_range_desc *mem_range_match(struct mem_range_softc *sc,
		    struct mem_range_desc *mrd);
static void	amd64_mrfetch(struct mem_range_softc *sc);
static int	amd64_mtrrtype(int flags);
static int	amd64_mrt2mtrr(int flags, int oldval);
static int	amd64_mtrrconflict(int flag1, int flag2);
static void	amd64_mrstore(struct mem_range_softc *sc);
static void	amd64_mrstoreone(void *arg);
static struct mem_range_desc *amd64_mtrrfixsearch(struct mem_range_softc *sc,
		    u_int64_t addr);
static int	amd64_mrsetlow(struct mem_range_softc *sc,
		    struct mem_range_desc *mrd, int *arg);
static int	amd64_mrsetvariable(struct mem_range_softc *sc,
		    struct mem_range_desc *mrd, int *arg);

/* amd64 MTRR type to memory range type conversion */
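/*
 * The table index is the architectural MTRR memory type encoding:
 * 0 = UC, 1 = WC, 2-3 = reserved, 4 = WT, 5 = WP, 6 = WB.
 */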
static int amd64_mtrrtomrt[] = {
	MDF_UNCACHEABLE,
	MDF_WRITECOMBINE,
	MDF_UNKNOWN,
	MDF_UNKNOWN,
	MDF_WRITETHROUGH,
	MDF_WRITEPROTECT,
	MDF_WRITEBACK
};

#define	MTRRTOMRTLEN NELEM(amd64_mtrrtomrt)

static int
amd64_mtrr2mrt(int val)
{

	if (val < 0 || val >= MTRRTOMRTLEN)
		return (MDF_UNKNOWN);
	return (amd64_mtrrtomrt[val]);
}

/*
 * amd64 MTRR conflicts. Writeback and uncacheable may overlap.
 */
static int
amd64_mtrrconflict(int flag1, int flag2)
{

	flag1 &= MDF_ATTRMASK;
	flag2 &= MDF_ATTRMASK;
	if ((flag1 & MDF_UNKNOWN) || (flag2 & MDF_UNKNOWN))
		return (1);
	if (flag1 == flag2 ||
	    (flag1 == MDF_WRITEBACK && flag2 == MDF_UNCACHEABLE) ||
	    (flag2 == MDF_WRITEBACK && flag1 == MDF_UNCACHEABLE))
		return (0);
	return (1);
}

/*
 * Look for an exactly-matching range.
 */
static struct mem_range_desc *
mem_range_match(struct mem_range_softc *sc, struct mem_range_desc *mrd)
{
	struct mem_range_desc *cand;
	int i;

	for (i = 0, cand = sc->mr_desc; i < sc->mr_ndesc; i++, cand++)
		if ((cand->mr_base == mrd->mr_base) &&
		    (cand->mr_len == mrd->mr_len))
			return (cand);
	return (NULL);
}

/*
 * Fetch the current MTRR settings from the current CPU (all CPUs are
 * assumed to be in sync in the SMP case).  Note that if we are here,
 * we assume that MTRRs are enabled, and we may or may not have fixed
 * MTRRs.
 */
static void
amd64_mrfetch(struct mem_range_softc *sc)
{
	struct mem_range_desc *mrd;
	u_int64_t msrv;
	int i, j, msr;

	mrd = sc->mr_desc;

	/* Get fixed-range MTRRs. */
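	/*
	 * Each fixed-range MSR packs eight consecutive ranges, one type
	 * byte per range, lowest address in the least significant byte.
	 */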
	if (sc->mr_cap & MR686_FIXMTRR) {
		msr = MSR_MTRR64kBase;
		for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
			msrv = rdmsr(msr);
			for (j = 0; j < 8; j++, mrd++) {
				mrd->mr_flags =
				    (mrd->mr_flags & ~MDF_ATTRMASK) |
				    amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
				if (mrd->mr_owner[0] == 0)
					strcpy(mrd->mr_owner, mem_owner_bios);
				msrv = msrv >> 8;
			}
		}
		msr = MSR_MTRR16kBase;
		for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
			msrv = rdmsr(msr);
			for (j = 0; j < 8; j++, mrd++) {
				mrd->mr_flags =
				    (mrd->mr_flags & ~MDF_ATTRMASK) |
				    amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
				if (mrd->mr_owner[0] == 0)
					strcpy(mrd->mr_owner, mem_owner_bios);
				msrv = msrv >> 8;
			}
		}
		msr = MSR_MTRR4kBase;
		for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
			msrv = rdmsr(msr);
			for (j = 0; j < 8; j++, mrd++) {
				mrd->mr_flags =
				    (mrd->mr_flags & ~MDF_ATTRMASK) |
				    amd64_mtrr2mrt(msrv & 0xff) | MDF_ACTIVE;
				if (mrd->mr_owner[0] == 0)
					strcpy(mrd->mr_owner, mem_owner_bios);
				msrv = msrv >> 8;
			}
		}
	}

	/* Get remainder which must be variable MTRRs. */
	msr = MSR_MTRRVarBase;
	for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
		msrv = rdmsr(msr);
		mrd->mr_flags = (mrd->mr_flags & ~MDF_ATTRMASK) |
		    amd64_mtrr2mrt(msrv & MTRR_PHYSBASE_TYPE);
		mrd->mr_base = msrv & mtrr_physmask;
		msrv = rdmsr(msr + 1);
		mrd->mr_flags = (msrv & MTRR_PHYSMASK_VALID) ?
		    (mrd->mr_flags | MDF_ACTIVE) :
		    (mrd->mr_flags & ~MDF_ACTIVE);

		/* Compute the range from the mask. Ick. */
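		/*
		 * Example, assuming 36 physical address bits: a PhysMask
		 * of 0xff0000000 gives ~mask & (physmask | 0xfff) =
		 * 0x0fffffff, so mr_len = 0x10000000 (256MB).
		 */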
		mrd->mr_len = (~(msrv & mtrr_physmask) &
		    (mtrr_physmask | 0xfffL)) + 1;
		if (!mrvalid(mrd->mr_base, mrd->mr_len))
			mrd->mr_flags |= MDF_BOGUS;

		/* If unclaimed and active, must be the BIOS. */
		if ((mrd->mr_flags & MDF_ACTIVE) && (mrd->mr_owner[0] == 0))
			strcpy(mrd->mr_owner, mem_owner_bios);
	}
}

/*
 * Return the MTRR memory type matching a region's flags
 */
static int
amd64_mtrrtype(int flags)
{
	int i;

	flags &= MDF_ATTRMASK;

	for (i = 0; i < MTRRTOMRTLEN; i++) {
		if (amd64_mtrrtomrt[i] == MDF_UNKNOWN)
			continue;
		if (flags == amd64_mtrrtomrt[i])
			return (i);
	}
	return (-1);
}

static int
amd64_mrt2mtrr(int flags, int oldval)
{
	int val;

	if ((val = amd64_mtrrtype(flags)) == -1)
		return (oldval & 0xff);
	return (val & 0xff);
}

/*
 * Update running CPU(s) MTRRs to match the ranges in the descriptor
 * list.
 *
 * XXX Must be called with interrupts enabled.
 */
static void
amd64_mrstore(struct mem_range_softc *sc)
{
	/*
	 * We should use ipi_all_but_self() to call other CPUs into a
	 * locking gate, then call a target function to do this work.
	 * The "proper" solution involves a generalised locking gate
	 * implementation, not ready yet.
	 */
	lwkt_send_ipiq_mask(smp_active_mask, (void *)amd64_mrstoreone, sc);
}

/*
 * Update the current CPU's MTRRs with those represented in the
 * descriptor list.  Note that we do this wholesale rather than just
 * stuffing one entry; this is simpler (but slower, of course).
 */
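/*
 * The sequence below (disable caches via CR0.CD, flush caches and TLBs,
 * disable MTRRs, rewrite them, flush again, re-enable MTRRs and restore
 * CR0/CR4) follows the MTRR update procedure documented in the Intel and
 * AMD programmer's manuals.
 */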
static void
amd64_mrstoreone(void *arg)
{
	struct mem_range_softc *sc = arg;
	struct mem_range_desc *mrd;
	u_int64_t omsrv, msrv;
	int i, j, msr;
	u_long cr0, cr4;

	mrd = sc->mr_desc;

	crit_enter();

	/* Disable PGE. */
	cr4 = rcr4();
	load_cr4(cr4 & ~CR4_PGE);

	/* Disable caches (CD = 1, NW = 0). */
	cr0 = rcr0();
	load_cr0((cr0 & ~CR0_NW) | CR0_CD);

	/* Flush caches and TLBs. */
	wbinvd();
	cpu_invltlb();

	/* Disable MTRRs (E = 0). */
	wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) & ~MTRR_DEF_ENABLE);

	/* Set fixed-range MTRRs. */
	if (sc->mr_cap & MR686_FIXMTRR) {
		msr = MSR_MTRR64kBase;
		for (i = 0; i < (MTRR_N64K / 8); i++, msr++) {
			msrv = 0;
			omsrv = rdmsr(msr);
			for (j = 7; j >= 0; j--) {
				msrv = msrv << 8;
				msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
				    omsrv >> (j * 8));
			}
			wrmsr(msr, msrv);
			mrd += 8;
		}
		msr = MSR_MTRR16kBase;
		for (i = 0; i < (MTRR_N16K / 8); i++, msr++) {
			msrv = 0;
			omsrv = rdmsr(msr);
			for (j = 7; j >= 0; j--) {
				msrv = msrv << 8;
				msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
				    omsrv >> (j * 8));
			}
			wrmsr(msr, msrv);
			mrd += 8;
		}
		msr = MSR_MTRR4kBase;
		for (i = 0; i < (MTRR_N4K / 8); i++, msr++) {
			msrv = 0;
			omsrv = rdmsr(msr);
			for (j = 7; j >= 0; j--) {
				msrv = msrv << 8;
				msrv |= amd64_mrt2mtrr((mrd + j)->mr_flags,
				    omsrv >> (j * 8));
			}
			wrmsr(msr, msrv);
			mrd += 8;
		}
	}

	/* Set remainder which must be variable MTRRs. */
	msr = MSR_MTRRVarBase;
	for (; (mrd - sc->mr_desc) < sc->mr_ndesc; msr += 2, mrd++) {
		/* base/type register */
		omsrv = rdmsr(msr);
		if (mrd->mr_flags & MDF_ACTIVE) {
			msrv = mrd->mr_base & mtrr_physmask;
			msrv |= amd64_mrt2mtrr(mrd->mr_flags, omsrv);
		} else {
			msrv = 0;
		}
		wrmsr(msr, msrv);

		/* mask/active register */
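		/*
		 * rounddown2(mask, len) is mask & ~(len - 1); the resulting
		 * PhysMask encodes the range size, e.g. 0xff0000000 for a
		 * 256MB range with 36 physical address bits.
		 */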
		if (mrd->mr_flags & MDF_ACTIVE) {
			msrv = MTRR_PHYSMASK_VALID |
			    rounddown2(mtrr_physmask, mrd->mr_len);
		} else {
			msrv = 0;
		}
		wrmsr(msr + 1, msrv);
	}

	/* Flush caches and TLBs. */
	wbinvd();
	cpu_invltlb();

	/* Enable MTRRs. */
	wrmsr(MSR_MTRRdefType, rdmsr(MSR_MTRRdefType) | MTRR_DEF_ENABLE);

	/* Restore caches and PGE. */
	load_cr0(cr0);
	load_cr4(cr4);

	crit_exit();
}

/*
 * Hunt for the fixed MTRR referencing (addr)
 */
static struct mem_range_desc *
amd64_mtrrfixsearch(struct mem_range_softc *sc, u_int64_t addr)
{
	struct mem_range_desc *mrd;
	int i;

	for (i = 0, mrd = sc->mr_desc; i < (MTRR_N64K + MTRR_N16K + MTRR_N4K);
	     i++, mrd++)
		if ((addr >= mrd->mr_base) &&
		    (addr < (mrd->mr_base + mrd->mr_len)))
			return (mrd);
	return (NULL);
}

/*
 * Try to satisfy the given range request by manipulating the fixed
 * MTRRs that cover low memory.
 *
 * Note that we try to be generous here; we'll bloat the range out to
 * the next higher/lower boundary to avoid the consumer having to know
 * too much about the mechanisms here.
 *
 * XXX note that this will have to be updated when we start supporting
 * "busy" ranges.
 */
static int
amd64_mrsetlow(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
{
	struct mem_range_desc *first_md, *last_md, *curr_md;

	/* Range check. */
	if (((first_md = amd64_mtrrfixsearch(sc, mrd->mr_base)) == NULL) ||
	    ((last_md = amd64_mtrrfixsearch(sc, mrd->mr_base + mrd->mr_len - 1)) == NULL))
		return (EINVAL);

	/* Check that we aren't doing something risky. */
	if (!(mrd->mr_flags & MDF_FORCE))
		for (curr_md = first_md; curr_md <= last_md; curr_md++) {
			if ((curr_md->mr_flags & MDF_ATTRMASK) == MDF_UNKNOWN)
				return (EACCES);
		}

	/* Set flags, clear set-by-firmware flag. */
	for (curr_md = first_md; curr_md <= last_md; curr_md++) {
		curr_md->mr_flags = mrcopyflags(curr_md->mr_flags &
		    ~MDF_FIRMWARE, mrd->mr_flags);
		bcopy(mrd->mr_owner, curr_md->mr_owner, sizeof(mrd->mr_owner));
	}

	return (0);
}

/*
 * Modify/add a variable MTRR to satisfy the request.
 *
 * XXX needs to be updated to properly support "busy" ranges.
 */
static int
amd64_mrsetvariable(struct mem_range_softc *sc, struct mem_range_desc *mrd,
    int *arg)
{
	struct mem_range_desc *curr_md, *free_md;
	int i;

	/*
	 * Scan the currently active variable descriptors, look for
	 * one we exactly match (straight takeover) and for possible
	 * accidental overlaps.
	 *
	 * Keep track of the first empty variable descriptor in case
	 * we can't perform a takeover.
	 */
	i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
	curr_md = sc->mr_desc + i;
	free_md = NULL;
	for (; i < sc->mr_ndesc; i++, curr_md++) {
		if (curr_md->mr_flags & MDF_ACTIVE) {
			/* Exact match? */
			if ((curr_md->mr_base == mrd->mr_base) &&
			    (curr_md->mr_len == mrd->mr_len)) {

				/* Whoops, owned by someone. */
				if (curr_md->mr_flags & MDF_BUSY)
					return (EBUSY);

				/* Check that we aren't doing something risky */
				if (!(mrd->mr_flags & MDF_FORCE) &&
				    ((curr_md->mr_flags & MDF_ATTRMASK) ==
				    MDF_UNKNOWN))
					return (EACCES);

				/* Ok, just hijack this entry. */
				free_md = curr_md;
				break;
			}

			/* Non-exact overlap? */
			if (mroverlap(curr_md, mrd)) {
				/* Between conflicting region types? */
				if (amd64_mtrrconflict(curr_md->mr_flags,
				    mrd->mr_flags))
					return (EINVAL);
			}
		} else if (free_md == NULL) {
			free_md = curr_md;
		}
	}

	/* Got somewhere to put it? */
	if (free_md == NULL)
		return (ENOSPC);

	/* Set up new descriptor. */
	free_md->mr_base = mrd->mr_base;
	free_md->mr_len = mrd->mr_len;
	free_md->mr_flags = mrcopyflags(MDF_ACTIVE, mrd->mr_flags);
	bcopy(mrd->mr_owner, free_md->mr_owner, sizeof(mrd->mr_owner));
	return (0);
}

/*
 * Handle requests to set memory range attributes by manipulating MTRRs.
 */
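/*
 * Called through the amd64_mrops table installed in mem_range_softc.mr_op
 * by amd64_mem_drvinit(), typically on behalf of a MEMRANGE_SET ioctl on
 * /dev/mem or a driver asking for write-combining mappings.
 */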
529 amd64_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg)
530 {
531 	struct mem_range_desc *targ;
532 	int error;
533 
534 	switch (*arg) {
535 	case MEMRANGE_SET_UPDATE:
536 		/*
537 		 * Make sure that what's being asked for is even
538 		 * possible at all.
539 		 */
540 		if (!mrvalid(mrd->mr_base, mrd->mr_len) ||
541 		    amd64_mtrrtype(mrd->mr_flags) == -1)
542 			return (EINVAL);
543 
544 #define	FIXTOP	((MTRR_N64K * 0x10000) + (MTRR_N16K * 0x4000) + (MTRR_N4K * 0x1000))
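/*
 * FIXTOP is 8*64KB + 16*16KB + 64*4KB = 0x100000: the fixed-range MTRRs
 * cover exactly the low 1MB of physical memory.
 */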

		/* Are the "low memory" conditions applicable? */
		if ((sc->mr_cap & MR686_FIXMTRR) &&
		    ((mrd->mr_base + mrd->mr_len) <= FIXTOP)) {
			if ((error = amd64_mrsetlow(sc, mrd, arg)) != 0)
				return (error);
		} else {
			/* It's time to play with variable MTRRs. */
			if ((error = amd64_mrsetvariable(sc, mrd, arg)) != 0)
				return (error);
		}
		break;

	case MEMRANGE_SET_REMOVE:
		if ((targ = mem_range_match(sc, mrd)) == NULL)
			return (ENOENT);
		if (targ->mr_flags & MDF_FIXACTIVE)
			return (EPERM);
		if (targ->mr_flags & MDF_BUSY)
			return (EBUSY);
		targ->mr_flags &= ~MDF_ACTIVE;
		targ->mr_owner[0] = 0;
		break;

	default:
		return (EOPNOTSUPP);
	}

#if 0
	/* XXX */
	/*
	 * Ensure that the direct map region does not contain any mappings
	 * that span MTRRs of different types.  However, the fixed MTRRs can
	 * be ignored, because a large page mapping the first 1 MB of physical
	 * memory is a special case that the processor handles.  The entire
	 * TLB will be invalidated by amd64_mrstore(), so pmap_demote_DMAP()
	 * needn't do it.
	 */
	int i;

	i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
	mrd = sc->mr_desc + i;
	for (; i < sc->mr_ndesc; i++, mrd++) {
		if ((mrd->mr_flags & (MDF_ACTIVE | MDF_BOGUS)) == MDF_ACTIVE)
			pmap_demote_DMAP(mrd->mr_base, mrd->mr_len, FALSE);
	}
#endif

	/* Update the hardware. */
	amd64_mrstore(sc);

	/* Refetch to see where we're at. */
	amd64_mrfetch(sc);
	return (0);
}

/*
 * Work out how many ranges we support, initialise storage for them,
 * and fetch the initial settings.
 */
static void
amd64_mrinit(struct mem_range_softc *sc)
{
	struct mem_range_desc *mrd;
	u_int regs[4];
	int i, nmdesc = 0, pabits;

	mtrrcap = rdmsr(MSR_MTRRcap);
	mtrrdef = rdmsr(MSR_MTRRdefType);

	/* For now, bail out if MTRRs are not enabled. */
	if (!(mtrrdef & MTRR_DEF_ENABLE)) {
		if (bootverbose)
			kprintf("CPU supports MTRRs but they are not enabled\n");
		return;
	}
	nmdesc = mtrrcap & MTRR_CAP_VCNT;

	/*
	 * Determine the size of the PhysMask and PhysBase fields in
	 * the variable range MTRRs.  If extended CPUID leaf 0x80000008
	 * is present, use it to figure out how many physical address
	 * bits the CPU supports.  Otherwise, default to 36 address
	 * bits.
	 */
	if (cpu_exthigh >= 0x80000008) {
		do_cpuid(0x80000008, regs);
		pabits = regs[0] & 0xff;
	} else
		pabits = 36;
	mtrr_physmask = ((1UL << pabits) - 1) & ~0xfffUL;
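	/* With the 36-bit default this yields a mask of 0xffffff000. */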

	/* If fixed MTRRs supported and enabled. */
	if ((mtrrcap & MTRR_CAP_FIXED) && (mtrrdef & MTRR_DEF_FIXED_ENABLE)) {
		sc->mr_cap = MR686_FIXMTRR;
		nmdesc += MTRR_N64K + MTRR_N16K + MTRR_N4K;
	}

	sc->mr_desc = kmalloc(nmdesc * sizeof(struct mem_range_desc),
			      M_MEMDESC, M_WAITOK | M_ZERO);
	sc->mr_ndesc = nmdesc;

	mrd = sc->mr_desc;

	/* Populate the fixed MTRR entries' base/length. */
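	/*
	 * The fixed-range MTRRs describe the low 1MB: eight 64KB ranges at
	 * 0-0x7ffff, sixteen 16KB ranges at 0x80000-0xbffff and sixty-four
	 * 4KB ranges at 0xc0000-0xfffff.
	 */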
	if (sc->mr_cap & MR686_FIXMTRR) {
		for (i = 0; i < MTRR_N64K; i++, mrd++) {
			mrd->mr_base = i * 0x10000;
			mrd->mr_len = 0x10000;
			mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
			    MDF_FIXACTIVE;
		}
		for (i = 0; i < MTRR_N16K; i++, mrd++) {
			mrd->mr_base = i * 0x4000 + 0x80000;
			mrd->mr_len = 0x4000;
			mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
			    MDF_FIXACTIVE;
		}
		for (i = 0; i < MTRR_N4K; i++, mrd++) {
			mrd->mr_base = i * 0x1000 + 0xc0000;
			mrd->mr_len = 0x1000;
			mrd->mr_flags = MDF_FIXBASE | MDF_FIXLEN |
			    MDF_FIXACTIVE;
		}
	}

	/*
	 * Get current settings; anything set now is considered to
	 * have been set by the firmware. (XXX has something already
	 * played here?)
	 */
	amd64_mrfetch(sc);
	mrd = sc->mr_desc;
	for (i = 0; i < sc->mr_ndesc; i++, mrd++) {
		if (mrd->mr_flags & MDF_ACTIVE)
			mrd->mr_flags |= MDF_FIRMWARE;
	}

#if 0
	/*
	 * Ensure that the direct map region does not contain any mappings
	 * that span MTRRs of different types.  However, the fixed MTRRs can
	 * be ignored, because a large page mapping the first 1 MB of physical
	 * memory is a special case that the processor handles.  Invalidate
	 * any old TLB entries that might hold inconsistent memory type
	 * information.
	 */
	i = (sc->mr_cap & MR686_FIXMTRR) ? MTRR_N64K + MTRR_N16K + MTRR_N4K : 0;
	mrd = sc->mr_desc + i;
	for (; i < sc->mr_ndesc; i++, mrd++) {
		if ((mrd->mr_flags & (MDF_ACTIVE | MDF_BOGUS)) == MDF_ACTIVE)
			pmap_demote_DMAP(mrd->mr_base, mrd->mr_len, TRUE);
	}
#endif
}

/*
 * Initialise MTRRs on an AP after the BSP has run the init code.
 */
static void
amd64_mrAPinit(struct mem_range_softc *sc)
{
	amd64_mrstoreone(sc);
	wrmsr(MSR_MTRRdefType, mtrrdef);
}

/*
 * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor
 * list.
 *
 * XXX Must be called with interrupts enabled.
 */
static void
amd64_mrreinit(struct mem_range_softc *sc)
{
	/*
	 * We should use ipi_all_but_self() to call other CPUs into a
	 * locking gate, then call a target function to do this work.
	 * The "proper" solution involves a generalised locking gate
	 * implementation, not ready yet.
	 */
	lwkt_send_ipiq_mask(smp_active_mask, (void *)amd64_mrAPinit, sc);
}

static void
amd64_mem_drvinit(void *unused)
{

	if (mtrrs_disabled)
		return;
	if (!(cpu_feature & CPUID_MTRR))
		return;
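	/*
	 * Only base CPU families 6 and 0xf are accepted; modern CPUs
	 * report one of these two values in bits 8-11 of cpu_id.
	 */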
	if ((cpu_id & 0xf00) != 0x600 && (cpu_id & 0xf00) != 0xf00)
		return;
	switch (cpu_vendor_id) {
	case CPU_VENDOR_INTEL:
	case CPU_VENDOR_AMD:
	case CPU_VENDOR_CENTAUR:
		break;
	default:
		return;
	}
	mem_range_softc.mr_op = &amd64_mrops;
}
SYSINIT(amd64memdev, SI_SUB_DRIVERS, SI_ORDER_FIRST, amd64_mem_drvinit, NULL);