xref: /illumos-gate/usr/src/uts/i86pc/os/memnode.c (revision d362b749)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
30 #include <sys/bootconf.h>
31 #include <sys/atomic.h>
32 #include <sys/lgrp.h>
33 #include <sys/memlist.h>
34 #include <sys/memnode.h>
35 #include <sys/platform_module.h>
36 #include <vm/vm_dep.h>
37 
/*
 * Upper bound (exclusive) on the memnode indices this file will use; it
 * bounds the search in mem_node_alloc() and the mnode sanity ASSERTs.
 * Starts at 1.
 */
int	max_mem_nodes = 1;

/* Per-memnode configuration (exists, physbase, physmax), indexed by mnode. */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Reset to 0 by startup_build_mem_nodes() when the platform supplies no
 * plat_build_mem_nodes() hook.
 * NOTE(review): presumably consumed by PFN_2_MEM_NODE() -- confirm.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and should always be >=
 * the number of bits set in memnodes_mask or the algorithm may fail.
 * (Creators bump the count before setting the mask bit; deletion clears
 * the mask bit before dropping the count.)
 */
uint16_t num_memnodes;
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * It is a byte quantity: users convert it with btop() before applying
 * it to PFNs.  A value of zero disables the alignment adjustments.
 */
uint64_t mem_node_physalign;
54 
/*
 * Platform hooks we will need.
 *
 * The hooks are declared weak so a platform may omit them; code in this
 * file tests the symbol's address (e.g. "if (&plat_slice_add)") before
 * calling a hook.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
62 
63 /*
64  * Adjust the memnode config after a DR operation.
65  *
66  * It is rather tricky to do these updates since we can't
67  * protect the memnode structures with locks, so we must
68  * be mindful of the order in which updates and reads to
69  * these values can occur.
70  */
71 
72 void
73 mem_node_add_slice(pfn_t start, pfn_t end)
74 {
75 	int mnode;
76 	mnodeset_t newmask, oldmask;
77 
78 	/*
79 	 * DR will pass us the first pfn that is allocatable.
80 	 * We need to round down to get the real start of
81 	 * the slice.
82 	 */
83 	if (mem_node_physalign) {
84 		start &= ~(btop(mem_node_physalign) - 1);
85 		end = roundup(end, btop(mem_node_physalign)) - 1;
86 	}
87 
88 	if (&plat_slice_add)
89 		plat_slice_add(start, end);
90 
91 	mnode = PFN_2_MEM_NODE(start);
92 	ASSERT(mnode < max_mem_nodes);
93 
94 	if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
95 		/*
96 		 * Add slice to existing node.
97 		 */
98 		if (start < mem_node_config[mnode].physbase)
99 			mem_node_config[mnode].physbase = start;
100 		if (end > mem_node_config[mnode].physmax)
101 			mem_node_config[mnode].physmax = end;
102 	} else {
103 		mem_node_config[mnode].physbase = start;
104 		mem_node_config[mnode].physmax = end;
105 		atomic_add_16(&num_memnodes, 1);
106 		do {
107 			oldmask = memnodes_mask;
108 			newmask = memnodes_mask | (1ull << mnode);
109 		} while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
110 	}
111 
112 	/*
113 	 * Inform the common lgrp framework about the new memory
114 	 */
115 	lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
116 }
117 
118 /* ARGSUSED */
119 void
120 mem_node_pre_del_slice(pfn_t start, pfn_t end)
121 {
122 	int mnode = PFN_2_MEM_NODE(start);
123 
124 	ASSERT(mnode < max_mem_nodes);
125 	ASSERT(mem_node_config[mnode].exists == 1);
126 }
127 
128 /*
129  * Remove a PFN range from a memnode.  On some platforms,
130  * the memnode will be created with physbase at the first
131  * allocatable PFN, but later deleted with the MC slice
132  * base address converted to a PFN, in which case we need
133  * to assume physbase and up.
134  */
135 void
136 mem_node_post_del_slice(pfn_t start, pfn_t end, int cancelled)
137 {
138 	int mnode;
139 	pgcnt_t delta_pgcnt, node_size;
140 	mnodeset_t omask, nmask;
141 
142 	if (mem_node_physalign) {
143 		start &= ~(btop(mem_node_physalign) - 1);
144 		end = roundup(end, btop(mem_node_physalign)) - 1;
145 	}
146 	mnode = PFN_2_MEM_NODE(start);
147 
148 	ASSERT(mnode < max_mem_nodes);
149 	ASSERT(mem_node_config[mnode].exists == 1);
150 
151 	if (!cancelled) {
152 		delta_pgcnt = end - start;
153 		node_size = mem_node_config[mnode].physmax -
154 		    mem_node_config[mnode].physbase;
155 
156 		if (node_size > delta_pgcnt) {
157 			/*
158 			 * Subtract the slice from the memnode.
159 			 */
160 			if (start <= mem_node_config[mnode].physbase)
161 				mem_node_config[mnode].physbase = end + 1;
162 			ASSERT(end <= mem_node_config[mnode].physmax);
163 			if (end == mem_node_config[mnode].physmax)
164 				mem_node_config[mnode].physmax = start - 1;
165 		} else {
166 			/*
167 			 * Let the common lgrp framework know this mnode is
168 			 * leaving
169 			 */
170 			lgrp_config(LGRP_CONFIG_MEM_DEL,
171 			    mnode, MEM_NODE_2_LGRPHAND(mnode));
172 
173 			/*
174 			 * Delete the whole node.
175 			 */
176 			ASSERT(MNODE_PGCNT(mnode) == 0);
177 			do {
178 				omask = memnodes_mask;
179 				nmask = omask & ~(1ull << mnode);
180 			} while (cas64(&memnodes_mask, omask, nmask) != omask);
181 			atomic_add_16(&num_memnodes, -1);
182 			mem_node_config[mnode].exists = 0;
183 		}
184 
185 		if (&plat_slice_del)
186 			plat_slice_del(start, end);
187 	}
188 }
189 
190 void
191 startup_build_mem_nodes(struct memlist *list)
192 {
193 	pfn_t	start, end;
194 
195 	/* LINTED: ASSERT will always true or false */
196 	ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
197 
198 	if (&plat_build_mem_nodes) {
199 		plat_build_mem_nodes(list);
200 	} else {
201 		/*
202 		 * Boot install lists are arranged <addr, len>, ...
203 		 */
204 		while (list) {
205 			start = list->address >> PAGESHIFT;
206 			if (start > physmax)
207 				continue;
208 			end = (list->address + list->size - 1) >> PAGESHIFT;
209 			if (end > physmax)
210 				end = physmax;
211 			mem_node_add_slice(start, end);
212 			list = list->next;
213 		}
214 		mem_node_physalign = 0;
215 		mem_node_pfn_shift = 0;
216 	}
217 }
218 
219 /*
220  * Allocate an unassigned memnode.
221  */
222 int
223 mem_node_alloc()
224 {
225 	int mnode;
226 	mnodeset_t newmask, oldmask;
227 
228 	/*
229 	 * Find an unused memnode.  Update it atomically to prevent
230 	 * a first time memnode creation race.
231 	 */
232 	for (mnode = 0; mnode < max_mem_nodes; mnode++)
233 		if (cas32((uint32_t *)&mem_node_config[mnode].exists,
234 		    0, 1) == 0)
235 			break;
236 
237 	if (mnode >= max_mem_nodes)
238 		panic("Out of free memnodes\n");
239 
240 	mem_node_config[mnode].physbase = (pfn_t)-1l;
241 	mem_node_config[mnode].physmax = 0;
242 	atomic_add_16(&num_memnodes, 1);
243 	do {
244 		oldmask = memnodes_mask;
245 		newmask = memnodes_mask | (1ull << mnode);
246 	} while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
247 
248 	return (mnode);
249 }
250 
251 /*
252  * Find the intersection between a memnode and a memlist
253  * and returns the number of pages that overlap.
254  *
255  * Assumes the list is protected from DR operations by
256  * the memlist lock.
257  */
258 pgcnt_t
259 mem_node_memlist_pages(int mnode, struct memlist *mlist)
260 {
261 	pfn_t		base, end;
262 	pfn_t		cur_base, cur_end;
263 	pgcnt_t		npgs;
264 	struct memlist	*pmem;
265 
266 	base = mem_node_config[mnode].physbase;
267 	end = mem_node_config[mnode].physmax;
268 	npgs = 0;
269 
270 	memlist_read_lock();
271 
272 	for (pmem = mlist; pmem; pmem = pmem->next) {
273 		cur_base = btop(pmem->address);
274 		cur_end = cur_base + btop(pmem->size) - 1;
275 		if (end < cur_base || base > cur_end)
276 			continue;
277 		npgs = npgs + (MIN(cur_end, end) -
278 		    MAX(cur_base, base)) + 1;
279 	}
280 
281 	memlist_read_unlock();
282 
283 	return (npgs);
284 }
285