1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Page retirement can be an extended process due to the fact that a retirement
31  * may not be possible when the original request is made.  The kernel will
32  * repeatedly attempt to retire a given page, but will not let us know when the
33  * page has been retired.  We therefore have to poll to see if the retirement
34  * has been completed.  This poll is implemented with a bounded exponential
35  * backoff to reduce the burden which we impose upon the system.
36  *
37  * To reduce the burden on fmd in the face of retirement storms, we schedule
38  * all retries as a group.  In the simplest case, we attempt to retire a single
39  * page.  When forced to retry, we initially schedule a retry at a configurable
40  * interval t.  If the retry fails, we schedule another at 2 * t, and so on,
41  * until t reaches the maximum interval (also configurable).  Future retries
42  * for that page will occur with t equal to the maximum interval value.  We
43  * will never give up on a retirement.
44  *
45  * With multiple retirements, the situation gets slightly more complicated.  As
46  * indicated above, we schedule retries as a group.  We don't want to deny new
47  * pages their short retry intervals, so we'll (re)set the retry interval to the
48  * value appropriate for the newest page.
49  */
50 
51 #include <cma.h>
52 
53 #include <time.h>
54 #include <errno.h>
55 #include <unistd.h>
56 #include <strings.h>
57 #include <fm/fmd_api.h>
58 #include <fm/libtopo.h>
59 #include <sys/fm/protocol.h>
60 #include <sys/mem.h>
61 
62 static void
63 cma_page_free(fmd_hdl_t *hdl, cma_page_t *page)
64 {
65 	if (page->pg_fmri != NULL)
66 		nvlist_free(page->pg_fmri);
67 	fmd_hdl_free(hdl, page, sizeof (cma_page_t));
68 }
69 
70 /*
71  * Retire the specified ASRU, referring to a memory page by PA or by DIMM
72  * offset (i.e. the encoded coordinates internal bank, row, and column).
73  * In the initial FMA implementation, fault.memory.page exported an ASRU
74  * with an explicit physical address, which is valid at the initial time of
75  * diagnosis but may not be later following DR, DIMM removal, or interleave
76  * changes.  On SPARC, this issue was solved by exporting the DIMM offset
77  * and pushing the entire FMRI to the platform memory controller through
78  * /dev/mem so it can derive the current PA from the DIMM and offset.
79  * On x64, we also use DIMM and offset, but the mem:/// unum string is an
80  * encoded hc:/// FMRI that is then used by the x64 memory controller driver.
81  * At some point these three approaches need to be rationalized: all platforms
82  * should use the same scheme, either with decoding in the kernel or decoding
83  * in userland (i.e. with a libtopo method to compute and update the PA).
84  */
85 /*ARGSUSED*/
86 int
87 cma_page_retire(fmd_hdl_t *hdl, nvlist_t *nvl, nvlist_t *asru,
88     const char *uuid, boolean_t repair)
89 {
90 	cma_page_t *page;
91 	uint64_t pageaddr;
92 	char *unumstr;
93 	nvlist_t *asrucp = NULL;
94 	const char *action = repair ? "unretire" : "retire";
95 
96 	if (nvlist_dup(asru, &asrucp, 0) != 0) {
97 		fmd_hdl_debug(hdl, "page retire nvlist dup failed\n");
98 		return (CMA_RA_FAILURE);
99 	}
100 
101 	/* It should already be expanded, but we'll do it again anyway */
102 	if (fmd_nvl_fmri_expand(hdl, asrucp) < 0) {
103 		fmd_hdl_debug(hdl, "failed to expand page asru\n");
104 		cma_stats.bad_flts.fmds_value.ui64++;
105 		nvlist_free(asrucp);
106 		return (CMA_RA_FAILURE);
107 	}
108 
109 	if (!repair && !fmd_nvl_fmri_present(hdl, asrucp)) {
110 		fmd_hdl_debug(hdl, "page retire overtaken by events\n");
111 		cma_stats.page_nonent.fmds_value.ui64++;
112 		nvlist_free(asrucp);
113 		return (CMA_RA_SUCCESS);
114 	}
115 
116 	if (nvlist_lookup_uint64(asrucp, FM_FMRI_MEM_PHYSADDR, &pageaddr)
117 	    != 0) {
118 		fmd_hdl_debug(hdl, "mem fault missing '%s'\n",
119 		    FM_FMRI_MEM_PHYSADDR);
120 		cma_stats.bad_flts.fmds_value.ui64++;
121 		nvlist_free(asrucp);
122 		return (CMA_RA_FAILURE);
123 	}
124 
125 	if (repair) {
126 		if (!cma.cma_page_dounretire) {
127 			fmd_hdl_debug(hdl, "suppressed unretire of page %llx\n",
128 			    (u_longlong_t)pageaddr);
129 			cma_stats.page_supp.fmds_value.ui64++;
130 			nvlist_free(asrucp);
131 			return (CMA_RA_SUCCESS);
132 		}
133 	} else {
134 		if (!cma.cma_page_doretire) {
135 			fmd_hdl_debug(hdl, "suppressed retire of page %llx\n",
136 			    (u_longlong_t)pageaddr);
137 			cma_stats.page_supp.fmds_value.ui64++;
138 			nvlist_free(asrucp);
139 			return (CMA_RA_FAILURE);
140 		}
141 	}
142 
143 	/*
144 	 * If the unum is an hc fmri string expand it to an fmri and include
145 	 * that in a modified asru nvlist.
146 	 */
147 	if (nvlist_lookup_string(asrucp, FM_FMRI_MEM_UNUM, &unumstr) == 0 &&
148 	    strncmp(unumstr, "hc:/", 4) == 0) {
149 		int err;
150 		nvlist_t *unumfmri;
151 		struct topo_hdl *thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION);
152 
153 		if (topo_fmri_str2nvl(thp, unumstr, &unumfmri, &err) != 0) {
154 			fmd_hdl_debug(hdl, "page retire str2nvl failed: %s\n",
155 			    topo_strerror(err));
156 			fmd_hdl_topo_rele(hdl, thp);
157 			nvlist_free(asrucp);
158 			return (CMA_RA_FAILURE);
159 		}
160 
161 		fmd_hdl_topo_rele(hdl, thp);
162 
163 		if (nvlist_add_nvlist(asrucp, FM_FMRI_MEM_UNUM "-fmri",
164 		    unumfmri) != 0) {
165 			fmd_hdl_debug(hdl, "page retire failed to add "
166 			    "unumfmri to modified asru");
167 			nvlist_free(unumfmri);
168 			nvlist_free(asrucp);
169 			return (CMA_RA_FAILURE);
170 		}
171 		nvlist_free(unumfmri);
172 	}
173 
174 	if (cma_page_cmd(hdl,
175 	    repair ? MEM_PAGE_FMRI_UNRETIRE : MEM_PAGE_FMRI_RETIRE, asrucp)
176 	    == 0) {
177 		fmd_hdl_debug(hdl, "%sd page 0x%llx\n",
178 		    action, (u_longlong_t)pageaddr);
179 		if (repair)
180 			cma_stats.page_repairs.fmds_value.ui64++;
181 		else
182 			cma_stats.page_flts.fmds_value.ui64++;
183 		nvlist_free(asrucp);
184 		return (CMA_RA_SUCCESS);
185 	} else if (repair || errno != EAGAIN) {
186 		fmd_hdl_debug(hdl, "%s of page 0x%llx failed, will not "
187 		    "retry: %s\n", action, (u_longlong_t)pageaddr,
188 		    strerror(errno));
189 
190 		cma_stats.page_fails.fmds_value.ui64++;
191 
192 		nvlist_free(asrucp);
193 		return (CMA_RA_FAILURE);
194 	}
195 
196 	/*
197 	 * The page didn't immediately retire.  We'll need to periodically
198 	 * check to see if it has been retired.
199 	 */
200 	fmd_hdl_debug(hdl, "page didn't retire - sleeping\n");
201 
202 	page = fmd_hdl_zalloc(hdl, sizeof (cma_page_t), FMD_SLEEP);
203 	page->pg_addr = pageaddr;
204 	page->pg_fmri = asrucp;
205 	if (uuid != NULL)
206 		page->pg_uuid = fmd_hdl_strdup(hdl, uuid, FMD_SLEEP);
207 
208 	page->pg_next = cma.cma_pages;
209 	cma.cma_pages = page;
210 
211 	if (cma.cma_page_timerid != 0)
212 		fmd_timer_remove(hdl, cma.cma_page_timerid);
213 
214 	cma.cma_page_curdelay = cma.cma_page_mindelay;
215 
216 	cma.cma_page_timerid =
217 	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
218 
219 	/* Don't free asrucp here.  This FMRI will be needed for retry. */
220 	return (CMA_RA_FAILURE);
221 }
222 
223 static int
224 page_retry(fmd_hdl_t *hdl, cma_page_t *page)
225 {
226 	if (page->pg_fmri != NULL && !fmd_nvl_fmri_present(hdl,
227 	    page->pg_fmri)) {
228 		fmd_hdl_debug(hdl, "page retire overtaken by events");
229 		cma_stats.page_nonent.fmds_value.ui64++;
230 
231 		if (page->pg_uuid != NULL)
232 			fmd_case_uuclose(hdl, page->pg_uuid);
233 		return (1); /* no longer a page to retire */
234 	}
235 
236 	if (cma_page_cmd(hdl, MEM_PAGE_FMRI_ISRETIRED, page->pg_fmri) == 0) {
237 		fmd_hdl_debug(hdl, "retired page 0x%llx on retry %u\n",
238 		    page->pg_addr, page->pg_nretries);
239 		cma_stats.page_flts.fmds_value.ui64++;
240 
241 		if (page->pg_uuid != NULL)
242 			fmd_case_uuclose(hdl, page->pg_uuid);
243 		return (1); /* page retired */
244 	}
245 
246 	if (errno == EAGAIN) {
247 		fmd_hdl_debug(hdl, "scheduling another retry for 0x%llx\n",
248 		    page->pg_addr);
249 		return (0); /* schedule another retry */
250 	} else {
251 		if (errno == EIO) {
252 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
253 			    "retirement: page isn't scheduled for retirement"
254 			    "(request made beyond page_retire limit?)\n",
255 			    page->pg_addr);
256 		} else {
257 			fmd_hdl_debug(hdl, "failed to retry page 0x%llx "
258 			    "retirement: %s\n", page->pg_addr,
259 			    strerror(errno));
260 		}
261 
262 		cma_stats.page_fails.fmds_value.ui64++;
263 		return (1); /* give up */
264 	}
265 }
266 
267 void
268 cma_page_retry(fmd_hdl_t *hdl)
269 {
270 	cma_page_t **pagep;
271 
272 	cma.cma_page_timerid = 0;
273 
274 	fmd_hdl_debug(hdl, "page_retry: timer fired\n");
275 
276 	pagep = &cma.cma_pages;
277 	while (*pagep != NULL) {
278 		cma_page_t *page = *pagep;
279 
280 		if (page_retry(hdl, page)) {
281 			/*
282 			 * Successful retry or we're giving up - remove from
283 			 * the list
284 			 */
285 			*pagep = page->pg_next;
286 
287 			if (page->pg_uuid != NULL)
288 				fmd_hdl_strfree(hdl, page->pg_uuid);
289 
290 			cma_page_free(hdl, page);
291 		} else {
292 			page->pg_nretries++;
293 			pagep = &page->pg_next;
294 		}
295 	}
296 
297 	if (cma.cma_pages == NULL)
298 		return; /* no more retirements */
299 
300 	/*
301 	 * We still have retirements that haven't completed.  Back the delay
302 	 * off, and schedule a retry.
303 	 */
304 	cma.cma_page_curdelay = MIN(cma.cma_page_curdelay * 2,
305 	    cma.cma_page_maxdelay);
306 
307 	fmd_hdl_debug(hdl, "scheduled page retirement retry for %llu secs\n",
308 	    (u_longlong_t)(cma.cma_page_curdelay / NANOSEC));
309 
310 	cma.cma_page_timerid =
311 	    fmd_timer_install(hdl, NULL, NULL, cma.cma_page_curdelay);
312 }
313 
314 void
315 cma_page_fini(fmd_hdl_t *hdl)
316 {
317 	cma_page_t *page;
318 
319 	while ((page = cma.cma_pages) != NULL) {
320 		cma.cma_pages = page->pg_next;
321 		cma_page_free(hdl, page);
322 	}
323 }
324