xref: /linux/drivers/edac/skx_common.c (revision c6fbb759)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *
4  * Shared code by both skx_edac and i10nm_edac. Originally split out
5  * from the skx_edac driver.
6  *
7  * This file is linked into both skx_edac and i10nm_edac drivers. In
8  * order to avoid link errors, this file must be like a pure library
9  * without including symbols and defines which would otherwise conflict,
10  * when linked once into a module and into a built-in object, at the
11  * same time. For example, __this_module symbol references when that
12  * file is being linked into a built-in object.
13  *
14  * Copyright (c) 2018, Intel Corporation.
15  */
16 
17 #include <linux/acpi.h>
18 #include <linux/dmi.h>
19 #include <linux/adxl.h>
20 #include <acpi/nfit.h>
21 #include <asm/mce.h>
22 #include "edac_module.h"
23 #include "skx_common.h"
24 
/*
 * Component names this file looks for in the list returned by
 * adxl_get_component_names(), indexed by the INDEX_* constants.
 * skx_adxl_get() matches them with strcmp(), so the spelling has to agree
 * exactly with the names reported by the firmware.
 */
static const char * const component_names[] = {
	[INDEX_SOCKET]		= "ProcessorSocketId",
	[INDEX_MEMCTRL]		= "MemoryControllerId",
	[INDEX_CHANNEL]		= "ChannelId",
	[INDEX_DIMM]		= "DimmSlotId",
	[INDEX_CS]		= "ChipSelect",
	[INDEX_NM_MEMCTRL]	= "NmMemoryControllerId",
	[INDEX_NM_CHANNEL]	= "NmChannelId",
	[INDEX_NM_DIMM]		= "NmDimmSlotId",
	[INDEX_NM_CS]		= "NmChipSelect",
};
36 
/*
 * State of the firmware (ADXL) decoder, filled in by skx_adxl_get().
 */
/* For each component_names[] entry, its position in the firmware name list. */
static int component_indices[ARRAY_SIZE(component_names)];
/* Number of firmware component names; non-zero enables the ADXL decoder. */
static int adxl_component_count;
/* NULL-terminated name list obtained from adxl_get_component_names(). */
static const char * const *adxl_component_names;
/* Output buffer handed to adxl_decode(), one value per firmware component. */
static u64 *adxl_values;
/* MSG_SIZE byte buffer holding the "name:value" text of the last decode. */
static char *adxl_msg;
/* Bit i is set when component i (i >= INDEX_NM_FIRST) was found in firmware. */
static unsigned long adxl_nm_bitmap;

/* Scratch buffer for the message passed to edac_mc_handle_error(). */
static char skx_msg[MSG_SIZE];
/* Driver-supplied callbacks installed through skx_set_decode(). */
static skx_decode_f driver_decode;
static skx_show_retry_log_f skx_show_retry_rd_err_log;
/* Address limits read by skx_get_hi_lo(), used to range-check addresses. */
static u64 skx_tolm, skx_tohm;
/* List of skx_dev structures built by skx_get_all_bus_mappings(). */
static LIST_HEAD(dev_edac_list);
/* Set through skx_set_mem_cfg(); true when 2-level memory is configured. */
static bool skx_mem_cfg_2lm;
50 
51 int __init skx_adxl_get(void)
52 {
53 	const char * const *names;
54 	int i, j;
55 
56 	names = adxl_get_component_names();
57 	if (!names) {
58 		skx_printk(KERN_NOTICE, "No firmware support for address translation.\n");
59 		return -ENODEV;
60 	}
61 
62 	for (i = 0; i < INDEX_MAX; i++) {
63 		for (j = 0; names[j]; j++) {
64 			if (!strcmp(component_names[i], names[j])) {
65 				component_indices[i] = j;
66 
67 				if (i >= INDEX_NM_FIRST)
68 					adxl_nm_bitmap |= 1 << i;
69 
70 				break;
71 			}
72 		}
73 
74 		if (!names[j] && i < INDEX_NM_FIRST)
75 			goto err;
76 	}
77 
78 	if (skx_mem_cfg_2lm) {
79 		if (!adxl_nm_bitmap)
80 			skx_printk(KERN_NOTICE, "Not enough ADXL components for 2-level memory.\n");
81 		else
82 			edac_dbg(2, "adxl_nm_bitmap: 0x%lx\n", adxl_nm_bitmap);
83 	}
84 
85 	adxl_component_names = names;
86 	while (*names++)
87 		adxl_component_count++;
88 
89 	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
90 			      GFP_KERNEL);
91 	if (!adxl_values) {
92 		adxl_component_count = 0;
93 		return -ENOMEM;
94 	}
95 
96 	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
97 	if (!adxl_msg) {
98 		adxl_component_count = 0;
99 		kfree(adxl_values);
100 		return -ENOMEM;
101 	}
102 
103 	return 0;
104 err:
105 	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
106 		   component_names[i]);
107 	for (j = 0; names[j]; j++)
108 		skx_printk(KERN_CONT, "%s ", names[j]);
109 	skx_printk(KERN_CONT, "\n");
110 
111 	return -ENODEV;
112 }
113 
114 void __exit skx_adxl_put(void)
115 {
116 	kfree(adxl_values);
117 	kfree(adxl_msg);
118 }
119 
120 static bool skx_adxl_decode(struct decoded_addr *res, bool error_in_1st_level_mem)
121 {
122 	struct skx_dev *d;
123 	int i, len = 0;
124 
125 	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
126 				      res->addr < BIT_ULL(32))) {
127 		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
128 		return false;
129 	}
130 
131 	if (adxl_decode(res->addr, adxl_values)) {
132 		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
133 		return false;
134 	}
135 
136 	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
137 	if (error_in_1st_level_mem) {
138 		res->imc     = (adxl_nm_bitmap & BIT_NM_MEMCTRL) ?
139 			       (int)adxl_values[component_indices[INDEX_NM_MEMCTRL]] : -1;
140 		res->channel = (adxl_nm_bitmap & BIT_NM_CHANNEL) ?
141 			       (int)adxl_values[component_indices[INDEX_NM_CHANNEL]] : -1;
142 		res->dimm    = (adxl_nm_bitmap & BIT_NM_DIMM) ?
143 			       (int)adxl_values[component_indices[INDEX_NM_DIMM]] : -1;
144 		res->cs      = (adxl_nm_bitmap & BIT_NM_CS) ?
145 			       (int)adxl_values[component_indices[INDEX_NM_CS]] : -1;
146 	} else {
147 		res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
148 		res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
149 		res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
150 		res->cs      = (int)adxl_values[component_indices[INDEX_CS]];
151 	}
152 
153 	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
154 		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
155 		return false;
156 	}
157 
158 	list_for_each_entry(d, &dev_edac_list, list) {
159 		if (d->imc[0].src_id == res->socket) {
160 			res->dev = d;
161 			break;
162 		}
163 	}
164 
165 	if (!res->dev) {
166 		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
167 			   res->socket, res->imc);
168 		return false;
169 	}
170 
171 	for (i = 0; i < adxl_component_count; i++) {
172 		if (adxl_values[i] == ~0x0ull)
173 			continue;
174 
175 		len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
176 				adxl_component_names[i], adxl_values[i]);
177 		if (MSG_SIZE - len <= 0)
178 			break;
179 	}
180 
181 	res->decoded_by_adxl = true;
182 
183 	return true;
184 }
185 
186 void skx_set_mem_cfg(bool mem_cfg_2lm)
187 {
188 	skx_mem_cfg_2lm = mem_cfg_2lm;
189 }
190 
191 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log)
192 {
193 	driver_decode = decode;
194 	skx_show_retry_rd_err_log = show_retry_log;
195 }
196 
197 int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
198 {
199 	u32 reg;
200 
201 	if (pci_read_config_dword(d->util_all, off, &reg)) {
202 		skx_printk(KERN_ERR, "Failed to read src id\n");
203 		return -ENODEV;
204 	}
205 
206 	*id = GET_BITFIELD(reg, 12, 14);
207 	return 0;
208 }
209 
210 int skx_get_node_id(struct skx_dev *d, u8 *id)
211 {
212 	u32 reg;
213 
214 	if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
215 		skx_printk(KERN_ERR, "Failed to read node id\n");
216 		return -ENODEV;
217 	}
218 
219 	*id = GET_BITFIELD(reg, 0, 2);
220 	return 0;
221 }
222 
223 static int get_width(u32 mtr)
224 {
225 	switch (GET_BITFIELD(mtr, 8, 9)) {
226 	case 0:
227 		return DEV_X4;
228 	case 1:
229 		return DEV_X8;
230 	case 2:
231 		return DEV_X16;
232 	}
233 	return DEV_UNKNOWN;
234 }
235 
236 /*
237  * We use the per-socket device @cfg->did to count how many sockets are present,
238  * and to detemine which PCI buses are associated with each socket. Allocate
239  * and build the full list of all the skx_dev structures that we need here.
240  */
241 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
242 {
243 	struct pci_dev *pdev, *prev;
244 	struct skx_dev *d;
245 	u32 reg;
246 	int ndev = 0;
247 
248 	prev = NULL;
249 	for (;;) {
250 		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, cfg->decs_did, prev);
251 		if (!pdev)
252 			break;
253 		ndev++;
254 		d = kzalloc(sizeof(*d), GFP_KERNEL);
255 		if (!d) {
256 			pci_dev_put(pdev);
257 			return -ENOMEM;
258 		}
259 
260 		if (pci_read_config_dword(pdev, cfg->busno_cfg_offset, &reg)) {
261 			kfree(d);
262 			pci_dev_put(pdev);
263 			skx_printk(KERN_ERR, "Failed to read bus idx\n");
264 			return -ENODEV;
265 		}
266 
267 		d->bus[0] = GET_BITFIELD(reg, 0, 7);
268 		d->bus[1] = GET_BITFIELD(reg, 8, 15);
269 		if (cfg->type == SKX) {
270 			d->seg = pci_domain_nr(pdev->bus);
271 			d->bus[2] = GET_BITFIELD(reg, 16, 23);
272 			d->bus[3] = GET_BITFIELD(reg, 24, 31);
273 		} else {
274 			d->seg = GET_BITFIELD(reg, 16, 23);
275 		}
276 
277 		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
278 			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
279 		list_add_tail(&d->list, &dev_edac_list);
280 		prev = pdev;
281 	}
282 
283 	if (list)
284 		*list = &dev_edac_list;
285 	return ndev;
286 }
287 
288 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm)
289 {
290 	struct pci_dev *pdev;
291 	u32 reg;
292 
293 	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, did, NULL);
294 	if (!pdev) {
295 		edac_dbg(2, "Can't get tolm/tohm\n");
296 		return -ENODEV;
297 	}
298 
299 	if (pci_read_config_dword(pdev, off[0], &reg)) {
300 		skx_printk(KERN_ERR, "Failed to read tolm\n");
301 		goto fail;
302 	}
303 	skx_tolm = reg;
304 
305 	if (pci_read_config_dword(pdev, off[1], &reg)) {
306 		skx_printk(KERN_ERR, "Failed to read lower tohm\n");
307 		goto fail;
308 	}
309 	skx_tohm = reg;
310 
311 	if (pci_read_config_dword(pdev, off[2], &reg)) {
312 		skx_printk(KERN_ERR, "Failed to read upper tohm\n");
313 		goto fail;
314 	}
315 	skx_tohm |= (u64)reg << 32;
316 
317 	pci_dev_put(pdev);
318 	*tolm = skx_tolm;
319 	*tohm = skx_tohm;
320 	edac_dbg(2, "tolm = 0x%llx tohm = 0x%llx\n", skx_tolm, skx_tohm);
321 	return 0;
322 fail:
323 	pci_dev_put(pdev);
324 	return -ENODEV;
325 }
326 
327 static int skx_get_dimm_attr(u32 reg, int lobit, int hibit, int add,
328 			     int minval, int maxval, const char *name)
329 {
330 	u32 val = GET_BITFIELD(reg, lobit, hibit);
331 
332 	if (val < minval || val > maxval) {
333 		edac_dbg(2, "bad %s = %d (raw=0x%x)\n", name, val, reg);
334 		return -EINVAL;
335 	}
336 	return val + add;
337 }
338 
339 #define numrank(reg)	skx_get_dimm_attr(reg, 12, 13, 0, 0, 2, "ranks")
340 #define numrow(reg)	skx_get_dimm_attr(reg, 2, 4, 12, 1, 6, "rows")
341 #define numcol(reg)	skx_get_dimm_attr(reg, 0, 1, 10, 0, 2, "cols")
342 
343 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
344 		      struct skx_imc *imc, int chan, int dimmno,
345 		      struct res_config *cfg)
346 {
347 	int  banks, ranks, rows, cols, npages;
348 	enum mem_type mtype;
349 	u64 size;
350 
351 	ranks = numrank(mtr);
352 	rows = numrow(mtr);
353 	cols = imc->hbm_mc ? 6 : numcol(mtr);
354 
355 	if (imc->hbm_mc) {
356 		banks = 32;
357 		mtype = MEM_HBM2;
358 	} else if (cfg->support_ddr5 && (amap & 0x8)) {
359 		banks = 32;
360 		mtype = MEM_DDR5;
361 	} else {
362 		banks = 16;
363 		mtype = MEM_DDR4;
364 	}
365 
366 	/*
367 	 * Compute size in 8-byte (2^3) words, then shift to MiB (2^20)
368 	 */
369 	size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3);
370 	npages = MiB_TO_PAGES(size);
371 
372 	edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld MiB (%d pages) bank: %d, rank: %d, row: 0x%x, col: 0x%x\n",
373 		 imc->mc, chan, dimmno, size, npages,
374 		 banks, 1 << ranks, rows, cols);
375 
376 	imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mcmtr, 0, 0);
377 	imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mcmtr, 9, 9);
378 	imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0);
379 	imc->chan[chan].dimms[dimmno].rowbits = rows;
380 	imc->chan[chan].dimms[dimmno].colbits = cols;
381 
382 	dimm->nr_pages = npages;
383 	dimm->grain = 32;
384 	dimm->dtype = get_width(mtr);
385 	dimm->mtype = mtype;
386 	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
387 
388 	if (imc->hbm_mc)
389 		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_HBMC#%u_Chan#%u",
390 			 imc->src_id, imc->lmc, chan);
391 	else
392 		snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
393 			 imc->src_id, imc->lmc, chan, dimmno);
394 
395 	return 1;
396 }
397 
398 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
399 			int chan, int dimmno, const char *mod_str)
400 {
401 	int smbios_handle;
402 	u32 dev_handle;
403 	u16 flags;
404 	u64 size = 0;
405 
406 	dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
407 						   imc->src_id, 0);
408 
409 	smbios_handle = nfit_get_smbios_id(dev_handle, &flags);
410 	if (smbios_handle == -EOPNOTSUPP) {
411 		pr_warn_once("%s: Can't find size of NVDIMM. Try enabling CONFIG_ACPI_NFIT\n", mod_str);
412 		goto unknown_size;
413 	}
414 
415 	if (smbios_handle < 0) {
416 		skx_printk(KERN_ERR, "Can't find handle for NVDIMM ADR=0x%x\n", dev_handle);
417 		goto unknown_size;
418 	}
419 
420 	if (flags & ACPI_NFIT_MEM_MAP_FAILED) {
421 		skx_printk(KERN_ERR, "NVDIMM ADR=0x%x is not mapped\n", dev_handle);
422 		goto unknown_size;
423 	}
424 
425 	size = dmi_memdev_size(smbios_handle);
426 	if (size == ~0ull)
427 		skx_printk(KERN_ERR, "Can't find size for NVDIMM ADR=0x%x/SMBIOS=0x%x\n",
428 			   dev_handle, smbios_handle);
429 
430 unknown_size:
431 	dimm->nr_pages = size >> PAGE_SHIFT;
432 	dimm->grain = 32;
433 	dimm->dtype = DEV_UNKNOWN;
434 	dimm->mtype = MEM_NVDIMM;
435 	dimm->edac_mode = EDAC_SECDED; /* likely better than this */
436 
437 	edac_dbg(0, "mc#%d: channel %d, dimm %d, %llu MiB (%u pages)\n",
438 		 imc->mc, chan, dimmno, size >> 20, dimm->nr_pages);
439 
440 	snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u",
441 		 imc->src_id, imc->lmc, chan, dimmno);
442 
443 	return (size == 0 || size == ~0ull) ? 0 : 1;
444 }
445 
446 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
447 		     const char *ctl_name, const char *mod_str,
448 		     get_dimm_config_f get_dimm_config,
449 		     struct res_config *cfg)
450 {
451 	struct mem_ctl_info *mci;
452 	struct edac_mc_layer layers[2];
453 	struct skx_pvt *pvt;
454 	int rc;
455 
456 	/* Allocate a new MC control structure */
457 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
458 	layers[0].size = NUM_CHANNELS;
459 	layers[0].is_virt_csrow = false;
460 	layers[1].type = EDAC_MC_LAYER_SLOT;
461 	layers[1].size = NUM_DIMMS;
462 	layers[1].is_virt_csrow = true;
463 	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
464 			    sizeof(struct skx_pvt));
465 
466 	if (unlikely(!mci))
467 		return -ENOMEM;
468 
469 	edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci);
470 
471 	/* Associate skx_dev and mci for future usage */
472 	imc->mci = mci;
473 	pvt = mci->pvt_info;
474 	pvt->imc = imc;
475 
476 	mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
477 				  imc->node_id, imc->lmc);
478 	if (!mci->ctl_name) {
479 		rc = -ENOMEM;
480 		goto fail0;
481 	}
482 
483 	mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_NVDIMM;
484 	if (cfg->support_ddr5)
485 		mci->mtype_cap |= MEM_FLAG_DDR5;
486 	mci->edac_ctl_cap = EDAC_FLAG_NONE;
487 	mci->edac_cap = EDAC_FLAG_NONE;
488 	mci->mod_name = mod_str;
489 	mci->dev_name = pci_name(pdev);
490 	mci->ctl_page_to_phys = NULL;
491 
492 	rc = get_dimm_config(mci, cfg);
493 	if (rc < 0)
494 		goto fail;
495 
496 	/* Record ptr to the generic device */
497 	mci->pdev = &pdev->dev;
498 
499 	/* Add this new MC control structure to EDAC's list of MCs */
500 	if (unlikely(edac_mc_add_mc(mci))) {
501 		edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
502 		rc = -EINVAL;
503 		goto fail;
504 	}
505 
506 	return 0;
507 
508 fail:
509 	kfree(mci->ctl_name);
510 fail0:
511 	edac_mc_free(mci);
512 	imc->mci = NULL;
513 	return rc;
514 }
515 
516 static void skx_unregister_mci(struct skx_imc *imc)
517 {
518 	struct mem_ctl_info *mci = imc->mci;
519 
520 	if (!mci)
521 		return;
522 
523 	edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci);
524 
525 	/* Remove MC sysfs nodes */
526 	edac_mc_del_mc(mci->pdev);
527 
528 	edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
529 	kfree(mci->ctl_name);
530 	edac_mc_free(mci);
531 }
532 
533 static void skx_mce_output_error(struct mem_ctl_info *mci,
534 				 const struct mce *m,
535 				 struct decoded_addr *res)
536 {
537 	enum hw_event_mc_err_type tp_event;
538 	char *optype;
539 	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
540 	bool overflow = GET_BITFIELD(m->status, 62, 62);
541 	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
542 	bool scrub_err = false;
543 	bool recoverable;
544 	int len;
545 	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
546 	u32 mscod = GET_BITFIELD(m->status, 16, 31);
547 	u32 errcode = GET_BITFIELD(m->status, 0, 15);
548 	u32 optypenum = GET_BITFIELD(m->status, 4, 6);
549 
550 	recoverable = GET_BITFIELD(m->status, 56, 56);
551 
552 	if (uncorrected_error) {
553 		core_err_cnt = 1;
554 		if (ripv) {
555 			tp_event = HW_EVENT_ERR_UNCORRECTED;
556 		} else {
557 			tp_event = HW_EVENT_ERR_FATAL;
558 		}
559 	} else {
560 		tp_event = HW_EVENT_ERR_CORRECTED;
561 	}
562 
563 	/*
564 	 * According to Intel Architecture spec vol 3B,
565 	 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
566 	 * memory errors should fit one of these masks:
567 	 *	000f 0000 1mmm cccc (binary)
568 	 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
569 	 * where:
570 	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
571 	 *	    won't be shown
572 	 *	mmm = error type
573 	 *	cccc = channel
574 	 * If the mask doesn't match, report an error to the parsing logic
575 	 */
576 	if (!((errcode & 0xef80) == 0x80 || (errcode & 0xef80) == 0x280)) {
577 		optype = "Can't parse: it is not a mem";
578 	} else {
579 		switch (optypenum) {
580 		case 0:
581 			optype = "generic undef request error";
582 			break;
583 		case 1:
584 			optype = "memory read error";
585 			break;
586 		case 2:
587 			optype = "memory write error";
588 			break;
589 		case 3:
590 			optype = "addr/cmd error";
591 			break;
592 		case 4:
593 			optype = "memory scrubbing error";
594 			scrub_err = true;
595 			break;
596 		default:
597 			optype = "reserved";
598 			break;
599 		}
600 	}
601 	if (res->decoded_by_adxl) {
602 		len = snprintf(skx_msg, MSG_SIZE, "%s%s err_code:0x%04x:0x%04x %s",
603 			 overflow ? " OVERFLOW" : "",
604 			 (uncorrected_error && recoverable) ? " recoverable" : "",
605 			 mscod, errcode, adxl_msg);
606 	} else {
607 		len = snprintf(skx_msg, MSG_SIZE,
608 			 "%s%s err_code:0x%04x:0x%04x ProcessorSocketId:0x%x MemoryControllerId:0x%x PhysicalRankId:0x%x Row:0x%x Column:0x%x Bank:0x%x BankGroup:0x%x",
609 			 overflow ? " OVERFLOW" : "",
610 			 (uncorrected_error && recoverable) ? " recoverable" : "",
611 			 mscod, errcode,
612 			 res->socket, res->imc, res->rank,
613 			 res->row, res->column, res->bank_address, res->bank_group);
614 	}
615 
616 	if (skx_show_retry_rd_err_log)
617 		skx_show_retry_rd_err_log(res, skx_msg + len, MSG_SIZE - len, scrub_err);
618 
619 	edac_dbg(0, "%s\n", skx_msg);
620 
621 	/* Call the helper to output message */
622 	edac_mc_handle_error(tp_event, mci, core_err_cnt,
623 			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
624 			     res->channel, res->dimm, -1,
625 			     optype, skx_msg);
626 }
627 
628 static bool skx_error_in_1st_level_mem(const struct mce *m)
629 {
630 	u32 errcode;
631 
632 	if (!skx_mem_cfg_2lm)
633 		return false;
634 
635 	errcode = GET_BITFIELD(m->status, 0, 15);
636 
637 	if ((errcode & 0xef80) != 0x280)
638 		return false;
639 
640 	return true;
641 }
642 
643 int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
644 			void *data)
645 {
646 	struct mce *mce = (struct mce *)data;
647 	struct decoded_addr res;
648 	struct mem_ctl_info *mci;
649 	char *type;
650 
651 	if (mce->kflags & MCE_HANDLED_CEC)
652 		return NOTIFY_DONE;
653 
654 	/* ignore unless this is memory related with an address */
655 	if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
656 		return NOTIFY_DONE;
657 
658 	memset(&res, 0, sizeof(res));
659 	res.mce  = mce;
660 	res.addr = mce->addr;
661 
662 	/* Try driver decoder first */
663 	if (!(driver_decode && driver_decode(&res))) {
664 		/* Then try firmware decoder (ACPI DSM methods) */
665 		if (!(adxl_component_count && skx_adxl_decode(&res, skx_error_in_1st_level_mem(mce))))
666 			return NOTIFY_DONE;
667 	}
668 
669 	mci = res.dev->imc[res.imc].mci;
670 
671 	if (!mci)
672 		return NOTIFY_DONE;
673 
674 	if (mce->mcgstatus & MCG_STATUS_MCIP)
675 		type = "Exception";
676 	else
677 		type = "Event";
678 
679 	skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n");
680 
681 	skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: 0x%llx "
682 			   "Bank %d: 0x%llx\n", mce->extcpu, type,
683 			   mce->mcgstatus, mce->bank, mce->status);
684 	skx_mc_printk(mci, KERN_DEBUG, "TSC 0x%llx ", mce->tsc);
685 	skx_mc_printk(mci, KERN_DEBUG, "ADDR 0x%llx ", mce->addr);
686 	skx_mc_printk(mci, KERN_DEBUG, "MISC 0x%llx ", mce->misc);
687 
688 	skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:0x%x TIME %llu SOCKET "
689 			   "%u APIC 0x%x\n", mce->cpuvendor, mce->cpuid,
690 			   mce->time, mce->socketid, mce->apicid);
691 
692 	skx_mce_output_error(mci, mce, &res);
693 
694 	mce->kflags |= MCE_HANDLED_EDAC;
695 	return NOTIFY_DONE;
696 }
697 
698 void skx_remove(void)
699 {
700 	int i, j;
701 	struct skx_dev *d, *tmp;
702 
703 	edac_dbg(0, "\n");
704 
705 	list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
706 		list_del(&d->list);
707 		for (i = 0; i < NUM_IMC; i++) {
708 			if (d->imc[i].mci)
709 				skx_unregister_mci(&d->imc[i]);
710 
711 			if (d->imc[i].mdev)
712 				pci_dev_put(d->imc[i].mdev);
713 
714 			if (d->imc[i].mbase)
715 				iounmap(d->imc[i].mbase);
716 
717 			for (j = 0; j < NUM_CHANNELS; j++) {
718 				if (d->imc[i].chan[j].cdev)
719 					pci_dev_put(d->imc[i].chan[j].cdev);
720 			}
721 		}
722 		if (d->util_all)
723 			pci_dev_put(d->util_all);
724 		if (d->pcu_cr3)
725 			pci_dev_put(d->pcu_cr3);
726 		if (d->sad_all)
727 			pci_dev_put(d->sad_all);
728 		if (d->uracu)
729 			pci_dev_put(d->uracu);
730 
731 		kfree(d);
732 	}
733 }
734