xref: /linux/drivers/edac/thunderx_edac.c (revision 9aa31612)
1 /*
2  * Cavium ThunderX memory controller kernel module
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
9  *
10  */
11 
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/edac.h>
15 #include <linux/interrupt.h>
16 #include <linux/string.h>
17 #include <linux/stop_machine.h>
18 #include <linux/delay.h>
19 #include <linux/sizes.h>
20 #include <linux/atomic.h>
21 #include <linux/bitfield.h>
22 #include <linux/circ_buf.h>
23 
24 #include <asm/page.h>
25 
26 #include "edac_module.h"
27 
28 #define phys_to_pfn(phys)	(PFN_DOWN(phys))
29 
30 #define THUNDERX_NODE		GENMASK(45, 44)
31 
/* Error classes stored in the .type field of struct error_descr entries. */
enum {
	ERR_CORRECTED = 1,	/* decode_register() prints "Corrected" */
	ERR_UNCORRECTED,	/* == 2 */
	ERR_UNKNOWN,		/* == 3 */
};
37 
38 struct error_descr {
39 	int	type;
40 	u64	mask;
41 	char	*descr;
42 };
43 
decode_register(char * str,size_t size,const struct error_descr * descr,const uint64_t reg)44 static void decode_register(char *str, size_t size,
45 			   const struct error_descr *descr,
46 			   const uint64_t reg)
47 {
48 	int ret = 0;
49 
50 	while (descr->type && descr->mask && descr->descr) {
51 		if (reg & descr->mask) {
52 			ret = snprintf(str, size, "\n\t%s, %s",
53 				       descr->type == ERR_CORRECTED ?
54 					 "Corrected" : "Uncorrected",
55 				       descr->descr);
56 			str += ret;
57 			size -= ret;
58 		}
59 		descr++;
60 	}
61 }
62 
/*
 * get_bits() - extract a @width-bit wide field starting at bit @pos of @data.
 *
 * The mask is built in unsigned long so that fields of 31 bits and wider do
 * not overflow an int shift; a @width covering the whole word returns all
 * bits from @pos upwards, and a non-positive @width returns 0.
 */
static unsigned long get_bits(unsigned long data, int pos, int width)
{
	const int long_bits = 8 * (int)sizeof(data);

	if (width <= 0)
		return 0;

	if (width >= long_bits)
		return data >> pos;

	return (data >> pos) & ((1UL << width) - 1);
}
67 
68 #define L2C_CTL			0x87E080800000
69 #define L2C_CTL_DISIDXALIAS	BIT(0)
70 
71 #define PCI_DEVICE_ID_THUNDER_LMC 0xa022
72 
/*
 * LMC_FADR register offset and field extractors for a value in its layout
 * (DIMM, rank, bank, row and column, as printed by the error handler).
 * The argument is parenthesised so that expressions can be passed safely.
 */
#define LMC_FADR		0x20
#define LMC_FADR_FDIMM(x)	(((x) >> 37) & 0x1)
#define LMC_FADR_FBUNK(x)	(((x) >> 36) & 0x1)
#define LMC_FADR_FBANK(x)	(((x) >> 32) & 0xf)
#define LMC_FADR_FROW(x)	(((x) >> 14) & 0xffff)
#define LMC_FADR_FCOL(x)	(((x) >> 0) & 0x1fff)
79 
80 #define LMC_NXM_FADR		0x28
81 #define LMC_ECC_SYND		0x38
82 
83 #define LMC_ECC_PARITY_TEST	0x108
84 
85 #define LMC_INT_W1S		0x150
86 
87 #define LMC_INT_ENA_W1C		0x158
88 #define LMC_INT_ENA_W1S		0x160
89 
90 #define LMC_CONFIG		0x188
91 
92 #define LMC_CONFIG_BG2		BIT(62)
93 #define LMC_CONFIG_RANK_ENA	BIT(42)
94 #define LMC_CONFIG_PBANK_LSB(x)	(((x) >> 5) & 0xF)
95 #define LMC_CONFIG_ROW_LSB(x)	(((x) >> 2) & 0x7)
96 
97 #define LMC_CONTROL		0x190
98 #define LMC_CONTROL_XOR_BANK	BIT(16)
99 
100 #define LMC_INT			0x1F0
101 
102 #define LMC_INT_DDR_ERR		BIT(11)
103 #define LMC_INT_DED_ERR		(0xFUL << 5)
104 #define LMC_INT_SEC_ERR         (0xFUL << 1)
105 #define LMC_INT_NXM_WR_MASK	BIT(0)
106 
107 #define LMC_DDR_PLL_CTL		0x258
108 #define LMC_DDR_PLL_CTL_DDR4	BIT(29)
109 
110 #define LMC_FADR_SCRAMBLED	0x330
111 
112 #define LMC_INT_UE              (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \
113 				 LMC_INT_NXM_WR_MASK)
114 
115 #define LMC_INT_CE		(LMC_INT_SEC_ERR)
116 
117 static const struct error_descr lmc_errors[] = {
118 	{
119 		.type  = ERR_CORRECTED,
120 		.mask  = LMC_INT_SEC_ERR,
121 		.descr = "Single-bit ECC error",
122 	},
123 	{
124 		.type  = ERR_UNCORRECTED,
125 		.mask  = LMC_INT_DDR_ERR,
126 		.descr = "DDR chip error",
127 	},
128 	{
129 		.type  = ERR_UNCORRECTED,
130 		.mask  = LMC_INT_DED_ERR,
131 		.descr = "Double-bit ECC error",
132 	},
133 	{
134 		.type = ERR_UNCORRECTED,
135 		.mask = LMC_INT_NXM_WR_MASK,
136 		.descr = "Non-existent memory write",
137 	},
138 	{0, 0, NULL},
139 };
140 
/* Interrupt enable bits; LMC_INT_ENA_ALL is what probe/remove write. */
#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
#define LMC_INT_INTR_DED_ENA		BIT(2)
#define LMC_INT_INTR_SEC_ENA		BIT(1)
#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)

#define LMC_INT_ENA_ALL			GENMASK(5, 0)

/*
 * LMC_DDR_PLL_CTL, LMC_DDR_PLL_CTL_DDR4 and LMC_CONTROL are already defined
 * with the other LMC registers earlier in this file; they are not repeated
 * here.  Only the additional LMC_CONTROL bit is defined.
 */
#define LMC_CONTROL_RDIMM	BIT(0)

#define LMC_SCRAM_FADR		0x330

#define LMC_CHAR_MASK0		0x228
#define LMC_CHAR_MASK2		0x238
160 
161 #define RING_ENTRIES	8
162 
163 struct debugfs_entry {
164 	const char *name;
165 	umode_t mode;
166 	const struct file_operations fops;
167 };
168 
169 struct lmc_err_ctx {
170 	u64 reg_int;
171 	u64 reg_fadr;
172 	u64 reg_nxm_fadr;
173 	u64 reg_scram_fadr;
174 	u64 reg_ecc_synd;
175 };
176 
177 struct thunderx_lmc {
178 	void __iomem *regs;
179 	struct pci_dev *pdev;
180 	struct msix_entry msix_ent;
181 
182 	atomic_t ecc_int;
183 
184 	u64 mask0;
185 	u64 mask2;
186 	u64 parity_test;
187 	u64 node;
188 
189 	int xbits;
190 	int bank_width;
191 	int pbank_lsb;
192 	int dimm_lsb;
193 	int rank_lsb;
194 	int bank_lsb;
195 	int row_lsb;
196 	int col_hi_lsb;
197 
198 	int xor_bank;
199 	int l2c_alias;
200 
201 	struct page *mem;
202 
203 	struct lmc_err_ctx err_ctx[RING_ENTRIES];
204 	unsigned long ring_head;
205 	unsigned long ring_tail;
206 };
207 
/* Map a free-running ring counter onto an index; @size must be a power of 2. */
#define ring_pos(pos, size) ((pos) & ((size) - 1))
209 
/*
 * DEBUGFS_STRUCT() - define a static struct debugfs_entry debugfs_<_name>.
 *
 * The file is named after _name, gets permissions _mode (checked at build
 * time) and uses _write/_read as its handlers; either may be NULL.
 */
#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
static struct debugfs_entry debugfs_##_name = {				    \
	.fops = {							    \
		.llseek = generic_file_llseek,				    \
		.read  = _read,						    \
		.write = _write,					    \
		.open = simple_open,					    \
	},								    \
	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
	.name = __stringify(_name),					    \
}
221 
/*
 * DEBUGFS_FIELD_ATTR() - generate debugfs read/write handlers plus the
 * debugfs_<_field> entry for a u64 member of struct thunderx_<_type>.
 *
 * Read returns the member formatted as "0x%016llx"; write parses an unsigned
 * number (any base accepted by kstrtoull) into the member.
 *
 * The text is formatted with the size of the local buffer, not the size of
 * the user's read request, and only the formatted characters are offered
 * to user space, so no uninitialised stack bytes are copied out.
 */
#define DEBUGFS_FIELD_ATTR(_type, _field)				    \
static ssize_t thunderx_##_type##_##_field##_read(struct file *file,	    \
					    char __user *data,		    \
					    size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = scnprintf(buf, sizeof(buf), "0x%016llx", pdata->_field);	    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_field##_write(struct file *file,	    \
					     const char __user *data,	    \
					     size_t count, loff_t *ppos)    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &pdata->_field);	    \
									    \
	return res ? res : count;					    \
}									    \
									    \
DEBUGFS_STRUCT(_field, 0600,						    \
		   thunderx_##_type##_##_field##_write,			    \
		   thunderx_##_type##_##_field##_read)
250 
/*
 * DEBUGFS_REG_ATTR() - generate debugfs read/write handlers plus the
 * debugfs_<_name> entry for the 64-bit device register at offset _reg of
 * struct thunderx_<_type>'s regs mapping.
 *
 * Read returns the register formatted as "0x%016llx"; write parses an
 * unsigned number and stores it to the register.
 *
 * Formatting is bounded by the local buffer and only the formatted
 * characters are offered to user space, so no uninitialised stack bytes
 * are copied out.
 */
#define DEBUGFS_REG_ATTR(_type, _name, _reg)				    \
static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	    \
					   char __user *data,		    \
					   size_t count, loff_t *ppos)      \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = scnprintf(buf, sizeof(buf), "0x%016llx",			    \
			readq(pdata->regs + _reg));			    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	    \
					    const char __user *data,	    \
					    size_t count, loff_t *ppos)     \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	u64 val;							    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &val);		    \
									    \
	if (!res) {							    \
		writeq(val, pdata->regs + _reg);			    \
		res = count;						    \
	}								    \
									    \
	return res;							    \
}									    \
									    \
DEBUGFS_STRUCT(_name, 0600,						    \
	       thunderx_##_type##_##_name##_write,			    \
	       thunderx_##_type##_##_name##_read)
285 
286 #define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
287 
/*
 * To get an ECC error injected, the following steps are needed:
 * - Setup the ECC injection by writing the appropriate parameters
 *   (file names as registered in lmc_dfs_ents[]):
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask0
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask2
 *	echo 0x802 > /sys/kernel/debug/<device number>/parity_test
 * - Do the actual injection:
 *	echo 1 > /sys/kernel/debug/<device number>/inject_ecc
 */
thunderx_lmc_inject_int_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)297 static ssize_t thunderx_lmc_inject_int_write(struct file *file,
298 					     const char __user *data,
299 					     size_t count, loff_t *ppos)
300 {
301 	struct thunderx_lmc *lmc = file->private_data;
302 	u64 val;
303 	int res;
304 
305 	res = kstrtoull_from_user(data, count, 0, &val);
306 
307 	if (!res) {
308 		/* Trigger the interrupt */
309 		writeq(val, lmc->regs + LMC_INT_W1S);
310 		res = count;
311 	}
312 
313 	return res;
314 }
315 
thunderx_lmc_int_read(struct file * file,char __user * data,size_t count,loff_t * ppos)316 static ssize_t thunderx_lmc_int_read(struct file *file,
317 				     char __user *data,
318 				     size_t count, loff_t *ppos)
319 {
320 	struct thunderx_lmc *lmc = file->private_data;
321 	char buf[20];
322 	u64 lmc_int = readq(lmc->regs + LMC_INT);
323 
324 	snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
325 	return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
326 }
327 
328 #define TEST_PATTERN 0xa5
329 
inject_ecc_fn(void * arg)330 static int inject_ecc_fn(void *arg)
331 {
332 	struct thunderx_lmc *lmc = arg;
333 	uintptr_t addr, phys;
334 	unsigned int cline_size = cache_line_size();
335 	const unsigned int lines = PAGE_SIZE / cline_size;
336 	unsigned int i, cl_idx;
337 
338 	addr = (uintptr_t)page_address(lmc->mem);
339 	phys = (uintptr_t)page_to_phys(lmc->mem);
340 
341 	cl_idx = (phys & 0x7f) >> 4;
342 	lmc->parity_test &= ~(7ULL << 8);
343 	lmc->parity_test |= (cl_idx << 8);
344 
345 	writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
346 	writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
347 	writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
348 
349 	readq(lmc->regs + LMC_CHAR_MASK0);
350 	readq(lmc->regs + LMC_CHAR_MASK2);
351 	readq(lmc->regs + LMC_ECC_PARITY_TEST);
352 
353 	for (i = 0; i < lines; i++) {
354 		memset((void *)addr, TEST_PATTERN, cline_size);
355 		barrier();
356 
357 		/*
358 		 * Flush L1 cachelines to the PoC (L2).
359 		 * This will cause cacheline eviction to the L2.
360 		 */
361 		asm volatile("dc civac, %0\n"
362 			     "dsb sy\n"
363 			     : : "r"(addr + i * cline_size));
364 	}
365 
366 	for (i = 0; i < lines; i++) {
367 		/*
368 		 * Flush L2 cachelines to the DRAM.
369 		 * This will cause cacheline eviction to the DRAM
370 		 * and ECC corruption according to the masks set.
371 		 */
372 		__asm__ volatile("sys #0,c11,C1,#2, %0\n"
373 				 : : "r"(phys + i * cline_size));
374 	}
375 
376 	for (i = 0; i < lines; i++) {
377 		/*
378 		 * Invalidate L2 cachelines.
379 		 * The subsequent load will cause cacheline fetch
380 		 * from the DRAM and an error interrupt
381 		 */
382 		__asm__ volatile("sys #0,c11,C1,#1, %0"
383 				 : : "r"(phys + i * cline_size));
384 	}
385 
386 	for (i = 0; i < lines; i++) {
387 		/*
388 		 * Invalidate L1 cachelines.
389 		 * The subsequent load will cause cacheline fetch
390 		 * from the L2 and/or DRAM
391 		 */
392 		asm volatile("dc ivac, %0\n"
393 			     "dsb sy\n"
394 			     : : "r"(addr + i * cline_size));
395 	}
396 
397 	return 0;
398 }
399 
thunderx_lmc_inject_ecc_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)400 static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
401 					     const char __user *data,
402 					     size_t count, loff_t *ppos)
403 {
404 	struct thunderx_lmc *lmc = file->private_data;
405 	unsigned int cline_size = cache_line_size();
406 	u8 *tmp;
407 	void __iomem *addr;
408 	unsigned int offs, timeout = 100000;
409 
410 	atomic_set(&lmc->ecc_int, 0);
411 
412 	lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
413 	if (!lmc->mem)
414 		return -ENOMEM;
415 
416 	tmp = kmalloc(cline_size, GFP_KERNEL);
417 	if (!tmp) {
418 		__free_pages(lmc->mem, 0);
419 		return -ENOMEM;
420 	}
421 
422 	addr = page_address(lmc->mem);
423 
424 	while (!atomic_read(&lmc->ecc_int) && timeout--) {
425 		stop_machine(inject_ecc_fn, lmc, NULL);
426 
427 		for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
428 			/*
429 			 * Do a load from the previously rigged location
430 			 * This should generate an error interrupt.
431 			 */
432 			memcpy(tmp, addr + offs, cline_size);
433 			asm volatile("dsb ld\n");
434 		}
435 	}
436 
437 	kfree(tmp);
438 	__free_pages(lmc->mem, 0);
439 
440 	return count;
441 }
442 
443 LMC_DEBUGFS_ENT(mask0);
444 LMC_DEBUGFS_ENT(mask2);
445 LMC_DEBUGFS_ENT(parity_test);
446 
447 DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
448 DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
449 DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
450 
451 static struct debugfs_entry *lmc_dfs_ents[] = {
452 	&debugfs_mask0,
453 	&debugfs_mask2,
454 	&debugfs_parity_test,
455 	&debugfs_inject_ecc,
456 	&debugfs_inject_int,
457 	&debugfs_int_w1c,
458 };
459 
/*
 * thunderx_create_debugfs_nodes() - create one debugfs file per entry.
 * @parent: directory to create the files in
 * @attrs:  array of @num entry descriptions
 * @data:   private data handed to each file
 * @num:    number of entries in @attrs
 *
 * Creation stops at the first failure.
 *
 * Returns the number of files created, 0 when CONFIG_EDAC_DEBUG is off, or
 * -ENOENT when @parent is NULL.
 */
static int thunderx_create_debugfs_nodes(struct dentry *parent,
					  struct debugfs_entry *attrs[],
					  void *data,
					  size_t num)
{
	struct debugfs_entry *attr;
	struct dentry *ent;
	int created = 0;

	if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
		return 0;

	if (!parent)
		return -ENOENT;

	while (created < num) {
		attr = attrs[created];

		ent = edac_debugfs_create_file(attr->name, attr->mode,
					       parent, data, &attr->fops);
		if (IS_ERR(ent))
			break;

		created++;
	}

	return created;
}
484 
thunderx_faddr_to_phys(u64 faddr,struct thunderx_lmc * lmc)485 static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
486 {
487 	phys_addr_t addr = 0;
488 	int bank, xbits;
489 
490 	addr |= lmc->node << 40;
491 	addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
492 	addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
493 	addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
494 	addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
495 
496 	bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
497 
498 	if (lmc->xor_bank)
499 		bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
500 
501 	addr |= bank << lmc->bank_lsb;
502 
503 	xbits = PCI_FUNC(lmc->pdev->devfn);
504 
505 	if (lmc->l2c_alias)
506 		xbits ^= get_bits(addr, 20, lmc->xbits) ^
507 			 get_bits(addr, 12, lmc->xbits);
508 
509 	addr |= xbits << 7;
510 
511 	return addr;
512 }
513 
thunderx_get_num_lmcs(unsigned int node)514 static unsigned int thunderx_get_num_lmcs(unsigned int node)
515 {
516 	unsigned int number = 0;
517 	struct pci_dev *pdev = NULL;
518 
519 	do {
520 		pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
521 				      PCI_DEVICE_ID_THUNDER_LMC,
522 				      pdev);
523 		if (pdev) {
524 #ifdef CONFIG_NUMA
525 			if (pdev->dev.numa_node == node)
526 				number++;
527 #else
528 			number++;
529 #endif
530 		}
531 	} while (pdev);
532 
533 	return number;
534 }
535 
536 #define LMC_MESSAGE_SIZE	120
537 #define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))
538 
thunderx_lmc_err_isr(int irq,void * dev_id)539 static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
540 {
541 	struct mem_ctl_info *mci = dev_id;
542 	struct thunderx_lmc *lmc = mci->pvt_info;
543 
544 	unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
545 	struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
546 
547 	writeq(0, lmc->regs + LMC_CHAR_MASK0);
548 	writeq(0, lmc->regs + LMC_CHAR_MASK2);
549 	writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
550 
551 	ctx->reg_int = readq(lmc->regs + LMC_INT);
552 	ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
553 	ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
554 	ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
555 	ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
556 
557 	lmc->ring_head++;
558 
559 	atomic_set(&lmc->ecc_int, 1);
560 
561 	/* Clear the interrupt */
562 	writeq(ctx->reg_int, lmc->regs + LMC_INT);
563 
564 	return IRQ_WAKE_THREAD;
565 }
566 
thunderx_lmc_threaded_isr(int irq,void * dev_id)567 static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
568 {
569 	struct mem_ctl_info *mci = dev_id;
570 	struct thunderx_lmc *lmc = mci->pvt_info;
571 	phys_addr_t phys_addr;
572 
573 	unsigned long tail;
574 	struct lmc_err_ctx *ctx;
575 
576 	irqreturn_t ret = IRQ_NONE;
577 
578 	char *msg;
579 	char *other;
580 
581 	msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
582 	other =  kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
583 
584 	if (!msg || !other)
585 		goto err_free;
586 
587 	while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
588 		ARRAY_SIZE(lmc->err_ctx))) {
589 		tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
590 
591 		ctx = &lmc->err_ctx[tail];
592 
593 		dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
594 			ctx->reg_int);
595 		dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
596 			ctx->reg_fadr);
597 		dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
598 			ctx->reg_nxm_fadr);
599 		dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
600 			ctx->reg_scram_fadr);
601 		dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
602 			ctx->reg_ecc_synd);
603 
604 		snprintf(msg, LMC_MESSAGE_SIZE,
605 			 "DIMM %lld rank %lld bank %lld row %lld col %lld",
606 			 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
607 			 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
608 			 LMC_FADR_FBANK(ctx->reg_scram_fadr),
609 			 LMC_FADR_FROW(ctx->reg_scram_fadr),
610 			 LMC_FADR_FCOL(ctx->reg_scram_fadr));
611 
612 		decode_register(other, LMC_OTHER_SIZE, lmc_errors,
613 				ctx->reg_int);
614 
615 		phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
616 
617 		if (ctx->reg_int & LMC_INT_UE)
618 			edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
619 					     phys_to_pfn(phys_addr),
620 					     offset_in_page(phys_addr),
621 					     0, -1, -1, -1, msg, other);
622 		else if (ctx->reg_int & LMC_INT_CE)
623 			edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
624 					     phys_to_pfn(phys_addr),
625 					     offset_in_page(phys_addr),
626 					     0, -1, -1, -1, msg, other);
627 
628 		lmc->ring_tail++;
629 	}
630 
631 	ret = IRQ_HANDLED;
632 
633 err_free:
634 	kfree(msg);
635 	kfree(other);
636 
637 	return ret;
638 }
639 
640 static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
641 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
642 	{ 0, },
643 };
644 
pci_dev_to_mc_idx(struct pci_dev * pdev)645 static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
646 {
647 	int node = dev_to_node(&pdev->dev);
648 	int ret = PCI_FUNC(pdev->devfn);
649 
650 	ret += max(node, 0) << 3;
651 
652 	return ret;
653 }
654 
thunderx_lmc_probe(struct pci_dev * pdev,const struct pci_device_id * id)655 static int thunderx_lmc_probe(struct pci_dev *pdev,
656 				const struct pci_device_id *id)
657 {
658 	struct thunderx_lmc *lmc;
659 	struct edac_mc_layer layer;
660 	struct mem_ctl_info *mci;
661 	u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
662 	int ret;
663 	u64 lmc_int;
664 	void *l2c_ioaddr;
665 
666 	layer.type = EDAC_MC_LAYER_SLOT;
667 	layer.size = 2;
668 	layer.is_virt_csrow = false;
669 
670 	ret = pcim_enable_device(pdev);
671 	if (ret) {
672 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
673 		return ret;
674 	}
675 
676 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
677 	if (ret) {
678 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
679 		return ret;
680 	}
681 
682 	mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
683 			    sizeof(struct thunderx_lmc));
684 	if (!mci)
685 		return -ENOMEM;
686 
687 	mci->pdev = &pdev->dev;
688 	lmc = mci->pvt_info;
689 
690 	pci_set_drvdata(pdev, mci);
691 
692 	lmc->regs = pcim_iomap_table(pdev)[0];
693 
694 	lmc_control = readq(lmc->regs + LMC_CONTROL);
695 	lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
696 	lmc_config = readq(lmc->regs + LMC_CONFIG);
697 
698 	if (lmc_control & LMC_CONTROL_RDIMM) {
699 		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
700 					   lmc_ddr_pll_ctl) ?
701 				MEM_RDDR4 : MEM_RDDR3;
702 	} else {
703 		mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
704 					   lmc_ddr_pll_ctl) ?
705 				MEM_DDR4 : MEM_DDR3;
706 	}
707 
708 	mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
709 	mci->edac_cap = EDAC_FLAG_SECDED;
710 
711 	mci->mod_name = "thunderx-lmc";
712 	mci->ctl_name = "thunderx-lmc";
713 	mci->dev_name = dev_name(&pdev->dev);
714 	mci->scrub_mode = SCRUB_NONE;
715 
716 	lmc->pdev = pdev;
717 	lmc->msix_ent.entry = 0;
718 
719 	lmc->ring_head = 0;
720 	lmc->ring_tail = 0;
721 
722 	ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
723 	if (ret) {
724 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
725 		goto err_free;
726 	}
727 
728 	ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
729 					thunderx_lmc_err_isr,
730 					thunderx_lmc_threaded_isr, 0,
731 					"[EDAC] ThunderX LMC", mci);
732 	if (ret) {
733 		dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
734 		goto err_free;
735 	}
736 
737 	lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
738 
739 	lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
740 	lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
741 			   FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
742 
743 	lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
744 	lmc->dimm_lsb  = 28 + lmc->pbank_lsb + lmc->xbits;
745 	lmc->rank_lsb = lmc->dimm_lsb;
746 	lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
747 	lmc->bank_lsb = 7 + lmc->xbits;
748 	lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
749 
750 	lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
751 
752 	lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
753 
754 	l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
755 	if (!l2c_ioaddr) {
756 		dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
757 		ret = -ENOMEM;
758 		goto err_free;
759 	}
760 
761 	lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
762 
763 	iounmap(l2c_ioaddr);
764 
765 	ret = edac_mc_add_mc(mci);
766 	if (ret) {
767 		dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
768 		goto err_free;
769 	}
770 
771 	lmc_int = readq(lmc->regs + LMC_INT);
772 	writeq(lmc_int, lmc->regs + LMC_INT);
773 
774 	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
775 
776 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
777 		ret = thunderx_create_debugfs_nodes(mci->debugfs,
778 						    lmc_dfs_ents,
779 						    lmc,
780 						    ARRAY_SIZE(lmc_dfs_ents));
781 
782 		if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
783 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
784 				 ret, ret >= 0 ? " created" : "");
785 		}
786 	}
787 
788 	return 0;
789 
790 err_free:
791 	pci_set_drvdata(pdev, NULL);
792 	edac_mc_free(mci);
793 
794 	return ret;
795 }
796 
thunderx_lmc_remove(struct pci_dev * pdev)797 static void thunderx_lmc_remove(struct pci_dev *pdev)
798 {
799 	struct mem_ctl_info *mci = pci_get_drvdata(pdev);
800 	struct thunderx_lmc *lmc = mci->pvt_info;
801 
802 	writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
803 
804 	edac_mc_del_mc(&pdev->dev);
805 	edac_mc_free(mci);
806 }
807 
808 MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
809 
810 static struct pci_driver thunderx_lmc_driver = {
811 	.name     = "thunderx_lmc_edac",
812 	.probe    = thunderx_lmc_probe,
813 	.remove   = thunderx_lmc_remove,
814 	.id_table = thunderx_lmc_pci_tbl,
815 };
816 
817 /*---------------------- OCX driver ---------------------------------*/
818 
819 #define PCI_DEVICE_ID_THUNDER_OCX 0xa013
820 
821 #define OCX_LINK_INTS		3
822 #define OCX_INTS		(OCX_LINK_INTS + 1)
823 #define OCX_RX_LANES		24
824 #define OCX_RX_LANE_STATS	15
825 
826 #define OCX_COM_INT		0x100
827 #define OCX_COM_INT_W1S		0x108
828 #define OCX_COM_INT_ENA_W1S	0x110
829 #define OCX_COM_INT_ENA_W1C	0x118
830 
831 #define OCX_COM_IO_BADID		BIT(54)
832 #define OCX_COM_MEM_BADID		BIT(53)
833 #define OCX_COM_COPR_BADID		BIT(52)
834 #define OCX_COM_WIN_REQ_BADID		BIT(51)
835 #define OCX_COM_WIN_REQ_TOUT		BIT(50)
836 #define OCX_COM_RX_LANE			GENMASK(23, 0)
837 
838 #define OCX_COM_INT_CE			(OCX_COM_IO_BADID      | \
839 					 OCX_COM_MEM_BADID     | \
840 					 OCX_COM_COPR_BADID    | \
841 					 OCX_COM_WIN_REQ_BADID | \
842 					 OCX_COM_WIN_REQ_TOUT)
843 
844 static const struct error_descr ocx_com_errors[] = {
845 	{
846 		.type  = ERR_CORRECTED,
847 		.mask  = OCX_COM_IO_BADID,
848 		.descr = "Invalid IO transaction node ID",
849 	},
850 	{
851 		.type  = ERR_CORRECTED,
852 		.mask  = OCX_COM_MEM_BADID,
853 		.descr = "Invalid memory transaction node ID",
854 	},
855 	{
856 		.type  = ERR_CORRECTED,
857 		.mask  = OCX_COM_COPR_BADID,
858 		.descr = "Invalid coprocessor transaction node ID",
859 	},
860 	{
861 		.type  = ERR_CORRECTED,
862 		.mask  = OCX_COM_WIN_REQ_BADID,
863 		.descr = "Invalid SLI transaction node ID",
864 	},
865 	{
866 		.type  = ERR_CORRECTED,
867 		.mask  = OCX_COM_WIN_REQ_TOUT,
868 		.descr = "Window/core request timeout",
869 	},
870 	{0, 0, NULL},
871 };
872 
873 #define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
874 #define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
875 #define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
876 #define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)
877 
878 #define OCX_COM_LINK_BAD_WORD			BIT(13)
879 #define OCX_COM_LINK_ALIGN_FAIL			BIT(12)
880 #define OCX_COM_LINK_ALIGN_DONE			BIT(11)
881 #define OCX_COM_LINK_UP				BIT(10)
882 #define OCX_COM_LINK_STOP			BIT(9)
883 #define OCX_COM_LINK_BLK_ERR			BIT(8)
884 #define OCX_COM_LINK_REINIT			BIT(7)
885 #define OCX_COM_LINK_LNK_DATA			BIT(6)
886 #define OCX_COM_LINK_RXFIFO_DBE			BIT(5)
887 #define OCX_COM_LINK_RXFIFO_SBE			BIT(4)
888 #define OCX_COM_LINK_TXFIFO_DBE			BIT(3)
889 #define OCX_COM_LINK_TXFIFO_SBE			BIT(2)
890 #define OCX_COM_LINK_REPLAY_DBE			BIT(1)
891 #define OCX_COM_LINK_REPLAY_SBE			BIT(0)
892 
893 static const struct error_descr ocx_com_link_errors[] = {
894 	{
895 		.type  = ERR_CORRECTED,
896 		.mask  = OCX_COM_LINK_REPLAY_SBE,
897 		.descr = "Replay buffer single-bit error",
898 	},
899 	{
900 		.type  = ERR_CORRECTED,
901 		.mask  = OCX_COM_LINK_TXFIFO_SBE,
902 		.descr = "TX FIFO single-bit error",
903 	},
904 	{
905 		.type  = ERR_CORRECTED,
906 		.mask  = OCX_COM_LINK_RXFIFO_SBE,
907 		.descr = "RX FIFO single-bit error",
908 	},
909 	{
910 		.type  = ERR_CORRECTED,
911 		.mask  = OCX_COM_LINK_BLK_ERR,
912 		.descr = "Block code error",
913 	},
914 	{
915 		.type  = ERR_CORRECTED,
916 		.mask  = OCX_COM_LINK_ALIGN_FAIL,
917 		.descr = "Link alignment failure",
918 	},
919 	{
920 		.type  = ERR_CORRECTED,
921 		.mask  = OCX_COM_LINK_BAD_WORD,
922 		.descr = "Bad code word",
923 	},
924 	{
925 		.type  = ERR_UNCORRECTED,
926 		.mask  = OCX_COM_LINK_REPLAY_DBE,
927 		.descr = "Replay buffer double-bit error",
928 	},
929 	{
930 		.type  = ERR_UNCORRECTED,
931 		.mask  = OCX_COM_LINK_TXFIFO_DBE,
932 		.descr = "TX FIFO double-bit error",
933 	},
934 	{
935 		.type  = ERR_UNCORRECTED,
936 		.mask  = OCX_COM_LINK_RXFIFO_DBE,
937 		.descr = "RX FIFO double-bit error",
938 	},
939 	{
940 		.type  = ERR_UNCORRECTED,
941 		.mask  = OCX_COM_LINK_STOP,
942 		.descr = "Link stopped",
943 	},
944 	{0, 0, NULL},
945 };
946 
947 #define OCX_COM_LINK_INT_UE       (OCX_COM_LINK_REPLAY_DBE | \
948 				   OCX_COM_LINK_TXFIFO_DBE | \
949 				   OCX_COM_LINK_RXFIFO_DBE | \
950 				   OCX_COM_LINK_STOP)
951 
952 #define OCX_COM_LINK_INT_CE       (OCX_COM_LINK_REPLAY_SBE | \
953 				   OCX_COM_LINK_TXFIFO_SBE | \
954 				   OCX_COM_LINK_RXFIFO_SBE | \
955 				   OCX_COM_LINK_BLK_ERR    | \
956 				   OCX_COM_LINK_ALIGN_FAIL | \
957 				   OCX_COM_LINK_BAD_WORD)
958 
959 #define OCX_LNE_INT(x)			(0x8018 + (x) * 0x100)
960 #define OCX_LNE_INT_EN(x)		(0x8020 + (x) * 0x100)
961 #define OCX_LNE_BAD_CNT(x)		(0x8028 + (x) * 0x100)
962 #define OCX_LNE_CFG(x)			(0x8000 + (x) * 0x100)
963 #define OCX_LNE_STAT(x, y)		(0x8040 + (x) * 0x100 + (y) * 8)
964 
965 #define OCX_LNE_CFG_RX_BDRY_LOCK_DIS		BIT(8)
966 #define OCX_LNE_CFG_RX_STAT_WRAP_DIS		BIT(2)
967 #define OCX_LNE_CFG_RX_STAT_RDCLR		BIT(1)
968 #define OCX_LNE_CFG_RX_STAT_ENA			BIT(0)
969 
970 
971 #define OCX_LANE_BAD_64B67B			BIT(8)
972 #define OCX_LANE_DSKEW_FIFO_OVFL		BIT(5)
973 #define OCX_LANE_SCRM_SYNC_LOSS			BIT(4)
974 #define OCX_LANE_UKWN_CNTL_WORD			BIT(3)
975 #define OCX_LANE_CRC32_ERR			BIT(2)
976 #define OCX_LANE_BDRY_SYNC_LOSS			BIT(1)
977 #define OCX_LANE_SERDES_LOCK_LOSS		BIT(0)
978 
979 #define OCX_COM_LANE_INT_UE       (0)
980 #define OCX_COM_LANE_INT_CE       (OCX_LANE_SERDES_LOCK_LOSS | \
981 				   OCX_LANE_BDRY_SYNC_LOSS   | \
982 				   OCX_LANE_CRC32_ERR        | \
983 				   OCX_LANE_UKWN_CNTL_WORD   | \
984 				   OCX_LANE_SCRM_SYNC_LOSS   | \
985 				   OCX_LANE_DSKEW_FIFO_OVFL  | \
986 				   OCX_LANE_BAD_64B67B)
987 
988 static const struct error_descr ocx_lane_errors[] = {
989 	{
990 		.type  = ERR_CORRECTED,
991 		.mask  = OCX_LANE_SERDES_LOCK_LOSS,
992 		.descr = "RX SerDes lock lost",
993 	},
994 	{
995 		.type  = ERR_CORRECTED,
996 		.mask  = OCX_LANE_BDRY_SYNC_LOSS,
997 		.descr = "RX word boundary lost",
998 	},
999 	{
1000 		.type  = ERR_CORRECTED,
1001 		.mask  = OCX_LANE_CRC32_ERR,
1002 		.descr = "CRC32 error",
1003 	},
1004 	{
1005 		.type  = ERR_CORRECTED,
1006 		.mask  = OCX_LANE_UKWN_CNTL_WORD,
1007 		.descr = "Unknown control word",
1008 	},
1009 	{
1010 		.type  = ERR_CORRECTED,
1011 		.mask  = OCX_LANE_SCRM_SYNC_LOSS,
1012 		.descr = "Scrambler synchronization lost",
1013 	},
1014 	{
1015 		.type  = ERR_CORRECTED,
1016 		.mask  = OCX_LANE_DSKEW_FIFO_OVFL,
1017 		.descr = "RX deskew FIFO overflow",
1018 	},
1019 	{
1020 		.type  = ERR_CORRECTED,
1021 		.mask  = OCX_LANE_BAD_64B67B,
1022 		.descr = "Bad 64B/67B codeword",
1023 	},
1024 	{0, 0, NULL},
1025 };
1026 
1027 #define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
1028 #define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
1029 #define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
1030 					 GENMASK(9, 7) | GENMASK(5, 0))
1031 
1032 #define OCX_TLKX_ECC_CTL(x)		(0x10018 + (x) * 0x2000)
1033 #define OCX_RLKX_ECC_CTL(x)		(0x18018 + (x) * 0x2000)
1034 
1035 struct ocx_com_err_ctx {
1036 	u64 reg_com_int;
1037 	u64 reg_lane_int[OCX_RX_LANES];
1038 	u64 reg_lane_stat11[OCX_RX_LANES];
1039 };
1040 
1041 struct ocx_link_err_ctx {
1042 	u64 reg_com_link_int;
1043 	int link;
1044 };
1045 
1046 struct thunderx_ocx {
1047 	void __iomem *regs;
1048 	int com_link;
1049 	struct pci_dev *pdev;
1050 	struct edac_device_ctl_info *edac_dev;
1051 
1052 	struct dentry *debugfs;
1053 	struct msix_entry msix_ent[OCX_INTS];
1054 
1055 	struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
1056 	struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
1057 
1058 	unsigned long com_ring_head;
1059 	unsigned long com_ring_tail;
1060 
1061 	unsigned long link_ring_head;
1062 	unsigned long link_ring_tail;
1063 };
1064 
1065 #define OCX_MESSAGE_SIZE	SZ_1K
1066 #define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
1067 
1068 /* This handler is threaded */
thunderx_ocx_com_isr(int irq,void * irq_id)1069 static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
1070 {
1071 	struct msix_entry *msix = irq_id;
1072 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1073 						msix_ent[msix->entry]);
1074 
1075 	int lane;
1076 	unsigned long head = ring_pos(ocx->com_ring_head,
1077 				      ARRAY_SIZE(ocx->com_err_ctx));
1078 	struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
1079 
1080 	ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
1081 
1082 	for (lane = 0; lane < OCX_RX_LANES; lane++) {
1083 		ctx->reg_lane_int[lane] =
1084 			readq(ocx->regs + OCX_LNE_INT(lane));
1085 		ctx->reg_lane_stat11[lane] =
1086 			readq(ocx->regs + OCX_LNE_STAT(lane, 11));
1087 
1088 		writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
1089 	}
1090 
1091 	writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
1092 
1093 	ocx->com_ring_head++;
1094 
1095 	return IRQ_WAKE_THREAD;
1096 }
1097 
thunderx_ocx_com_threaded_isr(int irq,void * irq_id)1098 static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
1099 {
1100 	struct msix_entry *msix = irq_id;
1101 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1102 						msix_ent[msix->entry]);
1103 
1104 	irqreturn_t ret = IRQ_NONE;
1105 
1106 	unsigned long tail;
1107 	struct ocx_com_err_ctx *ctx;
1108 	int lane;
1109 	char *msg;
1110 	char *other;
1111 
1112 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1113 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1114 
1115 	if (!msg || !other)
1116 		goto err_free;
1117 
1118 	while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
1119 			ARRAY_SIZE(ocx->com_err_ctx))) {
1120 		tail = ring_pos(ocx->com_ring_tail,
1121 				ARRAY_SIZE(ocx->com_err_ctx));
1122 		ctx = &ocx->com_err_ctx[tail];
1123 
1124 		snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
1125 			ocx->edac_dev->ctl_name, ctx->reg_com_int);
1126 
1127 		decode_register(other, OCX_OTHER_SIZE,
1128 				ocx_com_errors, ctx->reg_com_int);
1129 
1130 		strlcat(msg, other, OCX_MESSAGE_SIZE);
1131 
1132 		for (lane = 0; lane < OCX_RX_LANES; lane++)
1133 			if (ctx->reg_com_int & BIT(lane)) {
1134 				snprintf(other, OCX_OTHER_SIZE,
1135 					 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
1136 					 lane, ctx->reg_lane_int[lane],
1137 					 lane, ctx->reg_lane_stat11[lane]);
1138 
1139 				strlcat(msg, other, OCX_MESSAGE_SIZE);
1140 
1141 				decode_register(other, OCX_OTHER_SIZE,
1142 						ocx_lane_errors,
1143 						ctx->reg_lane_int[lane]);
1144 				strlcat(msg, other, OCX_MESSAGE_SIZE);
1145 			}
1146 
1147 		if (ctx->reg_com_int & OCX_COM_INT_CE)
1148 			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1149 
1150 		ocx->com_ring_tail++;
1151 	}
1152 
1153 	ret = IRQ_HANDLED;
1154 
1155 err_free:
1156 	kfree(other);
1157 	kfree(msg);
1158 
1159 	return ret;
1160 }
1161 
thunderx_ocx_lnk_isr(int irq,void * irq_id)1162 static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
1163 {
1164 	struct msix_entry *msix = irq_id;
1165 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1166 						msix_ent[msix->entry]);
1167 	unsigned long head = ring_pos(ocx->link_ring_head,
1168 				      ARRAY_SIZE(ocx->link_err_ctx));
1169 	struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
1170 
1171 	ctx->link = msix->entry;
1172 	ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1173 
1174 	writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1175 
1176 	ocx->link_ring_head++;
1177 
1178 	return IRQ_WAKE_THREAD;
1179 }
1180 
thunderx_ocx_lnk_threaded_isr(int irq,void * irq_id)1181 static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
1182 {
1183 	struct msix_entry *msix = irq_id;
1184 	struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1185 						msix_ent[msix->entry]);
1186 	irqreturn_t ret = IRQ_NONE;
1187 	unsigned long tail;
1188 	struct ocx_link_err_ctx *ctx;
1189 
1190 	char *msg;
1191 	char *other;
1192 
1193 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1194 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1195 
1196 	if (!msg || !other)
1197 		goto err_free;
1198 
1199 	while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
1200 			ARRAY_SIZE(ocx->link_err_ctx))) {
1201 		tail = ring_pos(ocx->link_ring_head,
1202 				ARRAY_SIZE(ocx->link_err_ctx));
1203 
1204 		ctx = &ocx->link_err_ctx[tail];
1205 
1206 		snprintf(msg, OCX_MESSAGE_SIZE,
1207 			 "%s: OCX_COM_LINK_INT[%d]: %016llx",
1208 			 ocx->edac_dev->ctl_name,
1209 			 ctx->link, ctx->reg_com_link_int);
1210 
1211 		decode_register(other, OCX_OTHER_SIZE,
1212 				ocx_com_link_errors, ctx->reg_com_link_int);
1213 
1214 		strlcat(msg, other, OCX_MESSAGE_SIZE);
1215 
1216 		if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
1217 			edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
1218 		else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
1219 			edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1220 
1221 		ocx->link_ring_tail++;
1222 	}
1223 
1224 	ret = IRQ_HANDLED;
1225 err_free:
1226 	kfree(other);
1227 	kfree(msg);
1228 
1229 	return ret;
1230 }
1231 
/*
 * debugfs register attributes of the OCX device. Each OCX_DEBUGFS_ATTR()
 * line expands DEBUGFS_REG_ATTR() for one register offset; the resulting
 * debugfs_<name> objects are collected in ocx_dfs_ents[].
 */
#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)

/* ECC control registers of links 0-2 */
OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));

/* W1S views of the per-link interrupt registers */
OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));

/* OCX_LNE_BAD_CNT registers of lanes 0-23 */
OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));

OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));

OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));

/* W1S view of the common interrupt register */
OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
1274 
/*
 * All OCX debugfs entries. thunderx_ocx_probe() passes this table to
 * thunderx_create_debugfs_nodes() when CONFIG_EDAC_DEBUG is enabled.
 */
static struct debugfs_entry *ocx_dfs_ents[] = {
	&debugfs_tlk0_ecc_ctl,
	&debugfs_tlk1_ecc_ctl,
	&debugfs_tlk2_ecc_ctl,

	&debugfs_rlk0_ecc_ctl,
	&debugfs_rlk1_ecc_ctl,
	&debugfs_rlk2_ecc_ctl,

	&debugfs_com_link0_int,
	&debugfs_com_link1_int,
	&debugfs_com_link2_int,

	&debugfs_lne00_badcnt,
	&debugfs_lne01_badcnt,
	&debugfs_lne02_badcnt,
	&debugfs_lne03_badcnt,
	&debugfs_lne04_badcnt,
	&debugfs_lne05_badcnt,
	&debugfs_lne06_badcnt,
	&debugfs_lne07_badcnt,
	&debugfs_lne08_badcnt,
	&debugfs_lne09_badcnt,
	&debugfs_lne10_badcnt,
	&debugfs_lne11_badcnt,
	&debugfs_lne12_badcnt,
	&debugfs_lne13_badcnt,
	&debugfs_lne14_badcnt,
	&debugfs_lne15_badcnt,
	&debugfs_lne16_badcnt,
	&debugfs_lne17_badcnt,
	&debugfs_lne18_badcnt,
	&debugfs_lne19_badcnt,
	&debugfs_lne20_badcnt,
	&debugfs_lne21_badcnt,
	&debugfs_lne22_badcnt,
	&debugfs_lne23_badcnt,

	&debugfs_com_int,
};
1315 
/* PCI IDs bound by thunderx_ocx_driver; the zeroed entry ends the table */
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
	{ 0, },
};
1320 
thunderx_ocx_clearstats(struct thunderx_ocx * ocx)1321 static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
1322 {
1323 	int lane, stat, cfg;
1324 
1325 	for (lane = 0; lane < OCX_RX_LANES; lane++) {
1326 		cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
1327 		cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
1328 		cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
1329 		writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
1330 
1331 		for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
1332 			readq(ocx->regs + OCX_LNE_STAT(lane, stat));
1333 	}
1334 }
1335 
thunderx_ocx_probe(struct pci_dev * pdev,const struct pci_device_id * id)1336 static int thunderx_ocx_probe(struct pci_dev *pdev,
1337 			      const struct pci_device_id *id)
1338 {
1339 	struct thunderx_ocx *ocx;
1340 	struct edac_device_ctl_info *edac_dev;
1341 	char name[32];
1342 	int idx;
1343 	int i;
1344 	int ret;
1345 	u64 reg;
1346 
1347 	ret = pcim_enable_device(pdev);
1348 	if (ret) {
1349 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1350 		return ret;
1351 	}
1352 
1353 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
1354 	if (ret) {
1355 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1356 		return ret;
1357 	}
1358 
1359 	idx = edac_device_alloc_index();
1360 	snprintf(name, sizeof(name), "OCX%d", idx);
1361 	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
1362 					      name, 1, "CCPI", 1, 0, idx);
1363 	if (!edac_dev) {
1364 		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
1365 		return -ENOMEM;
1366 	}
1367 	ocx = edac_dev->pvt_info;
1368 	ocx->edac_dev = edac_dev;
1369 	ocx->com_ring_head = 0;
1370 	ocx->com_ring_tail = 0;
1371 	ocx->link_ring_head = 0;
1372 	ocx->link_ring_tail = 0;
1373 
1374 	ocx->regs = pcim_iomap_table(pdev)[0];
1375 	if (!ocx->regs) {
1376 		dev_err(&pdev->dev, "Cannot map PCI resources\n");
1377 		ret = -ENODEV;
1378 		goto err_free;
1379 	}
1380 
1381 	ocx->pdev = pdev;
1382 
1383 	for (i = 0; i < OCX_INTS; i++) {
1384 		ocx->msix_ent[i].entry = i;
1385 		ocx->msix_ent[i].vector = 0;
1386 	}
1387 
1388 	ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
1389 	if (ret) {
1390 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
1391 		goto err_free;
1392 	}
1393 
1394 	for (i = 0; i < OCX_INTS; i++) {
1395 		ret = devm_request_threaded_irq(&pdev->dev,
1396 						ocx->msix_ent[i].vector,
1397 						(i == 3) ?
1398 						 thunderx_ocx_com_isr :
1399 						 thunderx_ocx_lnk_isr,
1400 						(i == 3) ?
1401 						 thunderx_ocx_com_threaded_isr :
1402 						 thunderx_ocx_lnk_threaded_isr,
1403 						0, "[EDAC] ThunderX OCX",
1404 						&ocx->msix_ent[i]);
1405 		if (ret)
1406 			goto err_free;
1407 	}
1408 
1409 	edac_dev->dev = &pdev->dev;
1410 	edac_dev->dev_name = dev_name(&pdev->dev);
1411 	edac_dev->mod_name = "thunderx-ocx";
1412 	edac_dev->ctl_name = "thunderx-ocx";
1413 
1414 	ret = edac_device_add_device(edac_dev);
1415 	if (ret) {
1416 		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
1417 		goto err_free;
1418 	}
1419 
1420 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
1421 		ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
1422 
1423 		ret = thunderx_create_debugfs_nodes(ocx->debugfs,
1424 						    ocx_dfs_ents,
1425 						    ocx,
1426 						    ARRAY_SIZE(ocx_dfs_ents));
1427 		if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
1428 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
1429 				 ret, ret >= 0 ? " created" : "");
1430 		}
1431 	}
1432 
1433 	pci_set_drvdata(pdev, edac_dev);
1434 
1435 	thunderx_ocx_clearstats(ocx);
1436 
1437 	for (i = 0; i < OCX_RX_LANES; i++) {
1438 		writeq(OCX_LNE_INT_ENA_ALL,
1439 		       ocx->regs + OCX_LNE_INT_EN(i));
1440 
1441 		reg = readq(ocx->regs + OCX_LNE_INT(i));
1442 		writeq(reg, ocx->regs + OCX_LNE_INT(i));
1443 
1444 	}
1445 
1446 	for (i = 0; i < OCX_LINK_INTS; i++) {
1447 		reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
1448 		writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));
1449 
1450 		writeq(OCX_COM_LINKX_INT_ENA_ALL,
1451 		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
1452 	}
1453 
1454 	reg = readq(ocx->regs + OCX_COM_INT);
1455 	writeq(reg, ocx->regs + OCX_COM_INT);
1456 
1457 	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);
1458 
1459 	return 0;
1460 err_free:
1461 	edac_device_free_ctl_info(edac_dev);
1462 
1463 	return ret;
1464 }
1465 
thunderx_ocx_remove(struct pci_dev * pdev)1466 static void thunderx_ocx_remove(struct pci_dev *pdev)
1467 {
1468 	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
1469 	struct thunderx_ocx *ocx = edac_dev->pvt_info;
1470 	int i;
1471 
1472 	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
1473 
1474 	for (i = 0; i < OCX_INTS; i++) {
1475 		writeq(OCX_COM_LINKX_INT_ENA_ALL,
1476 		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
1477 	}
1478 
1479 	edac_debugfs_remove_recursive(ocx->debugfs);
1480 
1481 	edac_device_del_device(&pdev->dev);
1482 	edac_device_free_ctl_info(edac_dev);
1483 }
1484 
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);

/* PCI driver for the OCX block; registered from thunderx_edac_init() */
static struct pci_driver thunderx_ocx_driver = {
	.name     = "thunderx_ocx_edac",
	.probe    = thunderx_ocx_probe,
	.remove   = thunderx_ocx_remove,
	.id_table = thunderx_ocx_pci_tbl,
};
1493 
/*---------------------- L2C driver ---------------------------------*/

/* PCI device IDs of the three L2C sub-blocks handled by the L2C driver */
#define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030

/* Offsets of the L2C TAD interrupt status register (W1C and W1S views) */
#define L2C_TAD_INT_W1C		0x40000
#define L2C_TAD_INT_W1S		0x40008

/* Offsets of the L2C TAD interrupt enable register (W1C and W1S views) */
#define L2C_TAD_INT_ENA_W1C	0x40020
#define L2C_TAD_INT_ENA_W1S	0x40028


/* Individual bits of the L2C TAD interrupt register */
#define L2C_TAD_INT_L2DDBE	 BIT(1)
#define L2C_TAD_INT_SBFSBE	 BIT(2)
#define L2C_TAD_INT_SBFDBE	 BIT(3)
#define L2C_TAD_INT_FBFSBE	 BIT(4)
#define L2C_TAD_INT_FBFDBE	 BIT(5)
#define L2C_TAD_INT_TAGDBE	 BIT(9)
#define L2C_TAD_INT_RDDISLMC	 BIT(15)
#define L2C_TAD_INT_WRDISLMC	 BIT(16)
#define L2C_TAD_INT_LFBTO	 BIT(17)
#define L2C_TAD_INT_GSYNCTO	 BIT(18)
#define L2C_TAD_INT_RTGSBE	 BIT(32)
#define L2C_TAD_INT_RTGDBE	 BIT(33)
#define L2C_TAD_INT_RDDISOCI	 BIT(34)
#define L2C_TAD_INT_WRDISOCI	 BIT(35)

/* Bits for which thunderx_l2c_tad_isr() captures L2C_TAD_TQD_ERR */
#define L2C_TAD_INT_ECC		(L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)

/* Bits the threaded handler reports as corrected errors */
#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
				 L2C_TAD_INT_FBFSBE)

/* Bits the threaded handler reports as uncorrected errors */
#define L2C_TAD_INT_UE          (L2C_TAD_INT_L2DDBE | \
				 L2C_TAD_INT_SBFDBE | \
				 L2C_TAD_INT_FBFDBE | \
				 L2C_TAD_INT_TAGDBE | \
				 L2C_TAD_INT_RTGDBE | \
				 L2C_TAD_INT_WRDISOCI | \
				 L2C_TAD_INT_RDDISOCI | \
				 L2C_TAD_INT_WRDISLMC | \
				 L2C_TAD_INT_RDDISLMC | \
				 L2C_TAD_INT_LFBTO    | \
				 L2C_TAD_INT_GSYNCTO)
1540 
1541 static const struct error_descr l2_tad_errors[] = {
1542 	{
1543 		.type  = ERR_CORRECTED,
1544 		.mask  = L2C_TAD_INT_SBFSBE,
1545 		.descr = "SBF single-bit error",
1546 	},
1547 	{
1548 		.type  = ERR_CORRECTED,
1549 		.mask  = L2C_TAD_INT_FBFSBE,
1550 		.descr = "FBF single-bit error",
1551 	},
1552 	{
1553 		.type  = ERR_UNCORRECTED,
1554 		.mask  = L2C_TAD_INT_L2DDBE,
1555 		.descr = "L2D double-bit error",
1556 	},
1557 	{
1558 		.type  = ERR_UNCORRECTED,
1559 		.mask  = L2C_TAD_INT_SBFDBE,
1560 		.descr = "SBF double-bit error",
1561 	},
1562 	{
1563 		.type  = ERR_UNCORRECTED,
1564 		.mask  = L2C_TAD_INT_FBFDBE,
1565 		.descr = "FBF double-bit error",
1566 	},
1567 	{
1568 		.type  = ERR_UNCORRECTED,
1569 		.mask  = L2C_TAD_INT_TAGDBE,
1570 		.descr = "TAG double-bit error",
1571 	},
1572 	{
1573 		.type  = ERR_UNCORRECTED,
1574 		.mask  = L2C_TAD_INT_RTGDBE,
1575 		.descr = "RTG double-bit error",
1576 	},
1577 	{
1578 		.type  = ERR_UNCORRECTED,
1579 		.mask  = L2C_TAD_INT_WRDISOCI,
1580 		.descr = "Write to a disabled CCPI",
1581 	},
1582 	{
1583 		.type  = ERR_UNCORRECTED,
1584 		.mask  = L2C_TAD_INT_RDDISOCI,
1585 		.descr = "Read from a disabled CCPI",
1586 	},
1587 	{
1588 		.type  = ERR_UNCORRECTED,
1589 		.mask  = L2C_TAD_INT_WRDISLMC,
1590 		.descr = "Write to a disabled LMC",
1591 	},
1592 	{
1593 		.type  = ERR_UNCORRECTED,
1594 		.mask  = L2C_TAD_INT_RDDISLMC,
1595 		.descr = "Read from a disabled LMC",
1596 	},
1597 	{
1598 		.type  = ERR_UNCORRECTED,
1599 		.mask  = L2C_TAD_INT_LFBTO,
1600 		.descr = "LFB entry timeout",
1601 	},
1602 	{
1603 		.type  = ERR_UNCORRECTED,
1604 		.mask  = L2C_TAD_INT_GSYNCTO,
1605 		.descr = "Global sync CCPI timeout",
1606 	},
1607 	{0, 0, NULL},
1608 };
1609 
/* Bit groups tested by thunderx_l2c_tad_isr() and used in the enable mask */
#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)

#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)

#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)

#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)

/* Mask written to the TAD interrupt enable register by probe and remove */
#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
				 L2C_TAD_INT_RTG | \
				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
				 L2C_TAD_INT_LFBTO)

/* Offsets of further L2C TAD registers */
#define L2C_TAD_TIMETWO		0x50000
#define L2C_TAD_TIMEOUT		0x50100
#define L2C_TAD_ERR		0x60000
#define L2C_TAD_TQD_ERR		0x60100
#define L2C_TAD_TTG_ERR		0x60200


/* Offset of the L2C CBC interrupt status register (W1C view) */
#define L2C_CBC_INT_W1C		0x60000

/* Individual bits of the L2C CBC interrupt register and their groups */
#define L2C_CBC_INT_RSDSBE	 BIT(0)
#define L2C_CBC_INT_RSDDBE	 BIT(1)

#define L2C_CBC_INT_RSD		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)

#define L2C_CBC_INT_MIBSBE	 BIT(4)
#define L2C_CBC_INT_MIBDBE	 BIT(5)

#define L2C_CBC_INT_MIB		 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)

#define L2C_CBC_INT_IORDDISOCI	 BIT(6)
#define L2C_CBC_INT_IOWRDISOCI	 BIT(7)

#define L2C_CBC_INT_IODISOCI	 (L2C_CBC_INT_IORDDISOCI | \
				  L2C_CBC_INT_IOWRDISOCI)

/* Bits the threaded handler reports as corrected / uncorrected errors */
#define L2C_CBC_INT_CE		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE		 (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
1650 
1651 
1652 static const struct error_descr l2_cbc_errors[] = {
1653 	{
1654 		.type  = ERR_CORRECTED,
1655 		.mask  = L2C_CBC_INT_RSDSBE,
1656 		.descr = "RSD single-bit error",
1657 	},
1658 	{
1659 		.type  = ERR_CORRECTED,
1660 		.mask  = L2C_CBC_INT_MIBSBE,
1661 		.descr = "MIB single-bit error",
1662 	},
1663 	{
1664 		.type  = ERR_UNCORRECTED,
1665 		.mask  = L2C_CBC_INT_RSDDBE,
1666 		.descr = "RSD double-bit error",
1667 	},
1668 	{
1669 		.type  = ERR_UNCORRECTED,
1670 		.mask  = L2C_CBC_INT_MIBDBE,
1671 		.descr = "MIB double-bit error",
1672 	},
1673 	{
1674 		.type  = ERR_UNCORRECTED,
1675 		.mask  = L2C_CBC_INT_IORDDISOCI,
1676 		.descr = "Read from a disabled CCPI",
1677 	},
1678 	{
1679 		.type  = ERR_UNCORRECTED,
1680 		.mask  = L2C_CBC_INT_IOWRDISOCI,
1681 		.descr = "Write to a disabled CCPI",
1682 	},
1683 	{0, 0, NULL},
1684 };
1685 
/* Offsets of the CBC interrupt W1S view and of the enable W1C view */
#define L2C_CBC_INT_W1S		0x60008
#define L2C_CBC_INT_ENA_W1C	0x60020

/* Mask written to the CBC interrupt enable register by probe and remove */
#define L2C_CBC_INT_ENA_ALL	 (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
				  L2C_CBC_INT_IODISOCI)

#define L2C_CBC_INT_ENA_W1S	0x60028

/* Offsets of the CBC error detail registers */
#define L2C_CBC_IODISOCIERR	0x80008
#define L2C_CBC_IOCERR		0x80010
#define L2C_CBC_RSDERR		0x80018
#define L2C_CBC_MIBERR		0x80020


/* Offset of the L2C MCI interrupt status register (W1C view) */
#define L2C_MCI_INT_W1C		0x0

/* Individual bits of the L2C MCI interrupt register */
#define L2C_MCI_INT_VBFSBE	 BIT(0)
#define L2C_MCI_INT_VBFDBE	 BIT(1)
1704 
1705 static const struct error_descr l2_mci_errors[] = {
1706 	{
1707 		.type  = ERR_CORRECTED,
1708 		.mask  = L2C_MCI_INT_VBFSBE,
1709 		.descr = "VBF single-bit error",
1710 	},
1711 	{
1712 		.type  = ERR_UNCORRECTED,
1713 		.mask  = L2C_MCI_INT_VBFDBE,
1714 		.descr = "VBF double-bit error",
1715 	},
1716 	{0, 0, NULL},
1717 };
1718 
/* Offsets of the MCI interrupt W1S view and of the enable W1C view */
#define L2C_MCI_INT_W1S		0x8
#define L2C_MCI_INT_ENA_W1C	0x20

/* Mask written to the MCI interrupt enable register by probe and remove */
#define L2C_MCI_INT_ENA_ALL	 (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)

#define L2C_MCI_INT_ENA_W1S	0x28

/* Offset of the MCI error detail register read by thunderx_l2c_mci_isr() */
#define L2C_MCI_ERR		0x10000

/* Bounds used for the L2C report and its decoded-bits fragment */
#define L2C_MESSAGE_SIZE	SZ_1K
#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
1730 
/*
 * Snapshot of one L2C interrupt. It is filled in by the TAD/CBC/MCI hard
 * IRQ handlers and reported by thunderx_l2c_threaded_isr().
 */
struct l2c_err_ctx {
	/* Label printed in front of reg_ext in the report */
	char *reg_ext_name;
	/* Interrupt status register value */
	u64  reg_int;
	/* Value of the error detail register selected by the handler */
	u64  reg_ext;
};
1736 
/*
 * Per-device state of the L2C EDAC driver (shared by the TAD, CBC and MCI
 * devices). It lives in the pvt_info area of the EDAC device allocated by
 * thunderx_l2c_probe().
 */
struct thunderx_l2c {
	/* Entry 0 of the pcim_iomap_table() of the device */
	void __iomem *regs;
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;

	int index;

	/* Single MSI-X entry; the IRQ handlers receive its address as dev_id */
	struct msix_entry msix_ent;

	/* Snapshot ring with its producer (head) and consumer (tail) counters */
	struct l2c_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
1752 
thunderx_l2c_tad_isr(int irq,void * irq_id)1753 static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
1754 {
1755 	struct msix_entry *msix = irq_id;
1756 	struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
1757 						msix_ent);
1758 
1759 	unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
1760 	struct l2c_err_ctx *ctx = &tad->err_ctx[head];
1761 
1762 	ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
1763 
1764 	if (ctx->reg_int & L2C_TAD_INT_ECC) {
1765 		ctx->reg_ext_name = "TQD_ERR";
1766 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
1767 	} else if (ctx->reg_int & L2C_TAD_INT_TAG) {
1768 		ctx->reg_ext_name = "TTG_ERR";
1769 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
1770 	} else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
1771 		ctx->reg_ext_name = "TIMEOUT";
1772 		ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
1773 	} else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
1774 		ctx->reg_ext_name = "ERR";
1775 		ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
1776 	}
1777 
1778 	writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
1779 
1780 	tad->ring_head++;
1781 
1782 	return IRQ_WAKE_THREAD;
1783 }
1784 
thunderx_l2c_cbc_isr(int irq,void * irq_id)1785 static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
1786 {
1787 	struct msix_entry *msix = irq_id;
1788 	struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
1789 						msix_ent);
1790 
1791 	unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
1792 	struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
1793 
1794 	ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
1795 
1796 	if (ctx->reg_int & L2C_CBC_INT_RSD) {
1797 		ctx->reg_ext_name = "RSDERR";
1798 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
1799 	} else if (ctx->reg_int & L2C_CBC_INT_MIB) {
1800 		ctx->reg_ext_name = "MIBERR";
1801 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
1802 	} else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
1803 		ctx->reg_ext_name = "IODISOCIERR";
1804 		ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
1805 	}
1806 
1807 	writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
1808 
1809 	cbc->ring_head++;
1810 
1811 	return IRQ_WAKE_THREAD;
1812 }
1813 
thunderx_l2c_mci_isr(int irq,void * irq_id)1814 static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
1815 {
1816 	struct msix_entry *msix = irq_id;
1817 	struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
1818 						msix_ent);
1819 
1820 	unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
1821 	struct l2c_err_ctx *ctx = &mci->err_ctx[head];
1822 
1823 	ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
1824 	ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
1825 
1826 	writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
1827 
1828 	ctx->reg_ext_name = "ERR";
1829 
1830 	mci->ring_head++;
1831 
1832 	return IRQ_WAKE_THREAD;
1833 }
1834 
thunderx_l2c_threaded_isr(int irq,void * irq_id)1835 static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
1836 {
1837 	struct msix_entry *msix = irq_id;
1838 	struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
1839 						msix_ent);
1840 
1841 	unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
1842 	struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
1843 	irqreturn_t ret = IRQ_NONE;
1844 
1845 	u64 mask_ue, mask_ce;
1846 	const struct error_descr *l2_errors;
1847 	char *reg_int_name;
1848 
1849 	char *msg;
1850 	char *other;
1851 
1852 	msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1853 	other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1854 
1855 	if (!msg || !other)
1856 		goto err_free;
1857 
1858 	switch (l2c->pdev->device) {
1859 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1860 		reg_int_name = "L2C_TAD_INT";
1861 		mask_ue = L2C_TAD_INT_UE;
1862 		mask_ce = L2C_TAD_INT_CE;
1863 		l2_errors = l2_tad_errors;
1864 		break;
1865 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1866 		reg_int_name = "L2C_CBC_INT";
1867 		mask_ue = L2C_CBC_INT_UE;
1868 		mask_ce = L2C_CBC_INT_CE;
1869 		l2_errors = l2_cbc_errors;
1870 		break;
1871 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1872 		reg_int_name = "L2C_MCI_INT";
1873 		mask_ue = L2C_MCI_INT_VBFDBE;
1874 		mask_ce = L2C_MCI_INT_VBFSBE;
1875 		l2_errors = l2_mci_errors;
1876 		break;
1877 	default:
1878 		dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
1879 			l2c->pdev->device);
1880 		goto err_free;
1881 	}
1882 
1883 	while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
1884 			ARRAY_SIZE(l2c->err_ctx))) {
1885 		snprintf(msg, L2C_MESSAGE_SIZE,
1886 			 "%s: %s: %016llx, %s: %016llx",
1887 			 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
1888 			 ctx->reg_ext_name, ctx->reg_ext);
1889 
1890 		decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
1891 
1892 		strlcat(msg, other, L2C_MESSAGE_SIZE);
1893 
1894 		if (ctx->reg_int & mask_ue)
1895 			edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
1896 		else if (ctx->reg_int & mask_ce)
1897 			edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
1898 
1899 		l2c->ring_tail++;
1900 	}
1901 
1902 	ret = IRQ_HANDLED;
1903 
1904 err_free:
1905 	kfree(other);
1906 	kfree(msg);
1907 
1908 	return ret;
1909 }
1910 
/*
 * debugfs register attributes of the L2C devices. Each device type exposes
 * the W1S view of its interrupt register through a one-entry table that
 * thunderx_l2c_probe() selects by PCI device ID.
 */
#define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)

L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);

static struct debugfs_entry *l2c_tad_dfs_ents[] = {
	&debugfs_tad_int,
};

L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);

static struct debugfs_entry *l2c_cbc_dfs_ents[] = {
	&debugfs_cbc_int,
};

L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);

static struct debugfs_entry *l2c_mci_dfs_ents[] = {
	&debugfs_mci_int,
};
1930 
/* PCI IDs bound by thunderx_l2c_driver; the zeroed entry ends the table */
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
	{ 0, },
};
1937 
thunderx_l2c_probe(struct pci_dev * pdev,const struct pci_device_id * id)1938 static int thunderx_l2c_probe(struct pci_dev *pdev,
1939 			      const struct pci_device_id *id)
1940 {
1941 	struct thunderx_l2c *l2c;
1942 	struct edac_device_ctl_info *edac_dev;
1943 	struct debugfs_entry **l2c_devattr;
1944 	size_t dfs_entries;
1945 	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
1946 	char name[32];
1947 	const char *fmt;
1948 	u64 reg_en_offs, reg_en_mask;
1949 	int idx;
1950 	int ret;
1951 
1952 	ret = pcim_enable_device(pdev);
1953 	if (ret) {
1954 		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
1955 		return ret;
1956 	}
1957 
1958 	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
1959 	if (ret) {
1960 		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
1961 		return ret;
1962 	}
1963 
1964 	switch (pdev->device) {
1965 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1966 		thunderx_l2c_isr = thunderx_l2c_tad_isr;
1967 		l2c_devattr = l2c_tad_dfs_ents;
1968 		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
1969 		fmt = "L2C-TAD%d";
1970 		reg_en_offs = L2C_TAD_INT_ENA_W1S;
1971 		reg_en_mask = L2C_TAD_INT_ENA_ALL;
1972 		break;
1973 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1974 		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
1975 		l2c_devattr = l2c_cbc_dfs_ents;
1976 		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
1977 		fmt = "L2C-CBC%d";
1978 		reg_en_offs = L2C_CBC_INT_ENA_W1S;
1979 		reg_en_mask = L2C_CBC_INT_ENA_ALL;
1980 		break;
1981 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1982 		thunderx_l2c_isr = thunderx_l2c_mci_isr;
1983 		l2c_devattr = l2c_mci_dfs_ents;
1984 		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
1985 		fmt = "L2C-MCI%d";
1986 		reg_en_offs = L2C_MCI_INT_ENA_W1S;
1987 		reg_en_mask = L2C_MCI_INT_ENA_ALL;
1988 		break;
1989 	default:
1990 		//Should never ever get here
1991 		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
1992 			pdev->device);
1993 		return -EINVAL;
1994 	}
1995 
1996 	idx = edac_device_alloc_index();
1997 	snprintf(name, sizeof(name), fmt, idx);
1998 
1999 	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
2000 					      name, 1, "L2C", 1, 0, idx);
2001 	if (!edac_dev) {
2002 		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
2003 		return -ENOMEM;
2004 	}
2005 
2006 	l2c = edac_dev->pvt_info;
2007 	l2c->edac_dev = edac_dev;
2008 
2009 	l2c->regs = pcim_iomap_table(pdev)[0];
2010 	if (!l2c->regs) {
2011 		dev_err(&pdev->dev, "Cannot map PCI resources\n");
2012 		ret = -ENODEV;
2013 		goto err_free;
2014 	}
2015 
2016 	l2c->pdev = pdev;
2017 
2018 	l2c->ring_head = 0;
2019 	l2c->ring_tail = 0;
2020 
2021 	l2c->msix_ent.entry = 0;
2022 	l2c->msix_ent.vector = 0;
2023 
2024 	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
2025 	if (ret) {
2026 		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
2027 		goto err_free;
2028 	}
2029 
2030 	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
2031 					thunderx_l2c_isr,
2032 					thunderx_l2c_threaded_isr,
2033 					0, "[EDAC] ThunderX L2C",
2034 					&l2c->msix_ent);
2035 	if (ret)
2036 		goto err_free;
2037 
2038 	edac_dev->dev = &pdev->dev;
2039 	edac_dev->dev_name = dev_name(&pdev->dev);
2040 	edac_dev->mod_name = "thunderx-l2c";
2041 	edac_dev->ctl_name = "thunderx-l2c";
2042 
2043 	ret = edac_device_add_device(edac_dev);
2044 	if (ret) {
2045 		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
2046 		goto err_free;
2047 	}
2048 
2049 	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
2050 		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);
2051 
2052 		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
2053 					      l2c, dfs_entries);
2054 
2055 		if (ret != dfs_entries) {
2056 			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
2057 				 ret, ret >= 0 ? " created" : "");
2058 		}
2059 	}
2060 
2061 	pci_set_drvdata(pdev, edac_dev);
2062 
2063 	writeq(reg_en_mask, l2c->regs + reg_en_offs);
2064 
2065 	return 0;
2066 
2067 err_free:
2068 	edac_device_free_ctl_info(edac_dev);
2069 
2070 	return ret;
2071 }
2072 
thunderx_l2c_remove(struct pci_dev * pdev)2073 static void thunderx_l2c_remove(struct pci_dev *pdev)
2074 {
2075 	struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
2076 	struct thunderx_l2c *l2c = edac_dev->pvt_info;
2077 
2078 	switch (pdev->device) {
2079 	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
2080 		writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
2081 		break;
2082 	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
2083 		writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
2084 		break;
2085 	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
2086 		writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
2087 		break;
2088 	}
2089 
2090 	edac_debugfs_remove_recursive(l2c->debugfs);
2091 
2092 	edac_device_del_device(&pdev->dev);
2093 	edac_device_free_ctl_info(edac_dev);
2094 }
2095 
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);

/* PCI driver for the L2C blocks; registered from thunderx_edac_init() */
static struct pci_driver thunderx_l2c_driver = {
	.name     = "thunderx_l2c_edac",
	.probe    = thunderx_l2c_probe,
	.remove   = thunderx_l2c_remove,
	.id_table = thunderx_l2c_pci_tbl,
};
2104 
thunderx_edac_init(void)2105 static int __init thunderx_edac_init(void)
2106 {
2107 	int rc = 0;
2108 
2109 	if (ghes_get_devices())
2110 		return -EBUSY;
2111 
2112 	rc = pci_register_driver(&thunderx_lmc_driver);
2113 	if (rc)
2114 		return rc;
2115 
2116 	rc = pci_register_driver(&thunderx_ocx_driver);
2117 	if (rc)
2118 		goto err_lmc;
2119 
2120 	rc = pci_register_driver(&thunderx_l2c_driver);
2121 	if (rc)
2122 		goto err_ocx;
2123 
2124 	return rc;
2125 err_ocx:
2126 	pci_unregister_driver(&thunderx_ocx_driver);
2127 err_lmc:
2128 	pci_unregister_driver(&thunderx_lmc_driver);
2129 
2130 	return rc;
2131 }
2132 
/*
 * Module exit point: unregister the three PCI drivers in the reverse of
 * the order in which thunderx_edac_init() registered them.
 */
static void __exit thunderx_edac_exit(void)
{
	pci_unregister_driver(&thunderx_l2c_driver);
	pci_unregister_driver(&thunderx_ocx_driver);
	pci_unregister_driver(&thunderx_lmc_driver);

}
2140 
/* Module entry/exit hooks and metadata */
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
2147