1 /*
2 * Cavium ThunderX memory controller kernel module
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved.
9 *
10 */
11
12 #include <linux/module.h>
13 #include <linux/pci.h>
14 #include <linux/edac.h>
15 #include <linux/interrupt.h>
16 #include <linux/string.h>
17 #include <linux/stop_machine.h>
18 #include <linux/delay.h>
19 #include <linux/sizes.h>
20 #include <linux/atomic.h>
21 #include <linux/bitfield.h>
22 #include <linux/circ_buf.h>
23
24 #include <asm/page.h>
25
26 #include "edac_module.h"
27
/* Page frame number of a physical address. */
#define phys_to_pfn(phys)	(PFN_DOWN(phys))

/* Address bits 45:44; the probe code extracts the node number from them. */
#define THUNDERX_NODE		GENMASK(45, 44)
31
/* Error classes stored in the .type member of the error descriptor tables. */
enum {
	ERR_CORRECTED = 1,
	ERR_UNCORRECTED,	/* 2 */
	ERR_UNKNOWN,		/* 3 */
};
37
38 struct error_descr {
39 int type;
40 u64 mask;
41 char *descr;
42 };
43
decode_register(char * str,size_t size,const struct error_descr * descr,const uint64_t reg)44 static void decode_register(char *str, size_t size,
45 const struct error_descr *descr,
46 const uint64_t reg)
47 {
48 int ret = 0;
49
50 while (descr->type && descr->mask && descr->descr) {
51 if (reg & descr->mask) {
52 ret = snprintf(str, size, "\n\t%s, %s",
53 descr->type == ERR_CORRECTED ?
54 "Corrected" : "Uncorrected",
55 descr->descr);
56 str += ret;
57 size -= ret;
58 }
59 descr++;
60 }
61 }
62
/**
 * get_bits() - extract a bit field from a word
 * @data:  source value
 * @pos:   position of the least significant bit of the field
 * @width: number of bits in the field
 *
 * The mask is built in unsigned long arithmetic so that fields of 31 bits or
 * more do not overflow an int shift. A @width that covers the whole word (or
 * more) returns everything from @pos upwards; a non-positive @width yields 0.
 *
 * Return: the @width bits of @data starting at @pos, right-aligned.
 */
static unsigned long get_bits(unsigned long data, int pos, int width)
{
	if (width <= 0)
		return 0;

	if ((unsigned int)width >= 8 * sizeof(data))
		return data >> pos;

	return (data >> pos) & ((1UL << width) - 1);
}
67
/* L2C control register; the LMC probe code maps it to read DISIDXALIAS. */
#define L2C_CTL				0x87E080800000
#define L2C_CTL_DISIDXALIAS		BIT(0)

/* PCI device ID matched by the LMC driver */
#define PCI_DEVICE_ID_THUNDER_LMC	0xa022
72
/* Failing address register and accessors for its fields */
#define LMC_FADR		0x20
#define LMC_FADR_FDIMM(x)	(((x) >> 37) & 0x1)
#define LMC_FADR_FBUNK(x)	(((x) >> 36) & 0x1)
#define LMC_FADR_FBANK(x)	(((x) >> 32) & 0xf)
#define LMC_FADR_FROW(x)	(((x) >> 14) & 0xffff)
#define LMC_FADR_FCOL(x)	(((x) >> 0) & 0x1fff)
79
/* LMC register offsets and the bit fields used by this driver */
#define LMC_NXM_FADR			0x28
#define LMC_ECC_SYND			0x38

#define LMC_ECC_PARITY_TEST		0x108

#define LMC_INT_W1S			0x150

#define LMC_INT_ENA_W1C			0x158
#define LMC_INT_ENA_W1S			0x160

#define LMC_CONFIG			0x188

#define LMC_CONFIG_BG2			BIT(62)
#define LMC_CONFIG_RANK_ENA		BIT(42)
#define LMC_CONFIG_PBANK_LSB(x)		(((x) >> 5) & 0xF)
#define LMC_CONFIG_ROW_LSB(x)		(((x) >> 2) & 0x7)

#define LMC_CONTROL			0x190
#define LMC_CONTROL_XOR_BANK		BIT(16)

#define LMC_INT				0x1F0

#define LMC_INT_DDR_ERR			BIT(11)
#define LMC_INT_DED_ERR			(0xFUL << 5)
#define LMC_INT_SEC_ERR			(0xFUL << 1)
#define LMC_INT_NXM_WR_MASK		BIT(0)

#define LMC_DDR_PLL_CTL			0x258
#define LMC_DDR_PLL_CTL_DDR4		BIT(29)

#define LMC_FADR_SCRAMBLED		0x330

/* LMC_INT bits reported as uncorrected errors */
#define LMC_INT_UE	(LMC_INT_DDR_ERR | LMC_INT_DED_ERR | LMC_INT_NXM_WR_MASK)

/* LMC_INT bits reported as corrected errors */
#define LMC_INT_CE	(LMC_INT_SEC_ERR)
116
117 static const struct error_descr lmc_errors[] = {
118 {
119 .type = ERR_CORRECTED,
120 .mask = LMC_INT_SEC_ERR,
121 .descr = "Single-bit ECC error",
122 },
123 {
124 .type = ERR_UNCORRECTED,
125 .mask = LMC_INT_DDR_ERR,
126 .descr = "DDR chip error",
127 },
128 {
129 .type = ERR_UNCORRECTED,
130 .mask = LMC_INT_DED_ERR,
131 .descr = "Double-bit ECC error",
132 },
133 {
134 .type = ERR_UNCORRECTED,
135 .mask = LMC_INT_NXM_WR_MASK,
136 .descr = "Non-existent memory write",
137 },
138 {0, 0, NULL},
139 };
140
/* Interrupt enable bits; LMC_INT_ENA_ALL is written to LMC_INT_ENA_W1S/W1C */
#define LMC_INT_EN_DDR_ERROR_ALERT_ENA	BIT(5)
#define LMC_INT_EN_DLCRAM_DED_ERR	BIT(4)
#define LMC_INT_EN_DLCRAM_SEC_ERR	BIT(3)
#define LMC_INT_INTR_DED_ENA		BIT(2)
#define LMC_INT_INTR_SEC_ENA		BIT(1)
#define LMC_INT_INTR_NXM_WR_ENA		BIT(0)

#define LMC_INT_ENA_ALL			GENMASK(5, 0)

/*
 * LMC_DDR_PLL_CTL, LMC_DDR_PLL_CTL_DDR4 and LMC_CONTROL are defined together
 * with the other LMC register offsets earlier in this file; the identical
 * second definitions are not repeated here.
 */
#define LMC_CONTROL_RDIMM		BIT(0)

/* Same offset as LMC_FADR_SCRAMBLED */
#define LMC_SCRAM_FADR			0x330

#define LMC_CHAR_MASK0			0x228
#define LMC_CHAR_MASK2			0x238

/* Number of slots in the error context rings; must be a power of two. */
#define RING_ENTRIES			8
162
163 struct debugfs_entry {
164 const char *name;
165 umode_t mode;
166 const struct file_operations fops;
167 };
168
169 struct lmc_err_ctx {
170 u64 reg_int;
171 u64 reg_fadr;
172 u64 reg_nxm_fadr;
173 u64 reg_scram_fadr;
174 u64 reg_ecc_synd;
175 };
176
177 struct thunderx_lmc {
178 void __iomem *regs;
179 struct pci_dev *pdev;
180 struct msix_entry msix_ent;
181
182 atomic_t ecc_int;
183
184 u64 mask0;
185 u64 mask2;
186 u64 parity_test;
187 u64 node;
188
189 int xbits;
190 int bank_width;
191 int pbank_lsb;
192 int dimm_lsb;
193 int rank_lsb;
194 int bank_lsb;
195 int row_lsb;
196 int col_hi_lsb;
197
198 int xor_bank;
199 int l2c_alias;
200
201 struct page *mem;
202
203 struct lmc_err_ctx err_ctx[RING_ENTRIES];
204 unsigned long ring_head;
205 unsigned long ring_tail;
206 };
207
/*
 * Map a free-running ring counter onto an index into a ring of @size slots.
 * @size must be a power of two.
 */
#define ring_pos(pos, size) ((pos) & ((size) - 1))
209
/*
 * Define a struct debugfs_entry called debugfs_<_name> for a file named
 * "<_name>" with the given mode and write/read handlers.
 */
#define DEBUGFS_STRUCT(_name, _mode, _write, _read)			    \
static struct debugfs_entry debugfs_##_name = {				    \
	.name = __stringify(_name),					    \
	.mode = VERIFY_OCTAL_PERMISSIONS(_mode),			    \
	.fops = {							    \
		.open	= simple_open,					    \
		.read	= _read,					    \
		.write	= _write,					    \
		.llseek	= generic_file_llseek,				    \
	},								    \
}
221
/*
 * Generate debugfs read/write handlers plus the debugfs_<_field> entry for a
 * u64 member <_field> of struct thunderx_<_type>.
 *
 * The read handler formats into a local buffer bounded by the buffer size
 * (not by the caller's byte count) and only hands out the bytes that were
 * actually formatted, so no uninitialized stack memory reaches user space.
 * "0x%016llx" always produces 18 characters, hence snprintf()'s return value
 * is the exact string length.
 */
#define DEBUGFS_FIELD_ATTR(_type, _field)				    \
static ssize_t thunderx_##_type##_##_field##_read(struct file *file,	    \
					    char __user *data,		    \
					    size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = snprintf(buf, sizeof(buf), "0x%016llx", pdata->_field);	    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_field##_write(struct file *file,	    \
					     const char __user *data,	    \
					     size_t count, loff_t *ppos)    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &pdata->_field);	    \
									    \
	return res ? res : count;					    \
}									    \
									    \
DEBUGFS_STRUCT(_field, 0600,						    \
		   thunderx_##_type##_##_field##_write,			    \
		   thunderx_##_type##_##_field##_read)
/*
 * Generate debugfs read/write handlers plus the debugfs_<_name> entry for
 * the 64-bit device register at offset <_reg> of struct thunderx_<_type>.
 *
 * The read handler only copies out the characters that were formatted, so
 * the unused tail of the stack buffer is never exposed to user space. The
 * write handler returns the byte count without narrowing it through an int.
 */
#define DEBUGFS_REG_ATTR(_type, _name, _reg)				    \
static ssize_t thunderx_##_type##_##_name##_read(struct file *file,	    \
					   char __user *data,		    \
					   size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	char buf[20];							    \
	int len;							    \
									    \
	len = snprintf(buf, sizeof(buf), "0x%016llx",			    \
		       readq(pdata->regs + _reg));			    \
	return simple_read_from_buffer(data, count, ppos, buf, len);	    \
}									    \
									    \
static ssize_t thunderx_##_type##_##_name##_write(struct file *file,	    \
					    const char __user *data,	    \
					    size_t count, loff_t *ppos)	    \
{									    \
	struct thunderx_##_type *pdata = file->private_data;		    \
	u64 val;							    \
	int res;							    \
									    \
	res = kstrtoull_from_user(data, count, 0, &val);		    \
	if (res)							    \
		return res;						    \
									    \
	writeq(val, pdata->regs + _reg);				    \
									    \
	return count;							    \
}									    \
									    \
DEBUGFS_STRUCT(_name, 0600,						    \
	       thunderx_##_type##_##_name##_write,			    \
	       thunderx_##_type##_##_name##_read)

/* Field attribute of struct thunderx_lmc */
#define LMC_DEBUGFS_ENT(_field)	DEBUGFS_FIELD_ATTR(lmc, _field)
287
/*
 * To get an ECC error injected, the following steps are needed:
 * - Set up the ECC injection by writing the appropriate parameters:
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask0
 *	echo <bit mask value> > /sys/kernel/debug/<device number>/mask2
 *	echo 0x802 > /sys/kernel/debug/<device number>/parity_test
 * - Do the actual injection:
 *	echo 1 > /sys/kernel/debug/<device number>/inject_ecc
 */
thunderx_lmc_inject_int_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)297 static ssize_t thunderx_lmc_inject_int_write(struct file *file,
298 const char __user *data,
299 size_t count, loff_t *ppos)
300 {
301 struct thunderx_lmc *lmc = file->private_data;
302 u64 val;
303 int res;
304
305 res = kstrtoull_from_user(data, count, 0, &val);
306
307 if (!res) {
308 /* Trigger the interrupt */
309 writeq(val, lmc->regs + LMC_INT_W1S);
310 res = count;
311 }
312
313 return res;
314 }
315
thunderx_lmc_int_read(struct file * file,char __user * data,size_t count,loff_t * ppos)316 static ssize_t thunderx_lmc_int_read(struct file *file,
317 char __user *data,
318 size_t count, loff_t *ppos)
319 {
320 struct thunderx_lmc *lmc = file->private_data;
321 char buf[20];
322 u64 lmc_int = readq(lmc->regs + LMC_INT);
323
324 snprintf(buf, sizeof(buf), "0x%016llx", lmc_int);
325 return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf));
326 }
327
328 #define TEST_PATTERN 0xa5
329
inject_ecc_fn(void * arg)330 static int inject_ecc_fn(void *arg)
331 {
332 struct thunderx_lmc *lmc = arg;
333 uintptr_t addr, phys;
334 unsigned int cline_size = cache_line_size();
335 const unsigned int lines = PAGE_SIZE / cline_size;
336 unsigned int i, cl_idx;
337
338 addr = (uintptr_t)page_address(lmc->mem);
339 phys = (uintptr_t)page_to_phys(lmc->mem);
340
341 cl_idx = (phys & 0x7f) >> 4;
342 lmc->parity_test &= ~(7ULL << 8);
343 lmc->parity_test |= (cl_idx << 8);
344
345 writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0);
346 writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2);
347 writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST);
348
349 readq(lmc->regs + LMC_CHAR_MASK0);
350 readq(lmc->regs + LMC_CHAR_MASK2);
351 readq(lmc->regs + LMC_ECC_PARITY_TEST);
352
353 for (i = 0; i < lines; i++) {
354 memset((void *)addr, TEST_PATTERN, cline_size);
355 barrier();
356
357 /*
358 * Flush L1 cachelines to the PoC (L2).
359 * This will cause cacheline eviction to the L2.
360 */
361 asm volatile("dc civac, %0\n"
362 "dsb sy\n"
363 : : "r"(addr + i * cline_size));
364 }
365
366 for (i = 0; i < lines; i++) {
367 /*
368 * Flush L2 cachelines to the DRAM.
369 * This will cause cacheline eviction to the DRAM
370 * and ECC corruption according to the masks set.
371 */
372 __asm__ volatile("sys #0,c11,C1,#2, %0\n"
373 : : "r"(phys + i * cline_size));
374 }
375
376 for (i = 0; i < lines; i++) {
377 /*
378 * Invalidate L2 cachelines.
379 * The subsequent load will cause cacheline fetch
380 * from the DRAM and an error interrupt
381 */
382 __asm__ volatile("sys #0,c11,C1,#1, %0"
383 : : "r"(phys + i * cline_size));
384 }
385
386 for (i = 0; i < lines; i++) {
387 /*
388 * Invalidate L1 cachelines.
389 * The subsequent load will cause cacheline fetch
390 * from the L2 and/or DRAM
391 */
392 asm volatile("dc ivac, %0\n"
393 "dsb sy\n"
394 : : "r"(addr + i * cline_size));
395 }
396
397 return 0;
398 }
399
thunderx_lmc_inject_ecc_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)400 static ssize_t thunderx_lmc_inject_ecc_write(struct file *file,
401 const char __user *data,
402 size_t count, loff_t *ppos)
403 {
404 struct thunderx_lmc *lmc = file->private_data;
405 unsigned int cline_size = cache_line_size();
406 u8 *tmp;
407 void __iomem *addr;
408 unsigned int offs, timeout = 100000;
409
410 atomic_set(&lmc->ecc_int, 0);
411
412 lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0);
413 if (!lmc->mem)
414 return -ENOMEM;
415
416 tmp = kmalloc(cline_size, GFP_KERNEL);
417 if (!tmp) {
418 __free_pages(lmc->mem, 0);
419 return -ENOMEM;
420 }
421
422 addr = page_address(lmc->mem);
423
424 while (!atomic_read(&lmc->ecc_int) && timeout--) {
425 stop_machine(inject_ecc_fn, lmc, NULL);
426
427 for (offs = 0; offs < PAGE_SIZE; offs += cline_size) {
428 /*
429 * Do a load from the previously rigged location
430 * This should generate an error interrupt.
431 */
432 memcpy(tmp, addr + offs, cline_size);
433 asm volatile("dsb ld\n");
434 }
435 }
436
437 kfree(tmp);
438 __free_pages(lmc->mem, 0);
439
440 return count;
441 }
442
443 LMC_DEBUGFS_ENT(mask0);
444 LMC_DEBUGFS_ENT(mask2);
445 LMC_DEBUGFS_ENT(parity_test);
446
447 DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL);
448 DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL);
449 DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read);
450
451 static struct debugfs_entry *lmc_dfs_ents[] = {
452 &debugfs_mask0,
453 &debugfs_mask2,
454 &debugfs_parity_test,
455 &debugfs_inject_ecc,
456 &debugfs_inject_int,
457 &debugfs_int_w1c,
458 };
459
thunderx_create_debugfs_nodes(struct dentry * parent,struct debugfs_entry * attrs[],void * data,size_t num)460 static int thunderx_create_debugfs_nodes(struct dentry *parent,
461 struct debugfs_entry *attrs[],
462 void *data,
463 size_t num)
464 {
465 int i;
466 struct dentry *ent;
467
468 if (!IS_ENABLED(CONFIG_EDAC_DEBUG))
469 return 0;
470
471 if (!parent)
472 return -ENOENT;
473
474 for (i = 0; i < num; i++) {
475 ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode,
476 parent, data, &attrs[i]->fops);
477
478 if (IS_ERR(ent))
479 break;
480 }
481
482 return i;
483 }
484
thunderx_faddr_to_phys(u64 faddr,struct thunderx_lmc * lmc)485 static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc)
486 {
487 phys_addr_t addr = 0;
488 int bank, xbits;
489
490 addr |= lmc->node << 40;
491 addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb;
492 addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb;
493 addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb;
494 addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb;
495
496 bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb;
497
498 if (lmc->xor_bank)
499 bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width);
500
501 addr |= bank << lmc->bank_lsb;
502
503 xbits = PCI_FUNC(lmc->pdev->devfn);
504
505 if (lmc->l2c_alias)
506 xbits ^= get_bits(addr, 20, lmc->xbits) ^
507 get_bits(addr, 12, lmc->xbits);
508
509 addr |= xbits << 7;
510
511 return addr;
512 }
513
thunderx_get_num_lmcs(unsigned int node)514 static unsigned int thunderx_get_num_lmcs(unsigned int node)
515 {
516 unsigned int number = 0;
517 struct pci_dev *pdev = NULL;
518
519 do {
520 pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
521 PCI_DEVICE_ID_THUNDER_LMC,
522 pdev);
523 if (pdev) {
524 #ifdef CONFIG_NUMA
525 if (pdev->dev.numa_node == node)
526 number++;
527 #else
528 number++;
529 #endif
530 }
531 } while (pdev);
532
533 return number;
534 }
535
/* Sizes of the text buffers allocated by the threaded LMC handler */
#define LMC_MESSAGE_SIZE	120
#define LMC_OTHER_SIZE		(50 * ARRAY_SIZE(lmc_errors))
538
thunderx_lmc_err_isr(int irq,void * dev_id)539 static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id)
540 {
541 struct mem_ctl_info *mci = dev_id;
542 struct thunderx_lmc *lmc = mci->pvt_info;
543
544 unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx));
545 struct lmc_err_ctx *ctx = &lmc->err_ctx[head];
546
547 writeq(0, lmc->regs + LMC_CHAR_MASK0);
548 writeq(0, lmc->regs + LMC_CHAR_MASK2);
549 writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST);
550
551 ctx->reg_int = readq(lmc->regs + LMC_INT);
552 ctx->reg_fadr = readq(lmc->regs + LMC_FADR);
553 ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR);
554 ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR);
555 ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND);
556
557 lmc->ring_head++;
558
559 atomic_set(&lmc->ecc_int, 1);
560
561 /* Clear the interrupt */
562 writeq(ctx->reg_int, lmc->regs + LMC_INT);
563
564 return IRQ_WAKE_THREAD;
565 }
566
thunderx_lmc_threaded_isr(int irq,void * dev_id)567 static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id)
568 {
569 struct mem_ctl_info *mci = dev_id;
570 struct thunderx_lmc *lmc = mci->pvt_info;
571 phys_addr_t phys_addr;
572
573 unsigned long tail;
574 struct lmc_err_ctx *ctx;
575
576 irqreturn_t ret = IRQ_NONE;
577
578 char *msg;
579 char *other;
580
581 msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL);
582 other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL);
583
584 if (!msg || !other)
585 goto err_free;
586
587 while (CIRC_CNT(lmc->ring_head, lmc->ring_tail,
588 ARRAY_SIZE(lmc->err_ctx))) {
589 tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx));
590
591 ctx = &lmc->err_ctx[tail];
592
593 dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n",
594 ctx->reg_int);
595 dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n",
596 ctx->reg_fadr);
597 dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n",
598 ctx->reg_nxm_fadr);
599 dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n",
600 ctx->reg_scram_fadr);
601 dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n",
602 ctx->reg_ecc_synd);
603
604 snprintf(msg, LMC_MESSAGE_SIZE,
605 "DIMM %lld rank %lld bank %lld row %lld col %lld",
606 LMC_FADR_FDIMM(ctx->reg_scram_fadr),
607 LMC_FADR_FBUNK(ctx->reg_scram_fadr),
608 LMC_FADR_FBANK(ctx->reg_scram_fadr),
609 LMC_FADR_FROW(ctx->reg_scram_fadr),
610 LMC_FADR_FCOL(ctx->reg_scram_fadr));
611
612 decode_register(other, LMC_OTHER_SIZE, lmc_errors,
613 ctx->reg_int);
614
615 phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc);
616
617 if (ctx->reg_int & LMC_INT_UE)
618 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
619 phys_to_pfn(phys_addr),
620 offset_in_page(phys_addr),
621 0, -1, -1, -1, msg, other);
622 else if (ctx->reg_int & LMC_INT_CE)
623 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
624 phys_to_pfn(phys_addr),
625 offset_in_page(phys_addr),
626 0, -1, -1, -1, msg, other);
627
628 lmc->ring_tail++;
629 }
630
631 ret = IRQ_HANDLED;
632
633 err_free:
634 kfree(msg);
635 kfree(other);
636
637 return ret;
638 }
639
640 static const struct pci_device_id thunderx_lmc_pci_tbl[] = {
641 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) },
642 { 0, },
643 };
644
pci_dev_to_mc_idx(struct pci_dev * pdev)645 static inline int pci_dev_to_mc_idx(struct pci_dev *pdev)
646 {
647 int node = dev_to_node(&pdev->dev);
648 int ret = PCI_FUNC(pdev->devfn);
649
650 ret += max(node, 0) << 3;
651
652 return ret;
653 }
654
thunderx_lmc_probe(struct pci_dev * pdev,const struct pci_device_id * id)655 static int thunderx_lmc_probe(struct pci_dev *pdev,
656 const struct pci_device_id *id)
657 {
658 struct thunderx_lmc *lmc;
659 struct edac_mc_layer layer;
660 struct mem_ctl_info *mci;
661 u64 lmc_control, lmc_ddr_pll_ctl, lmc_config;
662 int ret;
663 u64 lmc_int;
664 void *l2c_ioaddr;
665
666 layer.type = EDAC_MC_LAYER_SLOT;
667 layer.size = 2;
668 layer.is_virt_csrow = false;
669
670 ret = pcim_enable_device(pdev);
671 if (ret) {
672 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
673 return ret;
674 }
675
676 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc");
677 if (ret) {
678 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
679 return ret;
680 }
681
682 mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer,
683 sizeof(struct thunderx_lmc));
684 if (!mci)
685 return -ENOMEM;
686
687 mci->pdev = &pdev->dev;
688 lmc = mci->pvt_info;
689
690 pci_set_drvdata(pdev, mci);
691
692 lmc->regs = pcim_iomap_table(pdev)[0];
693
694 lmc_control = readq(lmc->regs + LMC_CONTROL);
695 lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL);
696 lmc_config = readq(lmc->regs + LMC_CONFIG);
697
698 if (lmc_control & LMC_CONTROL_RDIMM) {
699 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
700 lmc_ddr_pll_ctl) ?
701 MEM_RDDR4 : MEM_RDDR3;
702 } else {
703 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4,
704 lmc_ddr_pll_ctl) ?
705 MEM_DDR4 : MEM_DDR3;
706 }
707
708 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
709 mci->edac_cap = EDAC_FLAG_SECDED;
710
711 mci->mod_name = "thunderx-lmc";
712 mci->ctl_name = "thunderx-lmc";
713 mci->dev_name = dev_name(&pdev->dev);
714 mci->scrub_mode = SCRUB_NONE;
715
716 lmc->pdev = pdev;
717 lmc->msix_ent.entry = 0;
718
719 lmc->ring_head = 0;
720 lmc->ring_tail = 0;
721
722 ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1);
723 if (ret) {
724 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
725 goto err_free;
726 }
727
728 ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector,
729 thunderx_lmc_err_isr,
730 thunderx_lmc_threaded_isr, 0,
731 "[EDAC] ThunderX LMC", mci);
732 if (ret) {
733 dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret);
734 goto err_free;
735 }
736
737 lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0));
738
739 lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1;
740 lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) &&
741 FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3;
742
743 lmc->pbank_lsb = (lmc_config >> 5) & 0xf;
744 lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits;
745 lmc->rank_lsb = lmc->dimm_lsb;
746 lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0;
747 lmc->bank_lsb = 7 + lmc->xbits;
748 lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits;
749
750 lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width;
751
752 lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK;
753
754 l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE);
755 if (!l2c_ioaddr) {
756 dev_err(&pdev->dev, "Cannot map L2C_CTL\n");
757 ret = -ENOMEM;
758 goto err_free;
759 }
760
761 lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS);
762
763 iounmap(l2c_ioaddr);
764
765 ret = edac_mc_add_mc(mci);
766 if (ret) {
767 dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret);
768 goto err_free;
769 }
770
771 lmc_int = readq(lmc->regs + LMC_INT);
772 writeq(lmc_int, lmc->regs + LMC_INT);
773
774 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S);
775
776 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
777 ret = thunderx_create_debugfs_nodes(mci->debugfs,
778 lmc_dfs_ents,
779 lmc,
780 ARRAY_SIZE(lmc_dfs_ents));
781
782 if (ret != ARRAY_SIZE(lmc_dfs_ents)) {
783 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
784 ret, ret >= 0 ? " created" : "");
785 }
786 }
787
788 return 0;
789
790 err_free:
791 pci_set_drvdata(pdev, NULL);
792 edac_mc_free(mci);
793
794 return ret;
795 }
796
thunderx_lmc_remove(struct pci_dev * pdev)797 static void thunderx_lmc_remove(struct pci_dev *pdev)
798 {
799 struct mem_ctl_info *mci = pci_get_drvdata(pdev);
800 struct thunderx_lmc *lmc = mci->pvt_info;
801
802 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C);
803
804 edac_mc_del_mc(&pdev->dev);
805 edac_mc_free(mci);
806 }
807
808 MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl);
809
810 static struct pci_driver thunderx_lmc_driver = {
811 .name = "thunderx_lmc_edac",
812 .probe = thunderx_lmc_probe,
813 .remove = thunderx_lmc_remove,
814 .id_table = thunderx_lmc_pci_tbl,
815 };
816
817 /*---------------------- OCX driver ---------------------------------*/
818
#define PCI_DEVICE_ID_THUNDER_OCX	0xa013

/* One interrupt per link plus one more (OCX_INTS in total) */
#define OCX_LINK_INTS			3
#define OCX_INTS			(OCX_LINK_INTS + 1)
#define OCX_RX_LANES			24
#define OCX_RX_LANE_STATS		15

/* Common interrupt register and its set/enable companions */
#define OCX_COM_INT			0x100
#define OCX_COM_INT_W1S			0x108
#define OCX_COM_INT_ENA_W1S		0x110
#define OCX_COM_INT_ENA_W1C		0x118

#define OCX_COM_IO_BADID		BIT(54)
#define OCX_COM_MEM_BADID		BIT(53)
#define OCX_COM_COPR_BADID		BIT(52)
#define OCX_COM_WIN_REQ_BADID		BIT(51)
#define OCX_COM_WIN_REQ_TOUT		BIT(50)
#define OCX_COM_RX_LANE			GENMASK(23, 0)

/* OCX_COM_INT bits reported as corrected errors */
#define OCX_COM_INT_CE	(OCX_COM_IO_BADID | OCX_COM_MEM_BADID |	\
			 OCX_COM_COPR_BADID | OCX_COM_WIN_REQ_BADID | \
			 OCX_COM_WIN_REQ_TOUT)
843
844 static const struct error_descr ocx_com_errors[] = {
845 {
846 .type = ERR_CORRECTED,
847 .mask = OCX_COM_IO_BADID,
848 .descr = "Invalid IO transaction node ID",
849 },
850 {
851 .type = ERR_CORRECTED,
852 .mask = OCX_COM_MEM_BADID,
853 .descr = "Invalid memory transaction node ID",
854 },
855 {
856 .type = ERR_CORRECTED,
857 .mask = OCX_COM_COPR_BADID,
858 .descr = "Invalid coprocessor transaction node ID",
859 },
860 {
861 .type = ERR_CORRECTED,
862 .mask = OCX_COM_WIN_REQ_BADID,
863 .descr = "Invalid SLI transaction node ID",
864 },
865 {
866 .type = ERR_CORRECTED,
867 .mask = OCX_COM_WIN_REQ_TOUT,
868 .descr = "Window/core request timeout",
869 },
870 {0, 0, NULL},
871 };
872
/* Per-link interrupt registers; x is the link number */
#define OCX_COM_LINKX_INT(x)		(0x120 + (x) * 8)
#define OCX_COM_LINKX_INT_W1S(x)	(0x140 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1S(x)	(0x160 + (x) * 8)
#define OCX_COM_LINKX_INT_ENA_W1C(x)	(0x180 + (x) * 8)

/* Bits of OCX_COM_LINKX_INT */
#define OCX_COM_LINK_BAD_WORD		BIT(13)
#define OCX_COM_LINK_ALIGN_FAIL		BIT(12)
#define OCX_COM_LINK_ALIGN_DONE		BIT(11)
#define OCX_COM_LINK_UP			BIT(10)
#define OCX_COM_LINK_STOP		BIT(9)
#define OCX_COM_LINK_BLK_ERR		BIT(8)
#define OCX_COM_LINK_REINIT		BIT(7)
#define OCX_COM_LINK_LNK_DATA		BIT(6)
#define OCX_COM_LINK_RXFIFO_DBE		BIT(5)
#define OCX_COM_LINK_RXFIFO_SBE		BIT(4)
#define OCX_COM_LINK_TXFIFO_DBE		BIT(3)
#define OCX_COM_LINK_TXFIFO_SBE		BIT(2)
#define OCX_COM_LINK_REPLAY_DBE		BIT(1)
#define OCX_COM_LINK_REPLAY_SBE		BIT(0)
892
893 static const struct error_descr ocx_com_link_errors[] = {
894 {
895 .type = ERR_CORRECTED,
896 .mask = OCX_COM_LINK_REPLAY_SBE,
897 .descr = "Replay buffer single-bit error",
898 },
899 {
900 .type = ERR_CORRECTED,
901 .mask = OCX_COM_LINK_TXFIFO_SBE,
902 .descr = "TX FIFO single-bit error",
903 },
904 {
905 .type = ERR_CORRECTED,
906 .mask = OCX_COM_LINK_RXFIFO_SBE,
907 .descr = "RX FIFO single-bit error",
908 },
909 {
910 .type = ERR_CORRECTED,
911 .mask = OCX_COM_LINK_BLK_ERR,
912 .descr = "Block code error",
913 },
914 {
915 .type = ERR_CORRECTED,
916 .mask = OCX_COM_LINK_ALIGN_FAIL,
917 .descr = "Link alignment failure",
918 },
919 {
920 .type = ERR_CORRECTED,
921 .mask = OCX_COM_LINK_BAD_WORD,
922 .descr = "Bad code word",
923 },
924 {
925 .type = ERR_UNCORRECTED,
926 .mask = OCX_COM_LINK_REPLAY_DBE,
927 .descr = "Replay buffer double-bit error",
928 },
929 {
930 .type = ERR_UNCORRECTED,
931 .mask = OCX_COM_LINK_TXFIFO_DBE,
932 .descr = "TX FIFO double-bit error",
933 },
934 {
935 .type = ERR_UNCORRECTED,
936 .mask = OCX_COM_LINK_RXFIFO_DBE,
937 .descr = "RX FIFO double-bit error",
938 },
939 {
940 .type = ERR_UNCORRECTED,
941 .mask = OCX_COM_LINK_STOP,
942 .descr = "Link stopped",
943 },
944 {0, 0, NULL},
945 };
946
/* OCX_COM_LINKX_INT bits reported as uncorrected errors */
#define OCX_COM_LINK_INT_UE	(OCX_COM_LINK_REPLAY_DBE | \
				 OCX_COM_LINK_TXFIFO_DBE | \
				 OCX_COM_LINK_RXFIFO_DBE | \
				 OCX_COM_LINK_STOP)

/* OCX_COM_LINKX_INT bits reported as corrected errors */
#define OCX_COM_LINK_INT_CE	(OCX_COM_LINK_REPLAY_SBE | \
				 OCX_COM_LINK_TXFIFO_SBE | \
				 OCX_COM_LINK_RXFIFO_SBE | \
				 OCX_COM_LINK_BLK_ERR | \
				 OCX_COM_LINK_ALIGN_FAIL | \
				 OCX_COM_LINK_BAD_WORD)

/* Per-lane registers; x is the lane number, y the statistics index */
#define OCX_LNE_INT(x)			(0x8018 + (x) * 0x100)
#define OCX_LNE_INT_EN(x)		(0x8020 + (x) * 0x100)
#define OCX_LNE_BAD_CNT(x)		(0x8028 + (x) * 0x100)
#define OCX_LNE_CFG(x)			(0x8000 + (x) * 0x100)
#define OCX_LNE_STAT(x, y)		(0x8040 + (x) * 0x100 + (y) * 8)

#define OCX_LNE_CFG_RX_BDRY_LOCK_DIS	BIT(8)
#define OCX_LNE_CFG_RX_STAT_WRAP_DIS	BIT(2)
#define OCX_LNE_CFG_RX_STAT_RDCLR	BIT(1)
#define OCX_LNE_CFG_RX_STAT_ENA		BIT(0)

/* Bits of OCX_LNE_INT */
#define OCX_LANE_BAD_64B67B		BIT(8)
#define OCX_LANE_DSKEW_FIFO_OVFL	BIT(5)
#define OCX_LANE_SCRM_SYNC_LOSS		BIT(4)
#define OCX_LANE_UKWN_CNTL_WORD		BIT(3)
#define OCX_LANE_CRC32_ERR		BIT(2)
#define OCX_LANE_BDRY_SYNC_LOSS		BIT(1)
#define OCX_LANE_SERDES_LOCK_LOSS	BIT(0)

/* No lane interrupt bit is classified as uncorrected */
#define OCX_COM_LANE_INT_UE	(0)
#define OCX_COM_LANE_INT_CE	(OCX_LANE_SERDES_LOCK_LOSS | \
				 OCX_LANE_BDRY_SYNC_LOSS | \
				 OCX_LANE_CRC32_ERR | \
				 OCX_LANE_UKWN_CNTL_WORD | \
				 OCX_LANE_SCRM_SYNC_LOSS | \
				 OCX_LANE_DSKEW_FIFO_OVFL | \
				 OCX_LANE_BAD_64B67B)
987
988 static const struct error_descr ocx_lane_errors[] = {
989 {
990 .type = ERR_CORRECTED,
991 .mask = OCX_LANE_SERDES_LOCK_LOSS,
992 .descr = "RX SerDes lock lost",
993 },
994 {
995 .type = ERR_CORRECTED,
996 .mask = OCX_LANE_BDRY_SYNC_LOSS,
997 .descr = "RX word boundary lost",
998 },
999 {
1000 .type = ERR_CORRECTED,
1001 .mask = OCX_LANE_CRC32_ERR,
1002 .descr = "CRC32 error",
1003 },
1004 {
1005 .type = ERR_CORRECTED,
1006 .mask = OCX_LANE_UKWN_CNTL_WORD,
1007 .descr = "Unknown control word",
1008 },
1009 {
1010 .type = ERR_CORRECTED,
1011 .mask = OCX_LANE_SCRM_SYNC_LOSS,
1012 .descr = "Scrambler synchronization lost",
1013 },
1014 {
1015 .type = ERR_CORRECTED,
1016 .mask = OCX_LANE_DSKEW_FIFO_OVFL,
1017 .descr = "RX deskew FIFO overflow",
1018 },
1019 {
1020 .type = ERR_CORRECTED,
1021 .mask = OCX_LANE_BAD_64B67B,
1022 .descr = "Bad 64B/67B codeword",
1023 },
1024 {0, 0, NULL},
1025 };
1026
/* Masks covering every interrupt enable bit of the respective register */
#define OCX_LNE_INT_ENA_ALL		(GENMASK(9, 8) | GENMASK(6, 0))
#define OCX_COM_INT_ENA_ALL		(GENMASK(54, 50) | GENMASK(23, 0))
#define OCX_COM_LINKX_INT_ENA_ALL	(GENMASK(13, 12) | \
					 GENMASK(9, 7) | GENMASK(5, 0))

/* Per-link ECC control registers; x is the link number */
#define OCX_TLKX_ECC_CTL(x)		(0x10018 + (x) * 0x2000)
#define OCX_RLKX_ECC_CTL(x)		(0x18018 + (x) * 0x2000)
1034
1035 struct ocx_com_err_ctx {
1036 u64 reg_com_int;
1037 u64 reg_lane_int[OCX_RX_LANES];
1038 u64 reg_lane_stat11[OCX_RX_LANES];
1039 };
1040
1041 struct ocx_link_err_ctx {
1042 u64 reg_com_link_int;
1043 int link;
1044 };
1045
1046 struct thunderx_ocx {
1047 void __iomem *regs;
1048 int com_link;
1049 struct pci_dev *pdev;
1050 struct edac_device_ctl_info *edac_dev;
1051
1052 struct dentry *debugfs;
1053 struct msix_entry msix_ent[OCX_INTS];
1054
1055 struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES];
1056 struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES];
1057
1058 unsigned long com_ring_head;
1059 unsigned long com_ring_tail;
1060
1061 unsigned long link_ring_head;
1062 unsigned long link_ring_tail;
1063 };
1064
/* Sizes of the text buffers allocated by the threaded OCX handlers */
#define OCX_MESSAGE_SIZE	SZ_1K
#define OCX_OTHER_SIZE		(50 * ARRAY_SIZE(ocx_com_link_errors))
1067
1068 /* This handler is threaded */
thunderx_ocx_com_isr(int irq,void * irq_id)1069 static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id)
1070 {
1071 struct msix_entry *msix = irq_id;
1072 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1073 msix_ent[msix->entry]);
1074
1075 int lane;
1076 unsigned long head = ring_pos(ocx->com_ring_head,
1077 ARRAY_SIZE(ocx->com_err_ctx));
1078 struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head];
1079
1080 ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT);
1081
1082 for (lane = 0; lane < OCX_RX_LANES; lane++) {
1083 ctx->reg_lane_int[lane] =
1084 readq(ocx->regs + OCX_LNE_INT(lane));
1085 ctx->reg_lane_stat11[lane] =
1086 readq(ocx->regs + OCX_LNE_STAT(lane, 11));
1087
1088 writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane));
1089 }
1090
1091 writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT);
1092
1093 ocx->com_ring_head++;
1094
1095 return IRQ_WAKE_THREAD;
1096 }
1097
thunderx_ocx_com_threaded_isr(int irq,void * irq_id)1098 static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id)
1099 {
1100 struct msix_entry *msix = irq_id;
1101 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1102 msix_ent[msix->entry]);
1103
1104 irqreturn_t ret = IRQ_NONE;
1105
1106 unsigned long tail;
1107 struct ocx_com_err_ctx *ctx;
1108 int lane;
1109 char *msg;
1110 char *other;
1111
1112 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1113 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1114
1115 if (!msg || !other)
1116 goto err_free;
1117
1118 while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail,
1119 ARRAY_SIZE(ocx->com_err_ctx))) {
1120 tail = ring_pos(ocx->com_ring_tail,
1121 ARRAY_SIZE(ocx->com_err_ctx));
1122 ctx = &ocx->com_err_ctx[tail];
1123
1124 snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx",
1125 ocx->edac_dev->ctl_name, ctx->reg_com_int);
1126
1127 decode_register(other, OCX_OTHER_SIZE,
1128 ocx_com_errors, ctx->reg_com_int);
1129
1130 strlcat(msg, other, OCX_MESSAGE_SIZE);
1131
1132 for (lane = 0; lane < OCX_RX_LANES; lane++)
1133 if (ctx->reg_com_int & BIT(lane)) {
1134 snprintf(other, OCX_OTHER_SIZE,
1135 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx",
1136 lane, ctx->reg_lane_int[lane],
1137 lane, ctx->reg_lane_stat11[lane]);
1138
1139 strlcat(msg, other, OCX_MESSAGE_SIZE);
1140
1141 decode_register(other, OCX_OTHER_SIZE,
1142 ocx_lane_errors,
1143 ctx->reg_lane_int[lane]);
1144 strlcat(msg, other, OCX_MESSAGE_SIZE);
1145 }
1146
1147 if (ctx->reg_com_int & OCX_COM_INT_CE)
1148 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1149
1150 ocx->com_ring_tail++;
1151 }
1152
1153 ret = IRQ_HANDLED;
1154
1155 err_free:
1156 kfree(other);
1157 kfree(msg);
1158
1159 return ret;
1160 }
1161
thunderx_ocx_lnk_isr(int irq,void * irq_id)1162 static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id)
1163 {
1164 struct msix_entry *msix = irq_id;
1165 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1166 msix_ent[msix->entry]);
1167 unsigned long head = ring_pos(ocx->link_ring_head,
1168 ARRAY_SIZE(ocx->link_err_ctx));
1169 struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head];
1170
1171 ctx->link = msix->entry;
1172 ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1173
1174 writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link));
1175
1176 ocx->link_ring_head++;
1177
1178 return IRQ_WAKE_THREAD;
1179 }
1180
thunderx_ocx_lnk_threaded_isr(int irq,void * irq_id)1181 static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id)
1182 {
1183 struct msix_entry *msix = irq_id;
1184 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx,
1185 msix_ent[msix->entry]);
1186 irqreturn_t ret = IRQ_NONE;
1187 unsigned long tail;
1188 struct ocx_link_err_ctx *ctx;
1189
1190 char *msg;
1191 char *other;
1192
1193 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1194 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1195
1196 if (!msg || !other)
1197 goto err_free;
1198
1199 while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail,
1200 ARRAY_SIZE(ocx->link_err_ctx))) {
1201 tail = ring_pos(ocx->link_ring_head,
1202 ARRAY_SIZE(ocx->link_err_ctx));
1203
1204 ctx = &ocx->link_err_ctx[tail];
1205
1206 snprintf(msg, OCX_MESSAGE_SIZE,
1207 "%s: OCX_COM_LINK_INT[%d]: %016llx",
1208 ocx->edac_dev->ctl_name,
1209 ctx->link, ctx->reg_com_link_int);
1210
1211 decode_register(other, OCX_OTHER_SIZE,
1212 ocx_com_link_errors, ctx->reg_com_link_int);
1213
1214 strlcat(msg, other, OCX_MESSAGE_SIZE);
1215
1216 if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE)
1217 edac_device_handle_ue(ocx->edac_dev, 0, 0, msg);
1218 else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE)
1219 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg);
1220
1221 ocx->link_ring_tail++;
1222 }
1223
1224 ret = IRQ_HANDLED;
1225 err_free:
1226 kfree(other);
1227 kfree(msg);
1228
1229 return ret;
1230 }
1231
/*
 * Instantiate one debugfs register attribute per OCX register of interest.
 * Each invocation expands DEBUGFS_REG_ATTR() (defined elsewhere in this file)
 * for the "ocx" driver; the resulting debugfs_<name> objects are collected in
 * ocx_dfs_ents[].
 */
#define OCX_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(ocx, _name, _reg)

/* ECC control registers, one per link (TLK 0-2, RLK 0-2). */
OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2));

OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0));
OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1));
OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2));

/* W1S aliases of the per-link interrupt registers. */
OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0));
OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1));
OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2));

/* OCX_LNE_BAD_CNT registers of lanes 0-23. */
OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0));
OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1));
OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2));
OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3));
OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4));
OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5));
OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6));
OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7));

OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8));
OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9));
OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10));
OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11));
OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12));
OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13));
OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14));
OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15));

OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16));
OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17));
OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18));
OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19));
OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20));
OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21));
OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22));
OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23));

/* W1S alias of the common interrupt register. */
OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S);
1274
/*
 * All OCX debugfs attributes declared above; thunderx_ocx_probe() passes this
 * table to thunderx_create_debugfs_nodes() when CONFIG_EDAC_DEBUG is enabled.
 */
static struct debugfs_entry *ocx_dfs_ents[] = {
	&debugfs_tlk0_ecc_ctl,
	&debugfs_tlk1_ecc_ctl,
	&debugfs_tlk2_ecc_ctl,

	&debugfs_rlk0_ecc_ctl,
	&debugfs_rlk1_ecc_ctl,
	&debugfs_rlk2_ecc_ctl,

	&debugfs_com_link0_int,
	&debugfs_com_link1_int,
	&debugfs_com_link2_int,

	&debugfs_lne00_badcnt,
	&debugfs_lne01_badcnt,
	&debugfs_lne02_badcnt,
	&debugfs_lne03_badcnt,
	&debugfs_lne04_badcnt,
	&debugfs_lne05_badcnt,
	&debugfs_lne06_badcnt,
	&debugfs_lne07_badcnt,
	&debugfs_lne08_badcnt,
	&debugfs_lne09_badcnt,
	&debugfs_lne10_badcnt,
	&debugfs_lne11_badcnt,
	&debugfs_lne12_badcnt,
	&debugfs_lne13_badcnt,
	&debugfs_lne14_badcnt,
	&debugfs_lne15_badcnt,
	&debugfs_lne16_badcnt,
	&debugfs_lne17_badcnt,
	&debugfs_lne18_badcnt,
	&debugfs_lne19_badcnt,
	&debugfs_lne20_badcnt,
	&debugfs_lne21_badcnt,
	&debugfs_lne22_badcnt,
	&debugfs_lne23_badcnt,

	&debugfs_com_int,
};
1315
/* PCI IDs bound by thunderx_ocx_driver; the zero entry terminates the table. */
static const struct pci_device_id thunderx_ocx_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) },
	{ 0, },
};
1320
thunderx_ocx_clearstats(struct thunderx_ocx * ocx)1321 static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx)
1322 {
1323 int lane, stat, cfg;
1324
1325 for (lane = 0; lane < OCX_RX_LANES; lane++) {
1326 cfg = readq(ocx->regs + OCX_LNE_CFG(lane));
1327 cfg |= OCX_LNE_CFG_RX_STAT_RDCLR;
1328 cfg &= ~OCX_LNE_CFG_RX_STAT_ENA;
1329 writeq(cfg, ocx->regs + OCX_LNE_CFG(lane));
1330
1331 for (stat = 0; stat < OCX_RX_LANE_STATS; stat++)
1332 readq(ocx->regs + OCX_LNE_STAT(lane, stat));
1333 }
1334 }
1335
/*
 * Probe one OCX PCI function.
 *
 * Enables the device, maps region 0, allocates an EDAC device whose private
 * area is the struct thunderx_ocx, enables OCX_INTS MSI-X vectors and
 * attaches a hard/threaded handler pair to each, registers with EDAC,
 * optionally creates the debugfs nodes, and finally clears the lane
 * statistics and pending status before enabling lane, link and common
 * interrupts.
 *
 * Returns 0 on success or a negative errno.  Failures after the EDAC control
 * structure has been allocated free it before returning.
 */
static int thunderx_ocx_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_ocx *ocx;
	struct edac_device_ctl_info *edac_dev;
	char name[32];
	int idx;
	int i;
	int ret;
	u64 reg;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	idx = edac_device_alloc_index();
	snprintf(name, sizeof(name), "OCX%d", idx);
	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx),
					      name, 1, "CCPI", 1, 0, idx);
	if (!edac_dev) {
		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
		return -ENOMEM;
	}
	/* The driver state lives in the EDAC device's private area. */
	ocx = edac_dev->pvt_info;
	ocx->edac_dev = edac_dev;
	/* Both snapshot rings start out empty. */
	ocx->com_ring_head = 0;
	ocx->com_ring_tail = 0;
	ocx->link_ring_head = 0;
	ocx->link_ring_tail = 0;

	ocx->regs = pcim_iomap_table(pdev)[0];
	if (!ocx->regs) {
		dev_err(&pdev->dev, "Cannot map PCI resources\n");
		ret = -ENODEV;
		goto err_free;
	}

	ocx->pdev = pdev;

	for (i = 0; i < OCX_INTS; i++) {
		ocx->msix_ent[i].entry = i;
		ocx->msix_ent[i].vector = 0;
	}

	ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	/*
	 * Entry 3 gets the common (COM) handlers, every other entry the link
	 * handlers.  The msix_entry itself is the handler cookie; the
	 * handlers recover the thunderx_ocx from it with container_of().
	 */
	for (i = 0; i < OCX_INTS; i++) {
		ret = devm_request_threaded_irq(&pdev->dev,
						ocx->msix_ent[i].vector,
						(i == 3) ?
						 thunderx_ocx_com_isr :
						 thunderx_ocx_lnk_isr,
						(i == 3) ?
						 thunderx_ocx_com_threaded_isr :
						 thunderx_ocx_lnk_threaded_isr,
						0, "[EDAC] ThunderX OCX",
						&ocx->msix_ent[i]);
		if (ret)
			goto err_free;
	}

	edac_dev->dev = &pdev->dev;
	edac_dev->dev_name = dev_name(&pdev->dev);
	edac_dev->mod_name = "thunderx-ocx";
	edac_dev->ctl_name = "thunderx-ocx";

	ret = edac_device_add_device(edac_dev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
		goto err_free;
	}

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

		/* A short count only produces a warning; probing continues. */
		ret = thunderx_create_debugfs_nodes(ocx->debugfs,
						    ocx_dfs_ents,
						    ocx,
						    ARRAY_SIZE(ocx_dfs_ents));
		if (ret != ARRAY_SIZE(ocx_dfs_ents)) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	pci_set_drvdata(pdev, edac_dev);

	thunderx_ocx_clearstats(ocx);

	/* Per lane: write the enable mask, then write back the pending status. */
	for (i = 0; i < OCX_RX_LANES; i++) {
		writeq(OCX_LNE_INT_ENA_ALL,
		       ocx->regs + OCX_LNE_INT_EN(i));

		reg = readq(ocx->regs + OCX_LNE_INT(i));
		writeq(reg, ocx->regs + OCX_LNE_INT(i));

	}

	/* Per link: write back the pending status, then set the enable bits. */
	for (i = 0; i < OCX_LINK_INTS; i++) {
		reg = readq(ocx->regs + OCX_COM_LINKX_INT(i));
		writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i));

		writeq(OCX_COM_LINKX_INT_ENA_ALL,
		       ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i));
	}

	/* Common interrupt: same write-back, then enable. */
	reg = readq(ocx->regs + OCX_COM_INT);
	writeq(reg, ocx->regs + OCX_COM_INT);

	writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S);

	return 0;
err_free:
	edac_device_free_ctl_info(edac_dev);

	return ret;
}
1465
thunderx_ocx_remove(struct pci_dev * pdev)1466 static void thunderx_ocx_remove(struct pci_dev *pdev)
1467 {
1468 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
1469 struct thunderx_ocx *ocx = edac_dev->pvt_info;
1470 int i;
1471
1472 writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C);
1473
1474 for (i = 0; i < OCX_INTS; i++) {
1475 writeq(OCX_COM_LINKX_INT_ENA_ALL,
1476 ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i));
1477 }
1478
1479 edac_debugfs_remove_recursive(ocx->debugfs);
1480
1481 edac_device_del_device(&pdev->dev);
1482 edac_device_free_ctl_info(edac_dev);
1483 }
1484
MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl);

/* PCI driver for the OCX block; registered from thunderx_edac_init(). */
static struct pci_driver thunderx_ocx_driver = {
	.name     = "thunderx_ocx_edac",
	.probe    = thunderx_ocx_probe,
	.remove   = thunderx_ocx_remove,
	.id_table = thunderx_ocx_pci_tbl,
};
1493
1494 /*---------------------- L2C driver ---------------------------------*/
1495
/* PCI device IDs of the three L2C sub-blocks listed in thunderx_l2c_pci_tbl. */
#define PCI_DEVICE_ID_THUNDER_L2C_TAD	0xa02e
#define PCI_DEVICE_ID_THUNDER_L2C_CBC	0xa02f
#define PCI_DEVICE_ID_THUNDER_L2C_MCI	0xa030

/* TAD interrupt status register offsets (added to the mapped register base). */
#define L2C_TAD_INT_W1C		0x40000
#define L2C_TAD_INT_W1S		0x40008

/* TAD interrupt enable register offsets. */
#define L2C_TAD_INT_ENA_W1C	0x40020
#define L2C_TAD_INT_ENA_W1S	0x40028


/* Individual bits of the TAD interrupt register. */
#define L2C_TAD_INT_L2DDBE	 BIT(1)
#define L2C_TAD_INT_SBFSBE	 BIT(2)
#define L2C_TAD_INT_SBFDBE	 BIT(3)
#define L2C_TAD_INT_FBFSBE	 BIT(4)
#define L2C_TAD_INT_FBFDBE	 BIT(5)
#define L2C_TAD_INT_TAGDBE	 BIT(9)
#define L2C_TAD_INT_RDDISLMC	 BIT(15)
#define L2C_TAD_INT_WRDISLMC	 BIT(16)
#define L2C_TAD_INT_LFBTO	 BIT(17)
#define L2C_TAD_INT_GSYNCTO	 BIT(18)
#define L2C_TAD_INT_RTGSBE	 BIT(32)
#define L2C_TAD_INT_RTGDBE	 BIT(33)
#define L2C_TAD_INT_RDDISOCI	 BIT(34)
#define L2C_TAD_INT_WRDISOCI	 BIT(35)

/* Bits for which thunderx_l2c_tad_isr() captures L2C_TAD_TQD_ERR. */
#define L2C_TAD_INT_ECC		 (L2C_TAD_INT_L2DDBE | \
				  L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \
				  L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE)

/* Bits the threaded handler reports to EDAC as corrected errors. */
#define L2C_TAD_INT_CE          (L2C_TAD_INT_SBFSBE | \
				  L2C_TAD_INT_FBFSBE)

/* Bits the threaded handler reports to EDAC as uncorrected errors. */
#define L2C_TAD_INT_UE		 (L2C_TAD_INT_L2DDBE | \
				  L2C_TAD_INT_SBFDBE | \
				  L2C_TAD_INT_FBFDBE | \
				  L2C_TAD_INT_TAGDBE | \
				  L2C_TAD_INT_RTGDBE | \
				  L2C_TAD_INT_WRDISOCI | \
				  L2C_TAD_INT_RDDISOCI | \
				  L2C_TAD_INT_WRDISLMC | \
				  L2C_TAD_INT_RDDISLMC | \
				  L2C_TAD_INT_LFBTO    | \
				  L2C_TAD_INT_GSYNCTO)
1540
/*
 * Descriptions of the TAD interrupt bits, consumed by decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr l2_tad_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_SBFSBE,
		.descr = "SBF single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_TAD_INT_FBFSBE,
		.descr = "FBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_L2DDBE,
		.descr = "L2D double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_SBFDBE,
		.descr = "SBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_FBFDBE,
		.descr = "FBF double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_TAGDBE,
		.descr = "TAG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RTGDBE,
		.descr = "RTG double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_WRDISLMC,
		.descr = "Write to a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_RDDISLMC,
		.descr = "Read from a disabled LMC",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_LFBTO,
		.descr = "LFB entry timeout",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_TAD_INT_GSYNCTO,
		.descr = "Global sync CCPI timeout",
	},
	{0, 0, NULL},
};
1609
/* Bit groups used by thunderx_l2c_tad_isr() to pick the extra register. */
#define L2C_TAD_INT_TAG		(L2C_TAD_INT_TAGDBE)

#define L2C_TAD_INT_RTG		(L2C_TAD_INT_RTGDBE)

#define L2C_TAD_INT_DISLMC	(L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC)

#define L2C_TAD_INT_DISOCI	(L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI)

/*
 * Mask written to the TAD interrupt enable registers in probe/remove.
 * NOTE(review): L2C_TAD_INT_GSYNCTO is part of L2C_TAD_INT_UE and of
 * l2_tad_errors[] but is not enabled here - confirm this is intentional.
 */
#define L2C_TAD_INT_ENA_ALL	(L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \
				 L2C_TAD_INT_RTG | \
				 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \
				 L2C_TAD_INT_LFBTO)

/*
 * Further TAD register offsets.  TIMEOUT, ERR, TQD_ERR and TTG_ERR are read
 * by thunderx_l2c_tad_isr() as extra error information; TIMETWO is not
 * referenced in the visible part of this file.
 */
#define L2C_TAD_TIMETWO		0x50000
#define L2C_TAD_TIMEOUT		0x50100
#define L2C_TAD_ERR		0x60000
#define L2C_TAD_TQD_ERR		0x60100
#define L2C_TAD_TTG_ERR		0x60200
1628
1629
/* CBC interrupt status register offset (read and written by the CBC ISR). */
#define L2C_CBC_INT_W1C		0x60000

/* CBC interrupt bits; each group selects one extra register in the CBC ISR. */
#define L2C_CBC_INT_RSDSBE	 BIT(0)
#define L2C_CBC_INT_RSDDBE	 BIT(1)

#define L2C_CBC_INT_RSD		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE)

#define L2C_CBC_INT_MIBSBE	 BIT(4)
#define L2C_CBC_INT_MIBDBE	 BIT(5)

#define L2C_CBC_INT_MIB		 (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE)

#define L2C_CBC_INT_IORDDISOCI	 BIT(6)
#define L2C_CBC_INT_IOWRDISOCI	 BIT(7)

#define L2C_CBC_INT_IODISOCI	 (L2C_CBC_INT_IORDDISOCI | \
				  L2C_CBC_INT_IOWRDISOCI)

/* Bits the threaded handler reports as corrected / uncorrected errors. */
#define L2C_CBC_INT_CE		 (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE)
#define L2C_CBC_INT_UE		 (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE)
1650
1651
/*
 * Descriptions of the CBC interrupt bits, consumed by decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr l2_cbc_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_RSDSBE,
		.descr = "RSD single-bit error",
	},
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_CBC_INT_MIBSBE,
		.descr = "MIB single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_RSDDBE,
		.descr = "RSD double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_MIBDBE,
		.descr = "MIB double-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IORDDISOCI,
		.descr = "Read from a disabled CCPI",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_CBC_INT_IOWRDISOCI,
		.descr = "Write to a disabled CCPI",
	},
	{0, 0, NULL},
};
1685
/* CBC interrupt set and enable-clear register offsets. */
#define L2C_CBC_INT_W1S		0x60008
#define L2C_CBC_INT_ENA_W1C	0x60020

/* Mask written to the CBC interrupt enable registers in probe/remove. */
#define L2C_CBC_INT_ENA_ALL	 (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \
				  L2C_CBC_INT_IODISOCI)

#define L2C_CBC_INT_ENA_W1S	0x60028

/*
 * CBC error-information register offsets.  IODISOCIERR, RSDERR and MIBERR
 * are read by thunderx_l2c_cbc_isr(); IOCERR is not referenced in the
 * visible part of this file.
 */
#define L2C_CBC_IODISOCIERR	0x80008
#define L2C_CBC_IOCERR		0x80010
#define L2C_CBC_RSDERR		0x80018
#define L2C_CBC_MIBERR		0x80020
1698
1699
/* MCI interrupt status register offset and its two error bits. */
#define L2C_MCI_INT_W1C		0x0

#define L2C_MCI_INT_VBFSBE	 BIT(0)
#define L2C_MCI_INT_VBFDBE	 BIT(1)
1704
/*
 * Descriptions of the MCI interrupt bits, consumed by decode_register().
 * The all-zero entry terminates the table.
 */
static const struct error_descr l2_mci_errors[] = {
	{
		.type  = ERR_CORRECTED,
		.mask  = L2C_MCI_INT_VBFSBE,
		.descr = "VBF single-bit error",
	},
	{
		.type  = ERR_UNCORRECTED,
		.mask  = L2C_MCI_INT_VBFDBE,
		.descr = "VBF double-bit error",
	},
	{0, 0, NULL},
};
1718
/* MCI interrupt set and enable-clear register offsets. */
#define L2C_MCI_INT_W1S		0x8
#define L2C_MCI_INT_ENA_W1C	0x20

/* Mask written to the MCI interrupt enable registers in probe/remove. */
#define L2C_MCI_INT_ENA_ALL	 (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE)

#define L2C_MCI_INT_ENA_W1S	0x28

/* Extra error-information register read by thunderx_l2c_mci_isr(). */
#define L2C_MCI_ERR		0x10000

/*
 * Bounds used by thunderx_l2c_threaded_isr() for the full message and for
 * the decoded-error text (50 bytes per l2_tad_errors[] entry).
 */
#define L2C_MESSAGE_SIZE	SZ_1K
#define L2C_OTHER_SIZE		(50 * ARRAY_SIZE(l2_tad_errors))
1730
/*
 * One L2C error snapshot, filled by a hard IRQ handler and consumed by
 * thunderx_l2c_threaded_isr().
 */
struct l2c_err_ctx {
	char *reg_ext_name;	/* label printed for reg_ext, e.g. "TQD_ERR" */
	u64  reg_int;		/* interrupt register value at IRQ time */
	u64  reg_ext;		/* extra error-information register value */
};
1736
/*
 * Per-device state of one L2C sub-block (TAD, CBC or MCI); stored in the
 * EDAC device's pvt_info by thunderx_l2c_probe().
 */
struct thunderx_l2c {
	void __iomem *regs;	/* mapped register base (iomap table entry 0) */
	struct pci_dev *pdev;
	struct edac_device_ctl_info *edac_dev;

	struct dentry *debugfs;	/* debugfs directory created in probe */

	int index;		/* not referenced in the visible part of this file */

	/* Single MSI-X entry; also the cookie passed to the IRQ handlers. */
	struct msix_entry msix_ent;

	/*
	 * Snapshot ring: ring_head is advanced by the hard IRQ handlers,
	 * ring_tail by the threaded handler.
	 */
	struct l2c_err_ctx err_ctx[RING_ENTRIES];
	unsigned long ring_head;
	unsigned long ring_tail;
};
1752
thunderx_l2c_tad_isr(int irq,void * irq_id)1753 static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id)
1754 {
1755 struct msix_entry *msix = irq_id;
1756 struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c,
1757 msix_ent);
1758
1759 unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx));
1760 struct l2c_err_ctx *ctx = &tad->err_ctx[head];
1761
1762 ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C);
1763
1764 if (ctx->reg_int & L2C_TAD_INT_ECC) {
1765 ctx->reg_ext_name = "TQD_ERR";
1766 ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR);
1767 } else if (ctx->reg_int & L2C_TAD_INT_TAG) {
1768 ctx->reg_ext_name = "TTG_ERR";
1769 ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR);
1770 } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) {
1771 ctx->reg_ext_name = "TIMEOUT";
1772 ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT);
1773 } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) {
1774 ctx->reg_ext_name = "ERR";
1775 ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR);
1776 }
1777
1778 writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C);
1779
1780 tad->ring_head++;
1781
1782 return IRQ_WAKE_THREAD;
1783 }
1784
thunderx_l2c_cbc_isr(int irq,void * irq_id)1785 static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id)
1786 {
1787 struct msix_entry *msix = irq_id;
1788 struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c,
1789 msix_ent);
1790
1791 unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx));
1792 struct l2c_err_ctx *ctx = &cbc->err_ctx[head];
1793
1794 ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C);
1795
1796 if (ctx->reg_int & L2C_CBC_INT_RSD) {
1797 ctx->reg_ext_name = "RSDERR";
1798 ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR);
1799 } else if (ctx->reg_int & L2C_CBC_INT_MIB) {
1800 ctx->reg_ext_name = "MIBERR";
1801 ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR);
1802 } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) {
1803 ctx->reg_ext_name = "IODISOCIERR";
1804 ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR);
1805 }
1806
1807 writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C);
1808
1809 cbc->ring_head++;
1810
1811 return IRQ_WAKE_THREAD;
1812 }
1813
thunderx_l2c_mci_isr(int irq,void * irq_id)1814 static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id)
1815 {
1816 struct msix_entry *msix = irq_id;
1817 struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c,
1818 msix_ent);
1819
1820 unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx));
1821 struct l2c_err_ctx *ctx = &mci->err_ctx[head];
1822
1823 ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C);
1824 ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR);
1825
1826 writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C);
1827
1828 ctx->reg_ext_name = "ERR";
1829
1830 mci->ring_head++;
1831
1832 return IRQ_WAKE_THREAD;
1833 }
1834
thunderx_l2c_threaded_isr(int irq,void * irq_id)1835 static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id)
1836 {
1837 struct msix_entry *msix = irq_id;
1838 struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c,
1839 msix_ent);
1840
1841 unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx));
1842 struct l2c_err_ctx *ctx = &l2c->err_ctx[tail];
1843 irqreturn_t ret = IRQ_NONE;
1844
1845 u64 mask_ue, mask_ce;
1846 const struct error_descr *l2_errors;
1847 char *reg_int_name;
1848
1849 char *msg;
1850 char *other;
1851
1852 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL);
1853 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL);
1854
1855 if (!msg || !other)
1856 goto err_free;
1857
1858 switch (l2c->pdev->device) {
1859 case PCI_DEVICE_ID_THUNDER_L2C_TAD:
1860 reg_int_name = "L2C_TAD_INT";
1861 mask_ue = L2C_TAD_INT_UE;
1862 mask_ce = L2C_TAD_INT_CE;
1863 l2_errors = l2_tad_errors;
1864 break;
1865 case PCI_DEVICE_ID_THUNDER_L2C_CBC:
1866 reg_int_name = "L2C_CBC_INT";
1867 mask_ue = L2C_CBC_INT_UE;
1868 mask_ce = L2C_CBC_INT_CE;
1869 l2_errors = l2_cbc_errors;
1870 break;
1871 case PCI_DEVICE_ID_THUNDER_L2C_MCI:
1872 reg_int_name = "L2C_MCI_INT";
1873 mask_ue = L2C_MCI_INT_VBFDBE;
1874 mask_ce = L2C_MCI_INT_VBFSBE;
1875 l2_errors = l2_mci_errors;
1876 break;
1877 default:
1878 dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n",
1879 l2c->pdev->device);
1880 goto err_free;
1881 }
1882
1883 while (CIRC_CNT(l2c->ring_head, l2c->ring_tail,
1884 ARRAY_SIZE(l2c->err_ctx))) {
1885 snprintf(msg, L2C_MESSAGE_SIZE,
1886 "%s: %s: %016llx, %s: %016llx",
1887 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int,
1888 ctx->reg_ext_name, ctx->reg_ext);
1889
1890 decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int);
1891
1892 strlcat(msg, other, L2C_MESSAGE_SIZE);
1893
1894 if (ctx->reg_int & mask_ue)
1895 edac_device_handle_ue(l2c->edac_dev, 0, 0, msg);
1896 else if (ctx->reg_int & mask_ce)
1897 edac_device_handle_ce(l2c->edac_dev, 0, 0, msg);
1898
1899 l2c->ring_tail++;
1900 }
1901
1902 ret = IRQ_HANDLED;
1903
1904 err_free:
1905 kfree(other);
1906 kfree(msg);
1907
1908 return ret;
1909 }
1910
/*
 * One debugfs attribute per L2C sub-block, each exposing that block's
 * interrupt W1S register, plus the per-block table that thunderx_l2c_probe()
 * hands to thunderx_create_debugfs_nodes().
 */
#define L2C_DEBUGFS_ATTR(_name, _reg)	DEBUGFS_REG_ATTR(l2c, _name, _reg)

L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S);

static struct debugfs_entry *l2c_tad_dfs_ents[] = {
	&debugfs_tad_int,
};

L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S);

static struct debugfs_entry *l2c_cbc_dfs_ents[] = {
	&debugfs_cbc_int,
};

L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S);

static struct debugfs_entry *l2c_mci_dfs_ents[] = {
	&debugfs_mci_int,
};
1930
/* PCI IDs bound by thunderx_l2c_driver; the zero entry terminates the table. */
static const struct pci_device_id thunderx_l2c_pci_tbl[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), },
	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), },
	{ 0, },
};
1937
/*
 * Probe one L2C sub-block (TAD, CBC or MCI).
 *
 * Enables the device, maps region 0, picks the hard IRQ handler, debugfs
 * table, EDAC name format and interrupt-enable register/mask from the PCI
 * device ID, allocates an EDAC device whose private area is the struct
 * thunderx_l2c, enables one MSI-X vector and attaches the handler pair,
 * registers with EDAC, optionally creates the debugfs nodes and finally
 * writes the enable mask to the block's W1S enable register.
 *
 * Returns 0 on success or a negative errno.  Failures after the EDAC control
 * structure has been allocated free it before returning.
 */
static int thunderx_l2c_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	struct thunderx_l2c *l2c;
	struct edac_device_ctl_info *edac_dev;
	struct debugfs_entry **l2c_devattr;
	size_t dfs_entries;
	irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL;
	char name[32];
	const char *fmt;
	u64 reg_en_offs, reg_en_mask;
	int idx;
	int ret;

	ret = pcim_enable_device(pdev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret);
		return ret;
	}

	ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c");
	if (ret) {
		dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret);
		return ret;
	}

	/* Everything that differs between the three sub-blocks is chosen here. */
	switch (pdev->device) {
	case PCI_DEVICE_ID_THUNDER_L2C_TAD:
		thunderx_l2c_isr = thunderx_l2c_tad_isr;
		l2c_devattr = l2c_tad_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents);
		fmt = "L2C-TAD%d";
		reg_en_offs = L2C_TAD_INT_ENA_W1S;
		reg_en_mask = L2C_TAD_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_CBC:
		thunderx_l2c_isr = thunderx_l2c_cbc_isr;
		l2c_devattr = l2c_cbc_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents);
		fmt = "L2C-CBC%d";
		reg_en_offs = L2C_CBC_INT_ENA_W1S;
		reg_en_mask = L2C_CBC_INT_ENA_ALL;
		break;
	case PCI_DEVICE_ID_THUNDER_L2C_MCI:
		thunderx_l2c_isr = thunderx_l2c_mci_isr;
		l2c_devattr = l2c_mci_dfs_ents;
		dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents);
		fmt = "L2C-MCI%d";
		reg_en_offs = L2C_MCI_INT_ENA_W1S;
		reg_en_mask = L2C_MCI_INT_ENA_ALL;
		break;
	default:
		/* Should never get here. */
		dev_err(&pdev->dev, "Unsupported PCI device: %04x\n",
			pdev->device);
		return -EINVAL;
	}

	idx = edac_device_alloc_index();
	snprintf(name, sizeof(name), fmt, idx);

	edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c),
					      name, 1, "L2C", 1, 0, idx);
	if (!edac_dev) {
		dev_err(&pdev->dev, "Cannot allocate EDAC device\n");
		return -ENOMEM;
	}

	/* The driver state lives in the EDAC device's private area. */
	l2c = edac_dev->pvt_info;
	l2c->edac_dev = edac_dev;

	l2c->regs = pcim_iomap_table(pdev)[0];
	if (!l2c->regs) {
		dev_err(&pdev->dev, "Cannot map PCI resources\n");
		ret = -ENODEV;
		goto err_free;
	}

	l2c->pdev = pdev;

	/* The snapshot ring starts out empty. */
	l2c->ring_head = 0;
	l2c->ring_tail = 0;

	l2c->msix_ent.entry = 0;
	l2c->msix_ent.vector = 0;

	ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1);
	if (ret) {
		dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret);
		goto err_free;
	}

	/*
	 * The msix_entry is the handler cookie; the handlers recover the
	 * thunderx_l2c from it with container_of().
	 */
	ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector,
					thunderx_l2c_isr,
					thunderx_l2c_threaded_isr,
					0, "[EDAC] ThunderX L2C",
					&l2c->msix_ent);
	if (ret)
		goto err_free;

	edac_dev->dev = &pdev->dev;
	edac_dev->dev_name = dev_name(&pdev->dev);
	edac_dev->mod_name = "thunderx-l2c";
	edac_dev->ctl_name = "thunderx-l2c";

	ret = edac_device_add_device(edac_dev);
	if (ret) {
		dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret);
		goto err_free;
	}

	if (IS_ENABLED(CONFIG_EDAC_DEBUG)) {
		l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name);

		/* A short count only produces a warning; probing continues. */
		ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr,
					      l2c, dfs_entries);

		if (ret != dfs_entries) {
			dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n",
				 ret, ret >= 0 ? " created" : "");
		}
	}

	pci_set_drvdata(pdev, edac_dev);

	/* Last step: write the enable mask to the W1S enable register. */
	writeq(reg_en_mask, l2c->regs + reg_en_offs);

	return 0;

err_free:
	edac_device_free_ctl_info(edac_dev);

	return ret;
}
2072
thunderx_l2c_remove(struct pci_dev * pdev)2073 static void thunderx_l2c_remove(struct pci_dev *pdev)
2074 {
2075 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev);
2076 struct thunderx_l2c *l2c = edac_dev->pvt_info;
2077
2078 switch (pdev->device) {
2079 case PCI_DEVICE_ID_THUNDER_L2C_TAD:
2080 writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C);
2081 break;
2082 case PCI_DEVICE_ID_THUNDER_L2C_CBC:
2083 writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C);
2084 break;
2085 case PCI_DEVICE_ID_THUNDER_L2C_MCI:
2086 writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C);
2087 break;
2088 }
2089
2090 edac_debugfs_remove_recursive(l2c->debugfs);
2091
2092 edac_device_del_device(&pdev->dev);
2093 edac_device_free_ctl_info(edac_dev);
2094 }
2095
MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl);

/* PCI driver for the L2C sub-blocks; registered from thunderx_edac_init(). */
static struct pci_driver thunderx_l2c_driver = {
	.name     = "thunderx_l2c_edac",
	.probe    = thunderx_l2c_probe,
	.remove   = thunderx_l2c_remove,
	.id_table = thunderx_l2c_pci_tbl,
};
2104
thunderx_edac_init(void)2105 static int __init thunderx_edac_init(void)
2106 {
2107 int rc = 0;
2108
2109 if (ghes_get_devices())
2110 return -EBUSY;
2111
2112 rc = pci_register_driver(&thunderx_lmc_driver);
2113 if (rc)
2114 return rc;
2115
2116 rc = pci_register_driver(&thunderx_ocx_driver);
2117 if (rc)
2118 goto err_lmc;
2119
2120 rc = pci_register_driver(&thunderx_l2c_driver);
2121 if (rc)
2122 goto err_ocx;
2123
2124 return rc;
2125 err_ocx:
2126 pci_unregister_driver(&thunderx_ocx_driver);
2127 err_lmc:
2128 pci_unregister_driver(&thunderx_lmc_driver);
2129
2130 return rc;
2131 }
2132
thunderx_edac_exit(void)2133 static void __exit thunderx_edac_exit(void)
2134 {
2135 pci_unregister_driver(&thunderx_l2c_driver);
2136 pci_unregister_driver(&thunderx_ocx_driver);
2137 pci_unregister_driver(&thunderx_lmc_driver);
2138
2139 }
2140
/* Module entry/exit hooks and metadata. */
module_init(thunderx_edac_init);
module_exit(thunderx_edac_exit);

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Cavium, Inc.");
MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX");
2147