1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Xilinx ZynqMP OCM ECC Driver
4 *
5 * Copyright (C) 2022 Advanced Micro Devices, Inc.
6 */
7
8 #include <linux/edac.h>
9 #include <linux/interrupt.h>
10 #include <linux/module.h>
11 #include <linux/of.h>
12 #include <linux/of_platform.h>
13 #include <linux/platform_device.h>
14
15 #include "edac_module.h"
16
/* Size of the per-device buffer used to format error reports */
#define ZYNQMP_OCM_EDAC_MSG_SIZE	256

#define ZYNQMP_OCM_EDAC_STRING		"zynqmp_ocm"

/* Error/Interrupt register offsets */
#define ERR_CTRL_OFST			0x0
#define OCM_ISR_OFST			0x04
#define OCM_IMR_OFST			0x08
#define OCM_IEN_OFST			0x0C
#define OCM_IDS_OFST			0x10

/* ECC control register offset */
#define ECC_CTRL_OFST			0x14

/* Correctable error info register offsets */
#define CE_FFA_OFST			0x1C
#define CE_FFD0_OFST			0x20
#define CE_FFD1_OFST			0x24
#define CE_FFD2_OFST			0x28
#define CE_FFD3_OFST			0x2C
#define CE_FFE_OFST			0x30

/* Uncorrectable error info register offsets */
#define UE_FFA_OFST			0x34
#define UE_FFD0_OFST			0x38
#define UE_FFD1_OFST			0x3C
#define UE_FFD2_OFST			0x40
#define UE_FFD3_OFST			0x44
#define UE_FFE_OFST			0x48

/*
 * Values written to OCM_ISR_OFST by get_error_info() once the details of
 * a correctable/uncorrectable error have been read out.
 */
#define ECC_CTRL_CLR_CE_ERR		0x40
#define ECC_CTRL_CLR_UE_ERR		0x80

/* Fault injection data and count register offsets */
#define OCM_FID0_OFST			0x4C
#define OCM_FID1_OFST			0x50
#define OCM_FID2_OFST			0x54
#define OCM_FID3_OFST			0x58
#define OCM_FIC_OFST			0x74

/* Bit position limits for the lower/upper 32-bit fault injection words */
#define UE_MAX_BITPOS_LOWER		31
#define UE_MIN_BITPOS_UPPER		32
#define UE_MAX_BITPOS_UPPER		63

/* Interrupt masks */
#define OCM_CEINTR_MASK			BIT(6)
#define OCM_UEINTR_MASK			BIT(7)
#define OCM_ECC_ENABLE_MASK		BIT(0)

/* Valid bits of the fault injection count register */
#define OCM_FICOUNT_MASK		GENMASK(23, 0)
/* Number of bit positions taken for an uncorrectable error injection */
#define OCM_NUM_UE_BITPOS		2
/* OR-ed into the fault address register value when reporting an error */
#define OCM_BASEVAL			0xFFFC0000
#define EDAC_DEVICE			"ZynqMP-OCM"
71
72 /**
73 * struct ecc_error_info - ECC error log information
74 * @addr: Fault generated at this address
75 * @fault_lo: Generated fault data (lower 32-bit)
76 * @fault_hi: Generated fault data (upper 32-bit)
77 */
78 struct ecc_error_info {
79 u32 addr;
80 u32 fault_lo;
81 u32 fault_hi;
82 };
83
84 /**
85 * struct ecc_status - ECC status information to report
86 * @ce_cnt: Correctable error count
87 * @ue_cnt: Uncorrectable error count
88 * @ceinfo: Correctable error log information
89 * @ueinfo: Uncorrectable error log information
90 */
91 struct ecc_status {
92 u32 ce_cnt;
93 u32 ue_cnt;
94 struct ecc_error_info ceinfo;
95 struct ecc_error_info ueinfo;
96 };
97
98 /**
99 * struct edac_priv - OCM private instance data
100 * @baseaddr: Base address of the OCM
101 * @message: Buffer for framing the event specific info
102 * @stat: ECC status information
103 * @ce_cnt: Correctable Error count
104 * @ue_cnt: Uncorrectable Error count
105 * @debugfs_dir: Directory entry for debugfs
106 * @ce_bitpos: Bit position for Correctable Error
107 * @ue_bitpos: Array to store UnCorrectable Error bit positions
108 * @fault_injection_cnt: Fault Injection Counter value
109 */
110 struct edac_priv {
111 void __iomem *baseaddr;
112 char message[ZYNQMP_OCM_EDAC_MSG_SIZE];
113 struct ecc_status stat;
114 u32 ce_cnt;
115 u32 ue_cnt;
116 #ifdef CONFIG_EDAC_DEBUG
117 struct dentry *debugfs_dir;
118 u8 ce_bitpos;
119 u8 ue_bitpos[OCM_NUM_UE_BITPOS];
120 u32 fault_injection_cnt;
121 #endif
122 };
123
124 /**
125 * get_error_info - Get the current ECC error info
126 * @base: Pointer to the base address of the OCM
127 * @p: Pointer to the OCM ECC status structure
128 * @mask: Status register mask value
129 *
130 * Determines there is any ECC error or not
131 *
132 */
get_error_info(void __iomem * base,struct ecc_status * p,int mask)133 static void get_error_info(void __iomem *base, struct ecc_status *p, int mask)
134 {
135 if (mask & OCM_CEINTR_MASK) {
136 p->ce_cnt++;
137 p->ceinfo.fault_lo = readl(base + CE_FFD0_OFST);
138 p->ceinfo.fault_hi = readl(base + CE_FFD1_OFST);
139 p->ceinfo.addr = (OCM_BASEVAL | readl(base + CE_FFA_OFST));
140 writel(ECC_CTRL_CLR_CE_ERR, base + OCM_ISR_OFST);
141 } else if (mask & OCM_UEINTR_MASK) {
142 p->ue_cnt++;
143 p->ueinfo.fault_lo = readl(base + UE_FFD0_OFST);
144 p->ueinfo.fault_hi = readl(base + UE_FFD1_OFST);
145 p->ueinfo.addr = (OCM_BASEVAL | readl(base + UE_FFA_OFST));
146 writel(ECC_CTRL_CLR_UE_ERR, base + OCM_ISR_OFST);
147 }
148 }
149
150 /**
151 * handle_error - Handle error types CE and UE
152 * @dci: Pointer to the EDAC device instance
153 * @p: Pointer to the OCM ECC status structure
154 *
155 * Handles correctable and uncorrectable errors.
156 */
handle_error(struct edac_device_ctl_info * dci,struct ecc_status * p)157 static void handle_error(struct edac_device_ctl_info *dci, struct ecc_status *p)
158 {
159 struct edac_priv *priv = dci->pvt_info;
160 struct ecc_error_info *pinf;
161
162 if (p->ce_cnt) {
163 pinf = &p->ceinfo;
164 snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
165 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
166 "CE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
167 edac_device_handle_ce(dci, 0, 0, priv->message);
168 }
169
170 if (p->ue_cnt) {
171 pinf = &p->ueinfo;
172 snprintf(priv->message, ZYNQMP_OCM_EDAC_MSG_SIZE,
173 "\nOCM ECC error type :%s\nAddr: [0x%x]\nFault Data[0x%08x%08x]",
174 "UE", pinf->addr, pinf->fault_hi, pinf->fault_lo);
175 edac_device_handle_ue(dci, 0, 0, priv->message);
176 }
177
178 memset(p, 0, sizeof(*p));
179 }
180
181 /**
182 * intr_handler - ISR routine
183 * @irq: irq number
184 * @dev_id: device id pointer
185 *
186 * Return: IRQ_NONE, if CE/UE interrupt not set or IRQ_HANDLED otherwise
187 */
intr_handler(int irq,void * dev_id)188 static irqreturn_t intr_handler(int irq, void *dev_id)
189 {
190 struct edac_device_ctl_info *dci = dev_id;
191 struct edac_priv *priv = dci->pvt_info;
192 int regval;
193
194 regval = readl(priv->baseaddr + OCM_ISR_OFST);
195 if (!(regval & (OCM_CEINTR_MASK | OCM_UEINTR_MASK))) {
196 WARN_ONCE(1, "Unhandled IRQ%d, ISR: 0x%x", irq, regval);
197 return IRQ_NONE;
198 }
199
200 get_error_info(priv->baseaddr, &priv->stat, regval);
201
202 priv->ce_cnt += priv->stat.ce_cnt;
203 priv->ue_cnt += priv->stat.ue_cnt;
204 handle_error(dci, &priv->stat);
205
206 return IRQ_HANDLED;
207 }
208
209 /**
210 * get_eccstate - Return the ECC status
211 * @base: Pointer to the OCM base address
212 *
213 * Get the ECC enable/disable status
214 *
215 * Return: ECC status 0/1.
216 */
get_eccstate(void __iomem * base)217 static bool get_eccstate(void __iomem *base)
218 {
219 return readl(base + ECC_CTRL_OFST) & OCM_ECC_ENABLE_MASK;
220 }
221
222 #ifdef CONFIG_EDAC_DEBUG
223 /**
224 * write_fault_count - write fault injection count
225 * @priv: Pointer to the EDAC private struct
226 *
227 * Update the fault injection count register, once the counter reaches
228 * zero, it injects errors
229 */
write_fault_count(struct edac_priv * priv)230 static void write_fault_count(struct edac_priv *priv)
231 {
232 u32 ficount = priv->fault_injection_cnt;
233
234 if (ficount & ~OCM_FICOUNT_MASK) {
235 ficount &= OCM_FICOUNT_MASK;
236 edac_printk(KERN_INFO, EDAC_DEVICE,
237 "Fault injection count value truncated to %d\n", ficount);
238 }
239
240 writel(ficount, priv->baseaddr + OCM_FIC_OFST);
241 }
242
243 /*
244 * To get the Correctable Error injected, the following steps are needed:
245 * - Setup the optional Fault Injection Count:
246 * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
247 * - Write the Correctable Error bit position value:
248 * echo <bit_pos val> > /sys/kernel/debug/edac/ocm/inject_ce_bitpos
249 */
inject_ce_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)250 static ssize_t inject_ce_write(struct file *file, const char __user *data,
251 size_t count, loff_t *ppos)
252 {
253 struct edac_device_ctl_info *edac_dev = file->private_data;
254 struct edac_priv *priv = edac_dev->pvt_info;
255 int ret;
256
257 if (!data)
258 return -EFAULT;
259
260 ret = kstrtou8_from_user(data, count, 0, &priv->ce_bitpos);
261 if (ret)
262 return ret;
263
264 if (priv->ce_bitpos > UE_MAX_BITPOS_UPPER)
265 return -EINVAL;
266
267 if (priv->ce_bitpos <= UE_MAX_BITPOS_LOWER) {
268 writel(BIT(priv->ce_bitpos), priv->baseaddr + OCM_FID0_OFST);
269 writel(0, priv->baseaddr + OCM_FID1_OFST);
270 } else {
271 writel(BIT(priv->ce_bitpos - UE_MIN_BITPOS_UPPER),
272 priv->baseaddr + OCM_FID1_OFST);
273 writel(0, priv->baseaddr + OCM_FID0_OFST);
274 }
275
276 write_fault_count(priv);
277
278 return count;
279 }
280
281 static const struct file_operations inject_ce_fops = {
282 .open = simple_open,
283 .write = inject_ce_write,
284 .llseek = generic_file_llseek,
285 };
286
287 /*
288 * To get the Uncorrectable Error injected, the following steps are needed:
289 * - Setup the optional Fault Injection Count:
290 * echo <fault_count val> > /sys/kernel/debug/edac/ocm/inject_fault_count
291 * - Write the Uncorrectable Error bit position values:
292 * echo <bit_pos0 val>,<bit_pos1 val> > /sys/kernel/debug/edac/ocm/inject_ue_bitpos
293 */
inject_ue_write(struct file * file,const char __user * data,size_t count,loff_t * ppos)294 static ssize_t inject_ue_write(struct file *file, const char __user *data,
295 size_t count, loff_t *ppos)
296 {
297 struct edac_device_ctl_info *edac_dev = file->private_data;
298 struct edac_priv *priv = edac_dev->pvt_info;
299 char buf[6], *pbuf, *token[2];
300 u64 ue_bitpos;
301 int i, ret;
302 u8 len;
303
304 if (!data)
305 return -EFAULT;
306
307 len = min_t(size_t, count, sizeof(buf));
308 if (copy_from_user(buf, data, len))
309 return -EFAULT;
310
311 buf[len] = '\0';
312 pbuf = &buf[0];
313 for (i = 0; i < OCM_NUM_UE_BITPOS; i++)
314 token[i] = strsep(&pbuf, ",");
315
316 ret = kstrtou8(token[0], 0, &priv->ue_bitpos[0]);
317 if (ret)
318 return ret;
319
320 ret = kstrtou8(token[1], 0, &priv->ue_bitpos[1]);
321 if (ret)
322 return ret;
323
324 if (priv->ue_bitpos[0] > UE_MAX_BITPOS_UPPER ||
325 priv->ue_bitpos[1] > UE_MAX_BITPOS_UPPER)
326 return -EINVAL;
327
328 if (priv->ue_bitpos[0] == priv->ue_bitpos[1]) {
329 edac_printk(KERN_ERR, EDAC_DEVICE, "Bit positions should not be equal\n");
330 return -EINVAL;
331 }
332
333 ue_bitpos = BIT(priv->ue_bitpos[0]) | BIT(priv->ue_bitpos[1]);
334
335 writel((u32)ue_bitpos, priv->baseaddr + OCM_FID0_OFST);
336 writel((u32)(ue_bitpos >> 32), priv->baseaddr + OCM_FID1_OFST);
337
338 write_fault_count(priv);
339
340 return count;
341 }
342
343 static const struct file_operations inject_ue_fops = {
344 .open = simple_open,
345 .write = inject_ue_write,
346 .llseek = generic_file_llseek,
347 };
348
setup_debugfs(struct edac_device_ctl_info * edac_dev)349 static void setup_debugfs(struct edac_device_ctl_info *edac_dev)
350 {
351 struct edac_priv *priv = edac_dev->pvt_info;
352
353 priv->debugfs_dir = edac_debugfs_create_dir("ocm");
354 if (!priv->debugfs_dir)
355 return;
356
357 edac_debugfs_create_x32("inject_fault_count", 0644, priv->debugfs_dir,
358 &priv->fault_injection_cnt);
359 edac_debugfs_create_file("inject_ue_bitpos", 0644, priv->debugfs_dir,
360 edac_dev, &inject_ue_fops);
361 edac_debugfs_create_file("inject_ce_bitpos", 0644, priv->debugfs_dir,
362 edac_dev, &inject_ce_fops);
363 }
364 #endif
365
edac_probe(struct platform_device * pdev)366 static int edac_probe(struct platform_device *pdev)
367 {
368 struct edac_device_ctl_info *dci;
369 struct edac_priv *priv;
370 void __iomem *baseaddr;
371 struct resource *res;
372 int irq, ret;
373
374 baseaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
375 if (IS_ERR(baseaddr))
376 return PTR_ERR(baseaddr);
377
378 if (!get_eccstate(baseaddr)) {
379 edac_printk(KERN_INFO, EDAC_DEVICE, "ECC not enabled\n");
380 return -ENXIO;
381 }
382
383 dci = edac_device_alloc_ctl_info(sizeof(*priv), ZYNQMP_OCM_EDAC_STRING,
384 1, ZYNQMP_OCM_EDAC_STRING, 1, 0,
385 edac_device_alloc_index());
386 if (!dci)
387 return -ENOMEM;
388
389 priv = dci->pvt_info;
390 platform_set_drvdata(pdev, dci);
391 dci->dev = &pdev->dev;
392 priv->baseaddr = baseaddr;
393 dci->mod_name = pdev->dev.driver->name;
394 dci->ctl_name = ZYNQMP_OCM_EDAC_STRING;
395 dci->dev_name = dev_name(&pdev->dev);
396
397 irq = platform_get_irq(pdev, 0);
398 if (irq < 0) {
399 ret = irq;
400 goto free_dev_ctl;
401 }
402
403 ret = devm_request_irq(&pdev->dev, irq, intr_handler, 0,
404 dev_name(&pdev->dev), dci);
405 if (ret) {
406 edac_printk(KERN_ERR, EDAC_DEVICE, "Failed to request Irq\n");
407 goto free_dev_ctl;
408 }
409
410 /* Enable UE, CE interrupts */
411 writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IEN_OFST);
412
413 #ifdef CONFIG_EDAC_DEBUG
414 setup_debugfs(dci);
415 #endif
416
417 ret = edac_device_add_device(dci);
418 if (ret)
419 goto free_dev_ctl;
420
421 return 0;
422
423 free_dev_ctl:
424 edac_device_free_ctl_info(dci);
425
426 return ret;
427 }
428
edac_remove(struct platform_device * pdev)429 static void edac_remove(struct platform_device *pdev)
430 {
431 struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
432 struct edac_priv *priv = dci->pvt_info;
433
434 /* Disable UE, CE interrupts */
435 writel((OCM_CEINTR_MASK | OCM_UEINTR_MASK), priv->baseaddr + OCM_IDS_OFST);
436
437 #ifdef CONFIG_EDAC_DEBUG
438 debugfs_remove_recursive(priv->debugfs_dir);
439 #endif
440
441 edac_device_del_device(&pdev->dev);
442 edac_device_free_ctl_info(dci);
443 }
444
445 static const struct of_device_id zynqmp_ocm_edac_match[] = {
446 { .compatible = "xlnx,zynqmp-ocmc-1.0"},
447 { /* end of table */ }
448 };
449
450 MODULE_DEVICE_TABLE(of, zynqmp_ocm_edac_match);
451
452 static struct platform_driver zynqmp_ocm_edac_driver = {
453 .driver = {
454 .name = "zynqmp-ocm-edac",
455 .of_match_table = zynqmp_ocm_edac_match,
456 },
457 .probe = edac_probe,
458 .remove_new = edac_remove,
459 };
460
461 module_platform_driver(zynqmp_ocm_edac_driver);
462
463 MODULE_AUTHOR("Advanced Micro Devices, Inc");
464 MODULE_DESCRIPTION("Xilinx ZynqMP OCM ECC driver");
465 MODULE_LICENSE("GPL");
466