xref: /qemu/hw/pci/pcie_aer.c (revision ca61e750)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "sysemu/sysemu.h"
23 #include "qapi/qmp/qdict.h"
24 #include "migration/vmstate.h"
25 #include "monitor/monitor.h"
26 #include "hw/pci/pci_bridge.h"
27 #include "hw/pci/pcie.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/msi.h"
30 #include "hw/pci/pci_bus.h"
31 #include "hw/pci/pcie_regs.h"
32 #include "qapi/error.h"
33 
/* Uncomment to make PCIE_DPRINTF() print to stderr. */
/* #define DEBUG_PCIE */

/*
 * PCIE_DPRINTF(fmt, ...): printf-style debug trace prefixed with the name of
 * the calling function and the source line.  Expands to an empty statement
 * unless DEBUG_PCIE is defined.
 */
#ifndef DEBUG_PCIE
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#else
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#endif

/* PCIE_DPRINTF() with the device name and devfn added to the prefix. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets, relative to PCI_ERR_ROOT_ERR_SRC, of the 16-bit source id
 * recorded for correctable and for uncorrectable error messages.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
46 
/*
 * Describes where an injected error was sent.  Filled in by
 * do_pcie_aer_inject_error() on success.
 */
typedef struct PCIEErrorDetails PCIEErrorDetails;

struct PCIEErrorDetails {
    const char *id;         /* presumably the target device id -- confirm */
    const char *root_bus;   /* presumably the root bus path -- confirm */
    int bus;
    int devfn;
};
53 
54 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
55 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
56 {
57     switch (status) {
58     case PCI_ERR_UNC_INTN:
59     case PCI_ERR_UNC_DLP:
60     case PCI_ERR_UNC_SDN:
61     case PCI_ERR_UNC_RX_OVER:
62     case PCI_ERR_UNC_FCP:
63     case PCI_ERR_UNC_MALF_TLP:
64         return PCI_ERR_ROOT_CMD_FATAL_EN;
65     case PCI_ERR_UNC_POISON_TLP:
66     case PCI_ERR_UNC_ECRC:
67     case PCI_ERR_UNC_UNSUP:
68     case PCI_ERR_UNC_COMP_TIME:
69     case PCI_ERR_UNC_COMP_ABORT:
70     case PCI_ERR_UNC_UNX_COMP:
71     case PCI_ERR_UNC_ACSV:
72     case PCI_ERR_UNC_MCBTLP:
73     case PCI_ERR_UNC_ATOP_EBLOCKED:
74     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
75         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
76     default:
77         abort();
78         break;
79     }
80     return PCI_ERR_ROOT_CMD_FATAL_EN;
81 }
82 
83 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
84 {
85     if (aer_log->log_num == aer_log->log_max) {
86         return -1;
87     }
88     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
89     aer_log->log_num++;
90     return 0;
91 }
92 
93 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
94 {
95     assert(aer_log->log_num);
96     *err = aer_log->log[0];
97     aer_log->log_num--;
98     memmove(&aer_log->log[0], &aer_log->log[1],
99             aer_log->log_num * sizeof *err);
100 }
101 
102 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
103 {
104     aer_log->log_num = 0;
105 }
106 
107 int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
108                   uint16_t size, Error **errp)
109 {
110     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
111                         offset, size);
112     dev->exp.aer_cap = offset;
113 
114     /* clip down the value to avoid unreasonable memory usage */
115     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
116         error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
117                 "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
118         return -EINVAL;
119     }
120     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
121                                         dev->exp.aer_log.log_max);
122 
123     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
124                  PCI_ERR_UNC_SUPPORTED);
125 
126     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
127                  PCI_ERR_UNC_SEVERITY_DEFAULT);
128     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
129                  PCI_ERR_UNC_SUPPORTED);
130 
131     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
132                                PCI_ERR_COR_SUPPORTED);
133 
134     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
135                  PCI_ERR_COR_MASK_DEFAULT);
136     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
137                  PCI_ERR_COR_SUPPORTED);
138 
139     /* capabilities and control. multiple header logging is supported */
140     if (dev->exp.aer_log.log_max > 0) {
141         pci_set_long(dev->config + offset + PCI_ERR_CAP,
142                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
143                      PCI_ERR_CAP_MHRC);
144         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
145                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
146                      PCI_ERR_CAP_MHRE);
147     } else {
148         pci_set_long(dev->config + offset + PCI_ERR_CAP,
149                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
150         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
151                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
152     }
153 
154     switch (pcie_cap_get_type(dev)) {
155     case PCI_EXP_TYPE_ROOT_PORT:
156         /* this case will be set by pcie_aer_root_init() */
157         /* fallthrough */
158     case PCI_EXP_TYPE_DOWNSTREAM:
159     case PCI_EXP_TYPE_UPSTREAM:
160         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
161                                    PCI_BRIDGE_CTL_SERR);
162         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
163                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
164         break;
165     default:
166         /* nothing */
167         break;
168     }
169     return 0;
170 }
171 
172 void pcie_aer_exit(PCIDevice *dev)
173 {
174     g_free(dev->exp.aer_log.log);
175 }
176 
177 static void pcie_aer_update_uncor_status(PCIDevice *dev)
178 {
179     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
180     PCIEAERLog *aer_log = &dev->exp.aer_log;
181 
182     uint16_t i;
183     for (i = 0; i < aer_log->log_num; i++) {
184         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
185                                    dev->exp.aer_log.log[i].status);
186     }
187 }
188 
189 /*
190  * return value:
191  * true: error message needs to be sent up
192  * false: error message is masked
193  *
194  * 6.2.6 Error Message Control
195  * Figure 6-3
196  * all pci express devices part
197  */
198 static bool
199 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
200 {
201     if (!(pcie_aer_msg_is_uncor(msg) &&
202           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
203         return false;
204     }
205 
206     /* Signaled System Error
207      *
208      * 7.5.1.1 Command register
209      * Bit 8 SERR# Enable
210      *
211      * When Set, this bit enables reporting of Non-fatal and Fatal
212      * errors detected by the Function to the Root Complex. Note that
213      * errors are reported if enabled either through this bit or through
214      * the PCI Express specific bits in the Device Control register (see
215      * Section 7.8.4).
216      */
217     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
218                                PCI_STATUS_SIG_SYSTEM_ERROR);
219 
220     if (!(msg->severity &
221           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
222         return false;
223     }
224 
225     /* send up error message */
226     return true;
227 }
228 
229 /*
230  * return value:
231  * true: error message is sent up
232  * false: error message is masked
233  *
234  * 6.2.6 Error Message Control
235  * Figure 6-3
236  * virtual pci bridge part
237  */
238 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
239 {
240     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
241 
242     if (pcie_aer_msg_is_uncor(msg)) {
243         /* Received System Error */
244         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
245                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
246     }
247 
248     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
249         return false;
250     }
251     return true;
252 }
253 
254 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
255 {
256     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
257     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
258     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
259                                  PCI_ERR_ROOT_IRQ);
260     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
261                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
262 }
263 
264 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
265 {
266     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
267     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
268     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
269 }
270 
271 /* Given a status register, get corresponding bits in the command register */
272 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
273 {
274     uint32_t cmd = 0;
275     if (status & PCI_ERR_ROOT_COR_RCV) {
276         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
277     }
278     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
279         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
280     }
281     if (status & PCI_ERR_ROOT_FATAL_RCV) {
282         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
283     }
284     return cmd;
285 }
286 
287 static void pcie_aer_root_notify(PCIDevice *dev)
288 {
289     if (msix_enabled(dev)) {
290         msix_notify(dev, pcie_aer_root_get_vector(dev));
291     } else if (msi_enabled(dev)) {
292         msi_notify(dev, pcie_aer_root_get_vector(dev));
293     } else if (pci_intx(dev) != -1) {
294         pci_irq_assert(dev);
295     }
296 }
297 
298 /*
299  * 6.2.6 Error Message Control
300  * Figure 6-3
301  * root port part
302  */
303 static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
304 {
305     uint16_t cmd;
306     uint8_t *aer_cap;
307     uint32_t root_cmd;
308     uint32_t root_status, prev_status;
309 
310     cmd = pci_get_word(dev->config + PCI_COMMAND);
311     aer_cap = dev->config + dev->exp.aer_cap;
312     root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
313     prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
314 
315     if (cmd & PCI_COMMAND_SERR) {
316         /* System Error.
317          *
318          * The way to report System Error is platform specific and
319          * it isn't implemented in qemu right now.
320          * So just discard the error for now.
321          * OS which cares of aer would receive errors via
322          * native aer mechanims, so this wouldn't matter.
323          */
324     }
325 
326     /* Errro Message Received: Root Error Status register */
327     switch (msg->severity) {
328     case PCI_ERR_ROOT_CMD_COR_EN:
329         if (root_status & PCI_ERR_ROOT_COR_RCV) {
330             root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
331         } else {
332             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
333                          msg->source_id);
334         }
335         root_status |= PCI_ERR_ROOT_COR_RCV;
336         break;
337     case PCI_ERR_ROOT_CMD_NONFATAL_EN:
338         root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
339         break;
340     case PCI_ERR_ROOT_CMD_FATAL_EN:
341         if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
342             root_status |= PCI_ERR_ROOT_FIRST_FATAL;
343         }
344         root_status |= PCI_ERR_ROOT_FATAL_RCV;
345         break;
346     default:
347         abort();
348         break;
349     }
350     if (pcie_aer_msg_is_uncor(msg)) {
351         if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
352             root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
353         } else {
354             pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
355                          PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
356         }
357         root_status |= PCI_ERR_ROOT_UNCOR_RCV;
358     }
359     pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);
360 
361     /* 6.2.4.1.2 Interrupt Generation */
362     /* All the above did was set some bits in the status register.
363      * Specifically these that match message severity.
364      * The below code relies on this fact. */
365     if (!(root_cmd & msg->severity) ||
366         (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
367         /* Condition is not being set or was already true so nothing to do. */
368         return;
369     }
370 
371     pcie_aer_root_notify(dev);
372 }
373 
374 /*
375  * 6.2.6 Error Message Control Figure 6-3
376  *
377  * Walk up the bus tree from the device, propagate the error message.
378  */
379 static void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
380 {
381     uint8_t type;
382 
383     while (dev) {
384         if (!pci_is_express(dev)) {
385             /* just ignore it */
386             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
387              * Consider e.g. a PCI bridge above a PCI Express device. */
388             return;
389         }
390 
391         type = pcie_cap_get_type(dev);
392         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
393             type == PCI_EXP_TYPE_UPSTREAM ||
394             type == PCI_EXP_TYPE_DOWNSTREAM) &&
395             !pcie_aer_msg_vbridge(dev, msg)) {
396                 return;
397         }
398         if (!pcie_aer_msg_alldev(dev, msg)) {
399             return;
400         }
401         if (type == PCI_EXP_TYPE_ROOT_PORT) {
402             pcie_aer_msg_root_port(dev, msg);
403             /* Root port can notify system itself,
404                or send the error message to root complex event collector. */
405             /*
406              * if root port is associated with an event collector,
407              * return the root complex event collector here.
408              * For now root complex event collector isn't supported.
409              */
410             return;
411         }
412         dev = pci_bridge_get_device(pci_get_bus(dev));
413     }
414 }
415 
416 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
417 {
418     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
419     uint8_t first_bit = ctz32(err->status);
420     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
421     int i;
422 
423     assert(err->status);
424     assert(!(err->status & (err->status - 1)));
425 
426     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
427     errcap |= PCI_ERR_CAP_FEP(first_bit);
428 
429     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
430         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
431             /* 7.10.8 Header Log Register */
432             uint8_t *header_log =
433                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
434             stl_be_p(header_log, err->header[i]);
435         }
436     } else {
437         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
438         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
439     }
440 
441     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
442         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
443          PCI_EXP_DEVCAP2_EETLPP)) {
444         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
445             /* 7.10.12 tlp prefix log register */
446             uint8_t *prefix_log =
447                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
448             stl_be_p(prefix_log, err->prefix[i]);
449         }
450         errcap |= PCI_ERR_CAP_TLP;
451     } else {
452         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
453                PCI_ERR_TLP_PREFIX_LOG_SIZE);
454     }
455     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
456 }
457 
458 static void pcie_aer_clear_log(PCIDevice *dev)
459 {
460     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
461 
462     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
463                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
464 
465     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
466     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
467 }
468 
469 static void pcie_aer_clear_error(PCIDevice *dev)
470 {
471     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
472     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
473     PCIEAERLog *aer_log = &dev->exp.aer_log;
474     PCIEAERErr err;
475 
476     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
477         pcie_aer_clear_log(dev);
478         return;
479     }
480 
481     /*
482      * If more errors are queued, set corresponding bits in uncorrectable
483      * error status.
484      * We emulate uncorrectable error status register as W1CS.
485      * So set bit in uncorrectable error status here again for multiple
486      * error recording support.
487      *
488      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
489      */
490     pcie_aer_update_uncor_status(dev);
491 
492     aer_log_del_err(aer_log, &err);
493     pcie_aer_update_log(dev, &err);
494 }
495 
496 static int pcie_aer_record_error(PCIDevice *dev,
497                                  const PCIEAERErr *err)
498 {
499     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
500     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
501     int fep = PCI_ERR_CAP_FEP(errcap);
502 
503     assert(err->status);
504     assert(!(err->status & (err->status - 1)));
505 
506     if (errcap & PCI_ERR_CAP_MHRE &&
507         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
508         /*  Not first error. queue error */
509         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
510             /* overflow */
511             return -1;
512         }
513         return 0;
514     }
515 
516     pcie_aer_update_log(dev, err);
517     return 0;
518 }
519 
520 typedef struct PCIEAERInject {
521     PCIDevice *dev;
522     uint8_t *aer_cap;
523     const PCIEAERErr *err;
524     uint16_t devctl;
525     uint16_t devsta;
526     uint32_t error_status;
527     bool unsupported_request;
528     bool log_overflow;
529     PCIEAERMsg msg;
530 } PCIEAERInject;
531 
532 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
533                                       uint32_t uncor_status,
534                                       bool is_advisory_nonfatal)
535 {
536     PCIDevice *dev = inj->dev;
537 
538     inj->devsta |= PCI_EXP_DEVSTA_CED;
539     if (inj->unsupported_request) {
540         inj->devsta |= PCI_EXP_DEVSTA_URD;
541     }
542     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
543 
544     if (inj->aer_cap) {
545         uint32_t mask;
546         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
547                                    inj->error_status);
548         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
549         if (mask & inj->error_status) {
550             return false;
551         }
552         if (is_advisory_nonfatal) {
553             uint32_t uncor_mask =
554                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
555             if (!(uncor_mask & uncor_status)) {
556                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
557             }
558             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
559                                        uncor_status);
560         }
561     }
562 
563     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
564         return false;
565     }
566     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
567         return false;
568     }
569 
570     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
571     return true;
572 }
573 
574 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
575 {
576     PCIDevice *dev = inj->dev;
577     uint16_t cmd;
578 
579     if (is_fatal) {
580         inj->devsta |= PCI_EXP_DEVSTA_FED;
581     } else {
582         inj->devsta |= PCI_EXP_DEVSTA_NFED;
583     }
584     if (inj->unsupported_request) {
585         inj->devsta |= PCI_EXP_DEVSTA_URD;
586     }
587     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
588 
589     if (inj->aer_cap) {
590         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
591         if (mask & inj->error_status) {
592             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
593                                        inj->error_status);
594             return false;
595         }
596 
597         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
598         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
599                                    inj->error_status);
600     }
601 
602     cmd = pci_get_word(dev->config + PCI_COMMAND);
603     if (inj->unsupported_request &&
604         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
605         return false;
606     }
607     if (is_fatal) {
608         if (!((cmd & PCI_COMMAND_SERR) ||
609               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
610             return false;
611         }
612         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
613     } else {
614         if (!((cmd & PCI_COMMAND_SERR) ||
615               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
616             return false;
617         }
618         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
619     }
620     return true;
621 }
622 
623 /*
624  * non-Function specific error must be recorded in all functions.
625  * It is the responsibility of the caller of this function.
626  * It is also caller's responsibility to determine which function should
627  * report the error.
628  *
629  * 6.2.4 Error Logging
630  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
631  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
632  *             Operations
633  */
634 static int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
635 {
636     uint8_t *aer_cap = NULL;
637     uint16_t devctl = 0;
638     uint16_t devsta = 0;
639     uint32_t error_status = err->status;
640     PCIEAERInject inj;
641 
642     if (!pci_is_express(dev)) {
643         return -ENOSYS;
644     }
645 
646     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
647         error_status &= PCI_ERR_COR_SUPPORTED;
648     } else {
649         error_status &= PCI_ERR_UNC_SUPPORTED;
650     }
651 
652     /* invalid status bit. one and only one bit must be set */
653     if (!error_status || (error_status & (error_status - 1))) {
654         return -EINVAL;
655     }
656 
657     if (dev->exp.aer_cap) {
658         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
659         aer_cap = dev->config + dev->exp.aer_cap;
660         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
661         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
662     }
663 
664     inj.dev = dev;
665     inj.aer_cap = aer_cap;
666     inj.err = err;
667     inj.devctl = devctl;
668     inj.devsta = devsta;
669     inj.error_status = error_status;
670     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
671         err->status == PCI_ERR_UNC_UNSUP;
672     inj.log_overflow = false;
673 
674     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
675         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
676             return 0;
677         }
678     } else {
679         bool is_fatal =
680             pcie_aer_uncor_default_severity(error_status) ==
681             PCI_ERR_ROOT_CMD_FATAL_EN;
682         if (aer_cap) {
683             is_fatal =
684                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
685         }
686         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
687             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
688             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
689                 return 0;
690             }
691         } else {
692             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
693                 return 0;
694             }
695         }
696     }
697 
698     /* send up error message */
699     inj.msg.source_id = err->source_id;
700     pcie_aer_msg(dev, &inj.msg);
701 
702     if (inj.log_overflow) {
703         PCIEAERErr header_log_overflow = {
704             .status = PCI_ERR_COR_HL_OVERFLOW,
705             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
706         };
707         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
708         assert(!ret);
709     }
710     return 0;
711 }
712 
713 void pcie_aer_write_config(PCIDevice *dev,
714                            uint32_t addr, uint32_t val, int len)
715 {
716     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
717     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
718     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
719     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
720 
721     /* uncorrectable error */
722     if (!(uncorsta & first_error)) {
723         /* the bit that corresponds to the first error is cleared */
724         pcie_aer_clear_error(dev);
725     } else if (errcap & PCI_ERR_CAP_MHRE) {
726         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
727          * nothing should happen. So we have to revert the modification to
728          * the register.
729          */
730         pcie_aer_update_uncor_status(dev);
731     } else {
732         /* capability & control
733          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
734          */
735         aer_log_clear_all_err(&dev->exp.aer_log);
736     }
737 }
738 
739 void pcie_aer_root_init(PCIDevice *dev)
740 {
741     uint16_t pos = dev->exp.aer_cap;
742 
743     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
744                  PCI_ERR_ROOT_CMD_EN_MASK);
745     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
746                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
747     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
748      * device-specific method.
749      */
750     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
751                  ~PCI_ERR_ROOT_IRQ);
752 }
753 
754 void pcie_aer_root_reset(PCIDevice *dev)
755 {
756     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
757 
758     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
759 
760     /*
761      * Advanced Error Interrupt Message Number in Root Error Status Register
762      * must be updated by chip dependent code because it's chip dependent
763      * which number is used.
764      */
765 }
766 
767 void pcie_aer_root_write_config(PCIDevice *dev,
768                                 uint32_t addr, uint32_t val, int len,
769                                 uint32_t root_cmd_prev)
770 {
771     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
772     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
773     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
774     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
775     /* 6.2.4.1.2 Interrupt Generation */
776     if (!msix_enabled(dev) && !msi_enabled(dev)) {
777         if (pci_intx(dev) != -1) {
778             pci_set_irq(dev, !!(root_cmd & enabled_cmd));
779         }
780         return;
781     }
782 
783     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
784         /* Send MSI on transition from false to true. */
785         return;
786     }
787 
788     pcie_aer_root_notify(dev);
789 }
790 
791 static const VMStateDescription vmstate_pcie_aer_err = {
792     .name = "PCIE_AER_ERROR",
793     .version_id = 1,
794     .minimum_version_id = 1,
795     .fields = (VMStateField[]) {
796         VMSTATE_UINT32(status, PCIEAERErr),
797         VMSTATE_UINT16(source_id, PCIEAERErr),
798         VMSTATE_UINT16(flags, PCIEAERErr),
799         VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
800         VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
801         VMSTATE_END_OF_LIST()
802     }
803 };
804 
805 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
806 {
807     PCIEAERLog *s = opaque;
808 
809     return s->log_num <= s->log_max;
810 }
811 
812 const VMStateDescription vmstate_pcie_aer_log = {
813     .name = "PCIE_AER_ERROR_LOG",
814     .version_id = 1,
815     .minimum_version_id = 1,
816     .fields = (VMStateField[]) {
817         VMSTATE_UINT16(log_num, PCIEAERLog),
818         VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog, NULL),
819         VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
820         VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
821                               vmstate_pcie_aer_err, PCIEAERErr),
822         VMSTATE_END_OF_LIST()
823     }
824 };
825 
/* One entry of the AER error name -> value conversion table. */
typedef struct PCIEAERErrorName PCIEAERErrorName;

struct PCIEAERErrorName {
    const char *name;   /* name as matched by pcie_aer_parse_error_string() */
    uint32_t val;       /* PCI_ERR_UNC_* or PCI_ERR_COR_* status bit */
    bool correctable;   /* true when val is a PCI_ERR_COR_* bit */
};
831 
832 /*
833  * AER error name -> value conversion table
834  * This naming scheme is same to linux aer-injection tool.
835  */
836 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
837     {
838         .name = "DLP",
839         .val = PCI_ERR_UNC_DLP,
840         .correctable = false,
841     }, {
842         .name = "SDN",
843         .val = PCI_ERR_UNC_SDN,
844         .correctable = false,
845     }, {
846         .name = "POISON_TLP",
847         .val = PCI_ERR_UNC_POISON_TLP,
848         .correctable = false,
849     }, {
850         .name = "FCP",
851         .val = PCI_ERR_UNC_FCP,
852         .correctable = false,
853     }, {
854         .name = "COMP_TIME",
855         .val = PCI_ERR_UNC_COMP_TIME,
856         .correctable = false,
857     }, {
858         .name = "COMP_ABORT",
859         .val = PCI_ERR_UNC_COMP_ABORT,
860         .correctable = false,
861     }, {
862         .name = "UNX_COMP",
863         .val = PCI_ERR_UNC_UNX_COMP,
864         .correctable = false,
865     }, {
866         .name = "RX_OVER",
867         .val = PCI_ERR_UNC_RX_OVER,
868         .correctable = false,
869     }, {
870         .name = "MALF_TLP",
871         .val = PCI_ERR_UNC_MALF_TLP,
872         .correctable = false,
873     }, {
874         .name = "ECRC",
875         .val = PCI_ERR_UNC_ECRC,
876         .correctable = false,
877     }, {
878         .name = "UNSUP",
879         .val = PCI_ERR_UNC_UNSUP,
880         .correctable = false,
881     }, {
882         .name = "ACSV",
883         .val = PCI_ERR_UNC_ACSV,
884         .correctable = false,
885     }, {
886         .name = "INTN",
887         .val = PCI_ERR_UNC_INTN,
888         .correctable = false,
889     }, {
890         .name = "MCBTLP",
891         .val = PCI_ERR_UNC_MCBTLP,
892         .correctable = false,
893     }, {
894         .name = "ATOP_EBLOCKED",
895         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
896         .correctable = false,
897     }, {
898         .name = "TLP_PRF_BLOCKED",
899         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
900         .correctable = false,
901     }, {
902         .name = "RCVR",
903         .val = PCI_ERR_COR_RCVR,
904         .correctable = true,
905     }, {
906         .name = "BAD_TLP",
907         .val = PCI_ERR_COR_BAD_TLP,
908         .correctable = true,
909     }, {
910         .name = "BAD_DLLP",
911         .val = PCI_ERR_COR_BAD_DLLP,
912         .correctable = true,
913     }, {
914         .name = "REP_ROLL",
915         .val = PCI_ERR_COR_REP_ROLL,
916         .correctable = true,
917     }, {
918         .name = "REP_TIMER",
919         .val = PCI_ERR_COR_REP_TIMER,
920         .correctable = true,
921     }, {
922         .name = "ADV_NONFATAL",
923         .val = PCI_ERR_COR_ADV_NONFATAL,
924         .correctable = true,
925     }, {
926         .name = "INTERNAL",
927         .val = PCI_ERR_COR_INTERNAL,
928         .correctable = true,
929     }, {
930         .name = "HL_OVERFLOW",
931         .val = PCI_ERR_COR_HL_OVERFLOW,
932         .correctable = true,
933     },
934 };
935 
936 static int pcie_aer_parse_error_string(const char *error_name,
937                                        uint32_t *status, bool *correctable)
938 {
939     int i;
940 
941     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
942         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
943         if (strcmp(error_name, e->name)) {
944             continue;
945         }
946 
947         *status = e->val;
948         *correctable = e->correctable;
949         return 0;
950     }
951     return -EINVAL;
952 }
953 
954 /*
955  * Inject an error described by @qdict.
956  * On success, set @details to show where error was sent.
957  * Return negative errno if injection failed and a message was emitted.
958  */
959 static int do_pcie_aer_inject_error(Monitor *mon,
960                                     const QDict *qdict,
961                                     PCIEErrorDetails *details)
962 {
963     const char *id = qdict_get_str(qdict, "id");
964     const char *error_name;
965     uint32_t error_status;
966     bool correctable;
967     PCIDevice *dev;
968     PCIEAERErr err;
969     int ret;
970 
971     ret = pci_qdev_find_device(id, &dev);
972     if (ret < 0) {
973         monitor_printf(mon,
974                        "id or pci device path is invalid or device not "
975                        "found. %s\n", id);
976         return ret;
977     }
978     if (!pci_is_express(dev)) {
979         monitor_printf(mon, "the device doesn't support pci express. %s\n",
980                        id);
981         return -ENOSYS;
982     }
983 
984     error_name = qdict_get_str(qdict, "error_status");
985     if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
986         char *e = NULL;
987         error_status = strtoul(error_name, &e, 0);
988         correctable = qdict_get_try_bool(qdict, "correctable", false);
989         if (!e || *e != '\0') {
990             monitor_printf(mon, "invalid error status value. \"%s\"",
991                            error_name);
992             return -EINVAL;
993         }
994     }
995     err.status = error_status;
996     err.source_id = pci_requester_id(dev);
997 
998     err.flags = 0;
999     if (correctable) {
1000         err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
1001     }
1002     if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
1003         err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
1004     }
1005     if (qdict_haskey(qdict, "header0")) {
1006         err.flags |= PCIE_AER_ERR_HEADER_VALID;
1007     }
1008     if (qdict_haskey(qdict, "prefix0")) {
1009         err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
1010     }
1011 
1012     err.header[0] = qdict_get_try_int(qdict, "header0", 0);
1013     err.header[1] = qdict_get_try_int(qdict, "header1", 0);
1014     err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1015     err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1016 
1017     err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1018     err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1019     err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1020     err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1021 
1022     ret = pcie_aer_inject_error(dev, &err);
1023     if (ret < 0) {
1024         monitor_printf(mon, "failed to inject error: %s\n",
1025                        strerror(-ret));
1026         return ret;
1027     }
1028     details->id = id;
1029     details->root_bus = pci_root_bus_path(dev);
1030     details->bus = pci_dev_bus_num(dev);
1031     details->devfn = dev->devfn;
1032 
1033     return 0;
1034 }
1035 
1036 void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1037 {
1038     PCIEErrorDetails data;
1039 
1040     if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1041         return;
1042     }
1043 
1044     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1045                    data.id, data.root_bus, data.bus,
1046                    PCI_SLOT(data.devfn), PCI_FUNC(data.devfn));
1047 }
1048