xref: /qemu/hw/pci/pcie_aer.c (revision 869e9aec)
1 /*
2  * pcie_aer.c
3  *
4  * Copyright (c) 2010 Isaku Yamahata <yamahata at valinux co jp>
5  *                    VA Linux Systems Japan K.K.
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License along
18  * with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include "qemu/osdep.h"
22 #include "sysemu/sysemu.h"
23 #include "qapi/qmp/types.h"
24 #include "qapi/qmp/qjson.h"
25 #include "monitor/monitor.h"
26 #include "hw/pci/pci_bridge.h"
27 #include "hw/pci/pcie.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/msi.h"
30 #include "hw/pci/pci_bus.h"
31 #include "hw/pci/pcie_regs.h"
32 #include "qapi/error.h"
33 
/*
 * Debug tracing. Uncomment the DEBUG_PCIE define below to make
 * PCIE_DPRINTF() print to stderr, prefixed with the calling function name
 * and line number; otherwise it expands to an empty statement.
 */
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
# define PCIE_DPRINTF(fmt, ...)                                         \
    fprintf(stderr, "%s:%d " fmt, __func__, __LINE__, ## __VA_ARGS__)
#else
# define PCIE_DPRINTF(fmt, ...) do {} while (0)
#endif
/* Same as PCIE_DPRINTF(), additionally prefixed with dev->name and devfn. */
#define PCIE_DEV_PRINTF(dev, fmt, ...)                                  \
    PCIE_DPRINTF("%s:%x "fmt, (dev)->name, (dev)->devfn, ## __VA_ARGS__)

/*
 * Byte offsets, relative to PCI_ERR_ROOT_ERR_SRC, of the 16-bit source id
 * recorded for the first correctable / first uncorrectable error message.
 */
#define PCI_ERR_SRC_COR_OFFS    0
#define PCI_ERR_SRC_UNCOR_OFFS  2
46 
47 /* From 6.2.7 Error Listing and Rules. Table 6-2, 6-3 and 6-4 */
48 static uint32_t pcie_aer_uncor_default_severity(uint32_t status)
49 {
50     switch (status) {
51     case PCI_ERR_UNC_INTN:
52     case PCI_ERR_UNC_DLP:
53     case PCI_ERR_UNC_SDN:
54     case PCI_ERR_UNC_RX_OVER:
55     case PCI_ERR_UNC_FCP:
56     case PCI_ERR_UNC_MALF_TLP:
57         return PCI_ERR_ROOT_CMD_FATAL_EN;
58     case PCI_ERR_UNC_POISON_TLP:
59     case PCI_ERR_UNC_ECRC:
60     case PCI_ERR_UNC_UNSUP:
61     case PCI_ERR_UNC_COMP_TIME:
62     case PCI_ERR_UNC_COMP_ABORT:
63     case PCI_ERR_UNC_UNX_COMP:
64     case PCI_ERR_UNC_ACSV:
65     case PCI_ERR_UNC_MCBTLP:
66     case PCI_ERR_UNC_ATOP_EBLOCKED:
67     case PCI_ERR_UNC_TLP_PRF_BLOCKED:
68         return PCI_ERR_ROOT_CMD_NONFATAL_EN;
69     default:
70         abort();
71         break;
72     }
73     return PCI_ERR_ROOT_CMD_FATAL_EN;
74 }
75 
76 static int aer_log_add_err(PCIEAERLog *aer_log, const PCIEAERErr *err)
77 {
78     if (aer_log->log_num == aer_log->log_max) {
79         return -1;
80     }
81     memcpy(&aer_log->log[aer_log->log_num], err, sizeof *err);
82     aer_log->log_num++;
83     return 0;
84 }
85 
86 static void aer_log_del_err(PCIEAERLog *aer_log, PCIEAERErr *err)
87 {
88     assert(aer_log->log_num);
89     *err = aer_log->log[0];
90     aer_log->log_num--;
91     memmove(&aer_log->log[0], &aer_log->log[1],
92             aer_log->log_num * sizeof *err);
93 }
94 
95 static void aer_log_clear_all_err(PCIEAERLog *aer_log)
96 {
97     aer_log->log_num = 0;
98 }
99 
100 int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset,
101                   uint16_t size, Error **errp)
102 {
103     pcie_add_capability(dev, PCI_EXT_CAP_ID_ERR, cap_ver,
104                         offset, size);
105     dev->exp.aer_cap = offset;
106 
107     /* clip down the value to avoid unreasonable memory usage */
108     if (dev->exp.aer_log.log_max > PCIE_AER_LOG_MAX_LIMIT) {
109         error_setg(errp, "Invalid aer_log_max %d. The max number of aer log "
110                 "is %d", dev->exp.aer_log.log_max, PCIE_AER_LOG_MAX_LIMIT);
111         return -EINVAL;
112     }
113     dev->exp.aer_log.log = g_malloc0(sizeof dev->exp.aer_log.log[0] *
114                                         dev->exp.aer_log.log_max);
115 
116     pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS,
117                  PCI_ERR_UNC_SUPPORTED);
118 
119     pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER,
120                  PCI_ERR_UNC_SEVERITY_DEFAULT);
121     pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_SEVER,
122                  PCI_ERR_UNC_SUPPORTED);
123 
124     pci_long_test_and_set_mask(dev->w1cmask + offset + PCI_ERR_COR_STATUS,
125                                PCI_ERR_COR_SUPPORTED);
126 
127     pci_set_long(dev->config + offset + PCI_ERR_COR_MASK,
128                  PCI_ERR_COR_MASK_DEFAULT);
129     pci_set_long(dev->wmask + offset + PCI_ERR_COR_MASK,
130                  PCI_ERR_COR_SUPPORTED);
131 
132     /* capabilities and control. multiple header logging is supported */
133     if (dev->exp.aer_log.log_max > 0) {
134         pci_set_long(dev->config + offset + PCI_ERR_CAP,
135                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC |
136                      PCI_ERR_CAP_MHRC);
137         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
138                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE |
139                      PCI_ERR_CAP_MHRE);
140     } else {
141         pci_set_long(dev->config + offset + PCI_ERR_CAP,
142                      PCI_ERR_CAP_ECRC_GENC | PCI_ERR_CAP_ECRC_CHKC);
143         pci_set_long(dev->wmask + offset + PCI_ERR_CAP,
144                      PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
145     }
146 
147     switch (pcie_cap_get_type(dev)) {
148     case PCI_EXP_TYPE_ROOT_PORT:
149         /* this case will be set by pcie_aer_root_init() */
150         /* fallthrough */
151     case PCI_EXP_TYPE_DOWNSTREAM:
152     case PCI_EXP_TYPE_UPSTREAM:
153         pci_word_test_and_set_mask(dev->wmask + PCI_BRIDGE_CONTROL,
154                                    PCI_BRIDGE_CTL_SERR);
155         pci_long_test_and_set_mask(dev->w1cmask + PCI_STATUS,
156                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
157         break;
158     default:
159         /* nothing */
160         break;
161     }
162     return 0;
163 }
164 
165 void pcie_aer_exit(PCIDevice *dev)
166 {
167     g_free(dev->exp.aer_log.log);
168 }
169 
170 static void pcie_aer_update_uncor_status(PCIDevice *dev)
171 {
172     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
173     PCIEAERLog *aer_log = &dev->exp.aer_log;
174 
175     uint16_t i;
176     for (i = 0; i < aer_log->log_num; i++) {
177         pci_long_test_and_set_mask(aer_cap + PCI_ERR_UNCOR_STATUS,
178                                    dev->exp.aer_log.log[i].status);
179     }
180 }
181 
182 /*
183  * return value:
184  * true: error message needs to be sent up
185  * false: error message is masked
186  *
187  * 6.2.6 Error Message Control
188  * Figure 6-3
189  * all pci express devices part
190  */
191 static bool
192 pcie_aer_msg_alldev(PCIDevice *dev, const PCIEAERMsg *msg)
193 {
194     if (!(pcie_aer_msg_is_uncor(msg) &&
195           (pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_SERR))) {
196         return false;
197     }
198 
199     /* Signaled System Error
200      *
201      * 7.5.1.1 Command register
202      * Bit 8 SERR# Enable
203      *
204      * When Set, this bit enables reporting of Non-fatal and Fatal
205      * errors detected by the Function to the Root Complex. Note that
206      * errors are reported if enabled either through this bit or through
207      * the PCI Express specific bits in the Device Control register (see
208      * Section 7.8.4).
209      */
210     pci_word_test_and_set_mask(dev->config + PCI_STATUS,
211                                PCI_STATUS_SIG_SYSTEM_ERROR);
212 
213     if (!(msg->severity &
214           pci_get_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCTL))) {
215         return false;
216     }
217 
218     /* send up error message */
219     return true;
220 }
221 
222 /*
223  * return value:
224  * true: error message is sent up
225  * false: error message is masked
226  *
227  * 6.2.6 Error Message Control
228  * Figure 6-3
229  * virtual pci bridge part
230  */
231 static bool pcie_aer_msg_vbridge(PCIDevice *dev, const PCIEAERMsg *msg)
232 {
233     uint16_t bridge_control = pci_get_word(dev->config + PCI_BRIDGE_CONTROL);
234 
235     if (pcie_aer_msg_is_uncor(msg)) {
236         /* Received System Error */
237         pci_word_test_and_set_mask(dev->config + PCI_SEC_STATUS,
238                                    PCI_SEC_STATUS_RCV_SYSTEM_ERROR);
239     }
240 
241     if (!(bridge_control & PCI_BRIDGE_CTL_SERR)) {
242         return false;
243     }
244     return true;
245 }
246 
247 void pcie_aer_root_set_vector(PCIDevice *dev, unsigned int vector)
248 {
249     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
250     assert(vector < PCI_ERR_ROOT_IRQ_MAX);
251     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_ROOT_STATUS,
252                                  PCI_ERR_ROOT_IRQ);
253     pci_long_test_and_set_mask(aer_cap + PCI_ERR_ROOT_STATUS,
254                                vector << PCI_ERR_ROOT_IRQ_SHIFT);
255 }
256 
257 static unsigned int pcie_aer_root_get_vector(PCIDevice *dev)
258 {
259     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
260     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
261     return (root_status & PCI_ERR_ROOT_IRQ) >> PCI_ERR_ROOT_IRQ_SHIFT;
262 }
263 
264 /* Given a status register, get corresponding bits in the command register */
265 static uint32_t pcie_aer_status_to_cmd(uint32_t status)
266 {
267     uint32_t cmd = 0;
268     if (status & PCI_ERR_ROOT_COR_RCV) {
269         cmd |= PCI_ERR_ROOT_CMD_COR_EN;
270     }
271     if (status & PCI_ERR_ROOT_NONFATAL_RCV) {
272         cmd |= PCI_ERR_ROOT_CMD_NONFATAL_EN;
273     }
274     if (status & PCI_ERR_ROOT_FATAL_RCV) {
275         cmd |= PCI_ERR_ROOT_CMD_FATAL_EN;
276     }
277     return cmd;
278 }
279 
280 static void pcie_aer_root_notify(PCIDevice *dev)
281 {
282     if (msix_enabled(dev)) {
283         msix_notify(dev, pcie_aer_root_get_vector(dev));
284     } else if (msi_enabled(dev)) {
285         msi_notify(dev, pcie_aer_root_get_vector(dev));
286     } else {
287         pci_irq_assert(dev);
288     }
289 }
290 
/*
 * 6.2.6 Error Message Control
 * Figure 6-3
 * root port part
 *
 * Record the received error message @msg in the Root Error Status
 * register of root port @dev (including the source id of the first
 * correctable / uncorrectable message) and raise the AER interrupt when
 * the enabled-and-pending condition newly becomes true.
 * An unknown msg->severity aborts.
 */
static void pcie_aer_msg_root_port(PCIDevice *dev, const PCIEAERMsg *msg)
{
    uint16_t cmd;
    uint8_t *aer_cap;
    uint32_t root_cmd;
    uint32_t root_status, prev_status;

    cmd = pci_get_word(dev->config + PCI_COMMAND);
    aer_cap = dev->config + dev->exp.aer_cap;
    root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
    /* prev_status keeps the value from before this message is recorded */
    prev_status = root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);

    if (cmd & PCI_COMMAND_SERR) {
        /* System Error.
         *
         * The way to report System Error is platform specific and
         * it isn't implemented in qemu right now.
         * So just discard the error for now.
         * An OS which cares about aer would receive errors via
         * native aer mechanisms, so this wouldn't matter.
         */
    }

    /* Error Message Received: Root Error Status register */
    switch (msg->severity) {
    case PCI_ERR_ROOT_CMD_COR_EN:
        if (root_status & PCI_ERR_ROOT_COR_RCV) {
            /* a correctable error was already pending */
            root_status |= PCI_ERR_ROOT_MULTI_COR_RCV;
        } else {
            /* first correctable error: remember who sent it */
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC + PCI_ERR_SRC_COR_OFFS,
                         msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_COR_RCV;
        break;
    case PCI_ERR_ROOT_CMD_NONFATAL_EN:
        root_status |= PCI_ERR_ROOT_NONFATAL_RCV;
        break;
    case PCI_ERR_ROOT_CMD_FATAL_EN:
        if (!(root_status & PCI_ERR_ROOT_UNCOR_RCV)) {
            /* no uncorrectable error pending yet: this fatal one is first */
            root_status |= PCI_ERR_ROOT_FIRST_FATAL;
        }
        root_status |= PCI_ERR_ROOT_FATAL_RCV;
        break;
    default:
        abort();
        break;
    }
    if (pcie_aer_msg_is_uncor(msg)) {
        if (root_status & PCI_ERR_ROOT_UNCOR_RCV) {
            /* an uncorrectable error was already pending */
            root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV;
        } else {
            /* first uncorrectable error: remember who sent it */
            pci_set_word(aer_cap + PCI_ERR_ROOT_ERR_SRC +
                         PCI_ERR_SRC_UNCOR_OFFS, msg->source_id);
        }
        root_status |= PCI_ERR_ROOT_UNCOR_RCV;
    }
    pci_set_long(aer_cap + PCI_ERR_ROOT_STATUS, root_status);

    /* 6.2.4.1.2 Interrupt Generation */
    /* All the above did was set some bits in the status register.
     * Specifically these that match message severity.
     * The below code relies on this fact. */
    if (!(root_cmd & msg->severity) ||
        (pcie_aer_status_to_cmd(prev_status) & root_cmd)) {
        /* Condition is not being set or was already true so nothing to do. */
        return;
    }

    pcie_aer_root_notify(dev);
}
366 
367 /*
368  * 6.2.6 Error Message Control Figure 6-3
369  *
370  * Walk up the bus tree from the device, propagate the error message.
371  */
372 void pcie_aer_msg(PCIDevice *dev, const PCIEAERMsg *msg)
373 {
374     uint8_t type;
375 
376     while (dev) {
377         if (!pci_is_express(dev)) {
378             /* just ignore it */
379             /* TODO: Shouldn't we set PCI_STATUS_SIG_SYSTEM_ERROR?
380              * Consider e.g. a PCI bridge above a PCI Express device. */
381             return;
382         }
383 
384         type = pcie_cap_get_type(dev);
385         if ((type == PCI_EXP_TYPE_ROOT_PORT ||
386             type == PCI_EXP_TYPE_UPSTREAM ||
387             type == PCI_EXP_TYPE_DOWNSTREAM) &&
388             !pcie_aer_msg_vbridge(dev, msg)) {
389                 return;
390         }
391         if (!pcie_aer_msg_alldev(dev, msg)) {
392             return;
393         }
394         if (type == PCI_EXP_TYPE_ROOT_PORT) {
395             pcie_aer_msg_root_port(dev, msg);
396             /* Root port can notify system itself,
397                or send the error message to root complex event collector. */
398             /*
399              * if root port is associated with an event collector,
400              * return the root complex event collector here.
401              * For now root complex event collector isn't supported.
402              */
403             return;
404         }
405         dev = pci_bridge_get_device(dev->bus);
406     }
407 }
408 
409 static void pcie_aer_update_log(PCIDevice *dev, const PCIEAERErr *err)
410 {
411     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
412     uint8_t first_bit = ctz32(err->status);
413     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
414     int i;
415 
416     assert(err->status);
417     assert(!(err->status & (err->status - 1)));
418 
419     errcap &= ~(PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
420     errcap |= PCI_ERR_CAP_FEP(first_bit);
421 
422     if (err->flags & PCIE_AER_ERR_HEADER_VALID) {
423         for (i = 0; i < ARRAY_SIZE(err->header); ++i) {
424             /* 7.10.8 Header Log Register */
425             uint8_t *header_log =
426                 aer_cap + PCI_ERR_HEADER_LOG + i * sizeof err->header[0];
427             stl_be_p(header_log, err->header[i]);
428         }
429     } else {
430         assert(!(err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT));
431         memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
432     }
433 
434     if ((err->flags & PCIE_AER_ERR_TLP_PREFIX_PRESENT) &&
435         (pci_get_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVCAP2) &
436          PCI_EXP_DEVCAP2_EETLPP)) {
437         for (i = 0; i < ARRAY_SIZE(err->prefix); ++i) {
438             /* 7.10.12 tlp prefix log register */
439             uint8_t *prefix_log =
440                 aer_cap + PCI_ERR_TLP_PREFIX_LOG + i * sizeof err->prefix[0];
441             stl_be_p(prefix_log, err->prefix[i]);
442         }
443         errcap |= PCI_ERR_CAP_TLP;
444     } else {
445         memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0,
446                PCI_ERR_TLP_PREFIX_LOG_SIZE);
447     }
448     pci_set_long(aer_cap + PCI_ERR_CAP, errcap);
449 }
450 
451 static void pcie_aer_clear_log(PCIDevice *dev)
452 {
453     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
454 
455     pci_long_test_and_clear_mask(aer_cap + PCI_ERR_CAP,
456                                  PCI_ERR_CAP_FEP_MASK | PCI_ERR_CAP_TLP);
457 
458     memset(aer_cap + PCI_ERR_HEADER_LOG, 0, PCI_ERR_HEADER_LOG_SIZE);
459     memset(aer_cap + PCI_ERR_TLP_PREFIX_LOG, 0, PCI_ERR_TLP_PREFIX_LOG_SIZE);
460 }
461 
462 static void pcie_aer_clear_error(PCIDevice *dev)
463 {
464     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
465     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
466     PCIEAERLog *aer_log = &dev->exp.aer_log;
467     PCIEAERErr err;
468 
469     if (!(errcap & PCI_ERR_CAP_MHRE) || !aer_log->log_num) {
470         pcie_aer_clear_log(dev);
471         return;
472     }
473 
474     /*
475      * If more errors are queued, set corresponding bits in uncorrectable
476      * error status.
477      * We emulate uncorrectable error status register as W1CS.
478      * So set bit in uncorrectable error status here again for multiple
479      * error recording support.
480      *
481      * 6.2.4.2 Multiple Error Handling(Advanced Error Reporting Capability)
482      */
483     pcie_aer_update_uncor_status(dev);
484 
485     aer_log_del_err(aer_log, &err);
486     pcie_aer_update_log(dev, &err);
487 }
488 
489 static int pcie_aer_record_error(PCIDevice *dev,
490                                  const PCIEAERErr *err)
491 {
492     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
493     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
494     int fep = PCI_ERR_CAP_FEP(errcap);
495 
496     assert(err->status);
497     assert(!(err->status & (err->status - 1)));
498 
499     if (errcap & PCI_ERR_CAP_MHRE &&
500         (pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS) & (1U << fep))) {
501         /*  Not first error. queue error */
502         if (aer_log_add_err(&dev->exp.aer_log, err) < 0) {
503             /* overflow */
504             return -1;
505         }
506         return 0;
507     }
508 
509     pcie_aer_update_log(dev, err);
510     return 0;
511 }
512 
/*
 * Working state shared by pcie_aer_inject_error() and its helpers while a
 * single error is being injected.
 */
typedef struct PCIEAERInject {
    PCIDevice *dev;             /* device the error is injected into */
    uint8_t *aer_cap;           /* AER capability in config space, or NULL
                                 * when the device has none */
    const PCIEAERErr *err;      /* error as supplied by the caller */
    uint16_t devctl;            /* Device Control value read at entry */
    uint16_t devsta;            /* Device Status value; updated by helpers */
    uint32_t error_status;      /* single status bit being reported */
    bool unsupported_request;   /* error is uncorrectable PCI_ERR_UNC_UNSUP */
    bool log_overflow;          /* set when recording the error failed */
    PCIEAERMsg msg;             /* message handed to pcie_aer_msg() */
} PCIEAERInject;
524 
525 static bool pcie_aer_inject_cor_error(PCIEAERInject *inj,
526                                       uint32_t uncor_status,
527                                       bool is_advisory_nonfatal)
528 {
529     PCIDevice *dev = inj->dev;
530 
531     inj->devsta |= PCI_EXP_DEVSTA_CED;
532     if (inj->unsupported_request) {
533         inj->devsta |= PCI_EXP_DEVSTA_URD;
534     }
535     pci_set_word(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
536 
537     if (inj->aer_cap) {
538         uint32_t mask;
539         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_COR_STATUS,
540                                    inj->error_status);
541         mask = pci_get_long(inj->aer_cap + PCI_ERR_COR_MASK);
542         if (mask & inj->error_status) {
543             return false;
544         }
545         if (is_advisory_nonfatal) {
546             uint32_t uncor_mask =
547                 pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
548             if (!(uncor_mask & uncor_status)) {
549                 inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
550             }
551             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
552                                        uncor_status);
553         }
554     }
555 
556     if (inj->unsupported_request && !(inj->devctl & PCI_EXP_DEVCTL_URRE)) {
557         return false;
558     }
559     if (!(inj->devctl & PCI_EXP_DEVCTL_CERE)) {
560         return false;
561     }
562 
563     inj->msg.severity = PCI_ERR_ROOT_CMD_COR_EN;
564     return true;
565 }
566 
567 static bool pcie_aer_inject_uncor_error(PCIEAERInject *inj, bool is_fatal)
568 {
569     PCIDevice *dev = inj->dev;
570     uint16_t cmd;
571 
572     if (is_fatal) {
573         inj->devsta |= PCI_EXP_DEVSTA_FED;
574     } else {
575         inj->devsta |= PCI_EXP_DEVSTA_NFED;
576     }
577     if (inj->unsupported_request) {
578         inj->devsta |= PCI_EXP_DEVSTA_URD;
579     }
580     pci_set_long(dev->config + dev->exp.exp_cap + PCI_EXP_DEVSTA, inj->devsta);
581 
582     if (inj->aer_cap) {
583         uint32_t mask = pci_get_long(inj->aer_cap + PCI_ERR_UNCOR_MASK);
584         if (mask & inj->error_status) {
585             pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
586                                        inj->error_status);
587             return false;
588         }
589 
590         inj->log_overflow = !!pcie_aer_record_error(dev, inj->err);
591         pci_long_test_and_set_mask(inj->aer_cap + PCI_ERR_UNCOR_STATUS,
592                                    inj->error_status);
593     }
594 
595     cmd = pci_get_word(dev->config + PCI_COMMAND);
596     if (inj->unsupported_request &&
597         !(inj->devctl & PCI_EXP_DEVCTL_URRE) && !(cmd & PCI_COMMAND_SERR)) {
598         return false;
599     }
600     if (is_fatal) {
601         if (!((cmd & PCI_COMMAND_SERR) ||
602               (inj->devctl & PCI_EXP_DEVCTL_FERE))) {
603             return false;
604         }
605         inj->msg.severity = PCI_ERR_ROOT_CMD_FATAL_EN;
606     } else {
607         if (!((cmd & PCI_COMMAND_SERR) ||
608               (inj->devctl & PCI_EXP_DEVCTL_NFERE))) {
609             return false;
610         }
611         inj->msg.severity = PCI_ERR_ROOT_CMD_NONFATAL_EN;
612     }
613     return true;
614 }
615 
616 /*
617  * non-Function specific error must be recorded in all functions.
618  * It is the responsibility of the caller of this function.
619  * It is also caller's responsibility to determine which function should
620  * report the error.
621  *
622  * 6.2.4 Error Logging
623  * 6.2.5 Sequence of Device Error Signaling and Logging Operations
624  * Figure 6-2: Flowchart Showing Sequence of Device Error Signaling and Logging
625  *             Operations
626  */
627 int pcie_aer_inject_error(PCIDevice *dev, const PCIEAERErr *err)
628 {
629     uint8_t *aer_cap = NULL;
630     uint16_t devctl = 0;
631     uint16_t devsta = 0;
632     uint32_t error_status = err->status;
633     PCIEAERInject inj;
634 
635     if (!pci_is_express(dev)) {
636         return -ENOSYS;
637     }
638 
639     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
640         error_status &= PCI_ERR_COR_SUPPORTED;
641     } else {
642         error_status &= PCI_ERR_UNC_SUPPORTED;
643     }
644 
645     /* invalid status bit. one and only one bit must be set */
646     if (!error_status || (error_status & (error_status - 1))) {
647         return -EINVAL;
648     }
649 
650     if (dev->exp.aer_cap) {
651         uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
652         aer_cap = dev->config + dev->exp.aer_cap;
653         devctl = pci_get_long(exp_cap + PCI_EXP_DEVCTL);
654         devsta = pci_get_long(exp_cap + PCI_EXP_DEVSTA);
655     }
656 
657     inj.dev = dev;
658     inj.aer_cap = aer_cap;
659     inj.err = err;
660     inj.devctl = devctl;
661     inj.devsta = devsta;
662     inj.error_status = error_status;
663     inj.unsupported_request = !(err->flags & PCIE_AER_ERR_IS_CORRECTABLE) &&
664         err->status == PCI_ERR_UNC_UNSUP;
665     inj.log_overflow = false;
666 
667     if (err->flags & PCIE_AER_ERR_IS_CORRECTABLE) {
668         if (!pcie_aer_inject_cor_error(&inj, 0, false)) {
669             return 0;
670         }
671     } else {
672         bool is_fatal =
673             pcie_aer_uncor_default_severity(error_status) ==
674             PCI_ERR_ROOT_CMD_FATAL_EN;
675         if (aer_cap) {
676             is_fatal =
677                 error_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
678         }
679         if (!is_fatal && (err->flags & PCIE_AER_ERR_MAYBE_ADVISORY)) {
680             inj.error_status = PCI_ERR_COR_ADV_NONFATAL;
681             if (!pcie_aer_inject_cor_error(&inj, error_status, true)) {
682                 return 0;
683             }
684         } else {
685             if (!pcie_aer_inject_uncor_error(&inj, is_fatal)) {
686                 return 0;
687             }
688         }
689     }
690 
691     /* send up error message */
692     inj.msg.source_id = err->source_id;
693     pcie_aer_msg(dev, &inj.msg);
694 
695     if (inj.log_overflow) {
696         PCIEAERErr header_log_overflow = {
697             .status = PCI_ERR_COR_HL_OVERFLOW,
698             .flags = PCIE_AER_ERR_IS_CORRECTABLE,
699         };
700         int ret = pcie_aer_inject_error(dev, &header_log_overflow);
701         assert(!ret);
702     }
703     return 0;
704 }
705 
706 void pcie_aer_write_config(PCIDevice *dev,
707                            uint32_t addr, uint32_t val, int len)
708 {
709     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
710     uint32_t errcap = pci_get_long(aer_cap + PCI_ERR_CAP);
711     uint32_t first_error = 1U << PCI_ERR_CAP_FEP(errcap);
712     uint32_t uncorsta = pci_get_long(aer_cap + PCI_ERR_UNCOR_STATUS);
713 
714     /* uncorrectable error */
715     if (!(uncorsta & first_error)) {
716         /* the bit that corresponds to the first error is cleared */
717         pcie_aer_clear_error(dev);
718     } else if (errcap & PCI_ERR_CAP_MHRE) {
719         /* When PCI_ERR_CAP_MHRE is enabled and the first error isn't cleared
720          * nothing should happen. So we have to revert the modification to
721          * the register.
722          */
723         pcie_aer_update_uncor_status(dev);
724     } else {
725         /* capability & control
726          * PCI_ERR_CAP_MHRE might be cleared, so clear of header log.
727          */
728         aer_log_clear_all_err(&dev->exp.aer_log);
729     }
730 }
731 
732 void pcie_aer_root_init(PCIDevice *dev)
733 {
734     uint16_t pos = dev->exp.aer_cap;
735 
736     pci_set_long(dev->wmask + pos + PCI_ERR_ROOT_COMMAND,
737                  PCI_ERR_ROOT_CMD_EN_MASK);
738     pci_set_long(dev->w1cmask + pos + PCI_ERR_ROOT_STATUS,
739                  PCI_ERR_ROOT_STATUS_REPORT_MASK);
740     /* PCI_ERR_ROOT_IRQ is RO but devices change it using a
741      * device-specific method.
742      */
743     pci_set_long(dev->cmask + pos + PCI_ERR_ROOT_STATUS,
744                  ~PCI_ERR_ROOT_IRQ);
745 }
746 
747 void pcie_aer_root_reset(PCIDevice *dev)
748 {
749     uint8_t* aer_cap = dev->config + dev->exp.aer_cap;
750 
751     pci_set_long(aer_cap + PCI_ERR_ROOT_COMMAND, 0);
752 
753     /*
754      * Advanced Error Interrupt Message Number in Root Error Status Register
755      * must be updated by chip dependent code because it's chip dependent
756      * which number is used.
757      */
758 }
759 
760 void pcie_aer_root_write_config(PCIDevice *dev,
761                                 uint32_t addr, uint32_t val, int len,
762                                 uint32_t root_cmd_prev)
763 {
764     uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
765     uint32_t root_status = pci_get_long(aer_cap + PCI_ERR_ROOT_STATUS);
766     uint32_t enabled_cmd = pcie_aer_status_to_cmd(root_status);
767     uint32_t root_cmd = pci_get_long(aer_cap + PCI_ERR_ROOT_COMMAND);
768     /* 6.2.4.1.2 Interrupt Generation */
769     if (!msix_enabled(dev) && !msi_enabled(dev)) {
770         pci_set_irq(dev, !!(root_cmd & enabled_cmd));
771         return;
772     }
773 
774     if ((root_cmd_prev & enabled_cmd) || !(root_cmd & enabled_cmd)) {
775         /* Send MSI on transition from false to true. */
776         return;
777     }
778 
779     pcie_aer_root_notify(dev);
780 }
781 
/*
 * Migration description of a single queued error (PCIEAERErr): status,
 * source id, flags, and the four-word header and prefix arrays.
 */
static const VMStateDescription vmstate_pcie_aer_err = {
    .name = "PCIE_AER_ERROR",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(status, PCIEAERErr),
        VMSTATE_UINT16(source_id, PCIEAERErr),
        VMSTATE_UINT16(flags, PCIEAERErr),
        VMSTATE_UINT32_ARRAY(header, PCIEAERErr, 4),
        VMSTATE_UINT32_ARRAY(prefix, PCIEAERErr, 4),
        VMSTATE_END_OF_LIST()
    }
};
795 
796 static bool pcie_aer_state_log_num_valid(void *opaque, int version_id)
797 {
798     PCIEAERLog *s = opaque;
799 
800     return s->log_num <= s->log_max;
801 }
802 
/*
 * Migration description of the queued-error log (PCIEAERLog).
 * log_num is validated against log_max by pcie_aer_state_log_num_valid()
 * before the log_num entries of the log array are transferred.
 */
const VMStateDescription vmstate_pcie_aer_log = {
    .name = "PCIE_AER_ERROR_LOG",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT16(log_num, PCIEAERLog),
        VMSTATE_UINT16_EQUAL(log_max, PCIEAERLog),
        VMSTATE_VALIDATE("log_num <= log_max", pcie_aer_state_log_num_valid),
        VMSTATE_STRUCT_VARRAY_POINTER_UINT16(log, PCIEAERLog, log_num,
                              vmstate_pcie_aer_err, PCIEAERErr),
        VMSTATE_END_OF_LIST()
    }
};
816 
/* One entry of the AER error name -> value conversion table. */
typedef struct PCIEAERErrorName {
    const char *name;   /* name matched against the user-supplied string */
    uint32_t val;       /* corresponding PCI_ERR_UNC_* / PCI_ERR_COR_* bit */
    bool correctable;   /* true for correctable errors */
} PCIEAERErrorName;
822 
823 /*
824  * AER error name -> value conversion table
825  * This naming scheme is same to linux aer-injection tool.
826  */
827 static const struct PCIEAERErrorName pcie_aer_error_list[] = {
828     {
829         .name = "DLP",
830         .val = PCI_ERR_UNC_DLP,
831         .correctable = false,
832     }, {
833         .name = "SDN",
834         .val = PCI_ERR_UNC_SDN,
835         .correctable = false,
836     }, {
837         .name = "POISON_TLP",
838         .val = PCI_ERR_UNC_POISON_TLP,
839         .correctable = false,
840     }, {
841         .name = "FCP",
842         .val = PCI_ERR_UNC_FCP,
843         .correctable = false,
844     }, {
845         .name = "COMP_TIME",
846         .val = PCI_ERR_UNC_COMP_TIME,
847         .correctable = false,
848     }, {
849         .name = "COMP_ABORT",
850         .val = PCI_ERR_UNC_COMP_ABORT,
851         .correctable = false,
852     }, {
853         .name = "UNX_COMP",
854         .val = PCI_ERR_UNC_UNX_COMP,
855         .correctable = false,
856     }, {
857         .name = "RX_OVER",
858         .val = PCI_ERR_UNC_RX_OVER,
859         .correctable = false,
860     }, {
861         .name = "MALF_TLP",
862         .val = PCI_ERR_UNC_MALF_TLP,
863         .correctable = false,
864     }, {
865         .name = "ECRC",
866         .val = PCI_ERR_UNC_ECRC,
867         .correctable = false,
868     }, {
869         .name = "UNSUP",
870         .val = PCI_ERR_UNC_UNSUP,
871         .correctable = false,
872     }, {
873         .name = "ACSV",
874         .val = PCI_ERR_UNC_ACSV,
875         .correctable = false,
876     }, {
877         .name = "INTN",
878         .val = PCI_ERR_UNC_INTN,
879         .correctable = false,
880     }, {
881         .name = "MCBTLP",
882         .val = PCI_ERR_UNC_MCBTLP,
883         .correctable = false,
884     }, {
885         .name = "ATOP_EBLOCKED",
886         .val = PCI_ERR_UNC_ATOP_EBLOCKED,
887         .correctable = false,
888     }, {
889         .name = "TLP_PRF_BLOCKED",
890         .val = PCI_ERR_UNC_TLP_PRF_BLOCKED,
891         .correctable = false,
892     }, {
893         .name = "RCVR",
894         .val = PCI_ERR_COR_RCVR,
895         .correctable = true,
896     }, {
897         .name = "BAD_TLP",
898         .val = PCI_ERR_COR_BAD_TLP,
899         .correctable = true,
900     }, {
901         .name = "BAD_DLLP",
902         .val = PCI_ERR_COR_BAD_DLLP,
903         .correctable = true,
904     }, {
905         .name = "REP_ROLL",
906         .val = PCI_ERR_COR_REP_ROLL,
907         .correctable = true,
908     }, {
909         .name = "REP_TIMER",
910         .val = PCI_ERR_COR_REP_TIMER,
911         .correctable = true,
912     }, {
913         .name = "ADV_NONFATAL",
914         .val = PCI_ERR_COR_ADV_NONFATAL,
915         .correctable = true,
916     }, {
917         .name = "INTERNAL",
918         .val = PCI_ERR_COR_INTERNAL,
919         .correctable = true,
920     }, {
921         .name = "HL_OVERFLOW",
922         .val = PCI_ERR_COR_HL_OVERFLOW,
923         .correctable = true,
924     },
925 };
926 
927 static int pcie_aer_parse_error_string(const char *error_name,
928                                        uint32_t *status, bool *correctable)
929 {
930     int i;
931 
932     for (i = 0; i < ARRAY_SIZE(pcie_aer_error_list); i++) {
933         const  PCIEAERErrorName *e = &pcie_aer_error_list[i];
934         if (strcmp(error_name, e->name)) {
935             continue;
936         }
937 
938         *status = e->val;
939         *correctable = e->correctable;
940         return 0;
941     }
942     return -EINVAL;
943 }
944 
945 static int do_pcie_aer_inject_error(Monitor *mon,
946                                     const QDict *qdict, QObject **ret_data)
947 {
948     const char *id = qdict_get_str(qdict, "id");
949     const char *error_name;
950     uint32_t error_status;
951     bool correctable;
952     PCIDevice *dev;
953     PCIEAERErr err;
954     int ret;
955 
956     ret = pci_qdev_find_device(id, &dev);
957     if (ret < 0) {
958         monitor_printf(mon,
959                        "id or pci device path is invalid or device not "
960                        "found. %s\n", id);
961         return ret;
962     }
963     if (!pci_is_express(dev)) {
964         monitor_printf(mon, "the device doesn't support pci express. %s\n",
965                        id);
966         return -ENOSYS;
967     }
968 
969     error_name = qdict_get_str(qdict, "error_status");
970     if (pcie_aer_parse_error_string(error_name, &error_status, &correctable)) {
971         char *e = NULL;
972         error_status = strtoul(error_name, &e, 0);
973         correctable = qdict_get_try_bool(qdict, "correctable", false);
974         if (!e || *e != '\0') {
975             monitor_printf(mon, "invalid error status value. \"%s\"",
976                            error_name);
977             return -EINVAL;
978         }
979     }
980     err.status = error_status;
981     err.source_id = pci_requester_id(dev);
982 
983     err.flags = 0;
984     if (correctable) {
985         err.flags |= PCIE_AER_ERR_IS_CORRECTABLE;
986     }
987     if (qdict_get_try_bool(qdict, "advisory_non_fatal", false)) {
988         err.flags |= PCIE_AER_ERR_MAYBE_ADVISORY;
989     }
990     if (qdict_haskey(qdict, "header0")) {
991         err.flags |= PCIE_AER_ERR_HEADER_VALID;
992     }
993     if (qdict_haskey(qdict, "prefix0")) {
994         err.flags |= PCIE_AER_ERR_TLP_PREFIX_PRESENT;
995     }
996 
997     err.header[0] = qdict_get_try_int(qdict, "header0", 0);
998     err.header[1] = qdict_get_try_int(qdict, "header1", 0);
999     err.header[2] = qdict_get_try_int(qdict, "header2", 0);
1000     err.header[3] = qdict_get_try_int(qdict, "header3", 0);
1001 
1002     err.prefix[0] = qdict_get_try_int(qdict, "prefix0", 0);
1003     err.prefix[1] = qdict_get_try_int(qdict, "prefix1", 0);
1004     err.prefix[2] = qdict_get_try_int(qdict, "prefix2", 0);
1005     err.prefix[3] = qdict_get_try_int(qdict, "prefix3", 0);
1006 
1007     ret = pcie_aer_inject_error(dev, &err);
1008     *ret_data = qobject_from_jsonf("{'id': %s, "
1009                                    "'root_bus': %s, 'bus': %d, 'devfn': %d, "
1010                                    "'ret': %d}",
1011                                    id, pci_root_bus_path(dev),
1012                                    pci_bus_num(dev->bus), dev->devfn,
1013                                    ret);
1014     assert(*ret_data);
1015 
1016     return 0;
1017 }
1018 
1019 void hmp_pcie_aer_inject_error(Monitor *mon, const QDict *qdict)
1020 {
1021     QObject *data;
1022     int devfn;
1023 
1024     if (do_pcie_aer_inject_error(mon, qdict, &data) < 0) {
1025         return;
1026     }
1027 
1028     assert(qobject_type(data) == QTYPE_QDICT);
1029     qdict = qobject_to_qdict(data);
1030 
1031     devfn = (int)qdict_get_int(qdict, "devfn");
1032     monitor_printf(mon, "OK id: %s root bus: %s, bus: %x devfn: %x.%x\n",
1033                    qdict_get_str(qdict, "id"),
1034                    qdict_get_str(qdict, "root_bus"),
1035                    (int) qdict_get_int(qdict, "bus"),
1036                    PCI_SLOT(devfn), PCI_FUNC(devfn));
1037 }
1038