xref: /qemu/hw/pci-host/pnv_phb4.c (revision b355f08a)
1 /*
2  * QEMU PowerPC PowerNV (POWER9) PHB4 model
3  *
4  * Copyright (c) 2018-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "qemu-common.h"
14 #include "monitor/monitor.h"
15 #include "target/ppc/cpu.h"
16 #include "hw/pci-host/pnv_phb4_regs.h"
17 #include "hw/pci-host/pnv_phb4.h"
18 #include "hw/pci/pcie_host.h"
19 #include "hw/pci/pcie_port.h"
20 #include "hw/ppc/pnv.h"
21 #include "hw/ppc/pnv_xscom.h"
22 #include "hw/irq.h"
23 #include "hw/qdev-properties.h"
24 #include "qom/object.h"
25 #include "trace.h"
26 
/*
 * Report a guest error for @phb. The message is prefixed with the
 * chip and PHB identifiers and a trailing newline is appended to @fmt,
 * so callers must not add their own.
 */
#define phb_error(phb, fmt, ...)                                        \
    qemu_log_mask(LOG_GUEST_ERROR,                                      \
                  "phb4[%d:%d]: " fmt "\n",                             \
                  (phb)->chip_id, (phb)->phb_id, ## __VA_ARGS__)
30 
/*
 * QEMU version of the GETFIELD/SETFIELD macros
 *
 * These are common with the PnvXive model.
 */

/*
 * Extract the field selected by @mask from @word: the masked bits are
 * shifted down by ctz64(@mask).
 */
static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
{
    const uint64_t selected = word & mask;

    return selected >> ctz64(mask);
}
40 
/*
 * Return @word with the field selected by @mask replaced by @value.
 * @value is shifted up by ctz64(@mask) and any bits falling outside
 * @mask are discarded; bits of @word outside @mask are preserved.
 */
static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
                                uint64_t value)
{
    const uint64_t preserved = word & ~mask;
    const uint64_t positioned = value << ctz64(mask);

    return preserved | (positioned & mask);
}
46 
47 static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
48 {
49     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
50     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
51     uint8_t bus, devfn;
52 
53     if (!(addr >> 63)) {
54         return NULL;
55     }
56     bus = (addr >> 52) & 0xff;
57     devfn = (addr >> 44) & 0xff;
58 
59     /* We don't access the root complex this way */
60     if (bus == 0 && devfn == 0) {
61         return NULL;
62     }
63     return pci_find_device(pci->bus, bus, devfn);
64 }
65 
66 /*
67  * The CONFIG_DATA register expects little endian accesses, but as the
68  * region is big endian, we have to swap the value.
69  */
70 static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
71                                   unsigned size, uint64_t val)
72 {
73     uint32_t cfg_addr, limit;
74     PCIDevice *pdev;
75 
76     pdev = pnv_phb4_find_cfg_dev(phb);
77     if (!pdev) {
78         return;
79     }
80     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
81     cfg_addr |= off;
82     limit = pci_config_size(pdev);
83     if (limit <= cfg_addr) {
84         /*
85          * conventional pci device can be behind pcie-to-pci bridge.
86          * 256 <= addr < 4K has no effects.
87          */
88         return;
89     }
90     switch (size) {
91     case 1:
92         break;
93     case 2:
94         val = bswap16(val);
95         break;
96     case 4:
97         val = bswap32(val);
98         break;
99     default:
100         g_assert_not_reached();
101     }
102     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
103 }
104 
105 static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
106                                      unsigned size)
107 {
108     uint32_t cfg_addr, limit;
109     PCIDevice *pdev;
110     uint64_t val;
111 
112     pdev = pnv_phb4_find_cfg_dev(phb);
113     if (!pdev) {
114         return ~0ull;
115     }
116     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
117     cfg_addr |= off;
118     limit = pci_config_size(pdev);
119     if (limit <= cfg_addr) {
120         /*
121          * conventional pci device can be behind pcie-to-pci bridge.
122          * 256 <= addr < 4K has no effects.
123          */
124         return ~0ull;
125     }
126     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
127     switch (size) {
128     case 1:
129         return val;
130     case 2:
131         return bswap16(val);
132     case 4:
133         return bswap32(val);
134     default:
135         g_assert_not_reached();
136     }
137 }
138 
139 /*
140  * Root complex register accesses are memory mapped.
141  */
142 static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
143                                      unsigned size, uint64_t val)
144 {
145     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
146     PCIDevice *pdev;
147 
148     if (size != 4) {
149         phb_error(phb, "rc_config_write invalid size %d\n", size);
150         return;
151     }
152 
153     pdev = pci_find_device(pci->bus, 0, 0);
154     assert(pdev);
155 
156     pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
157                                  bswap32(val), 4);
158 }
159 
160 static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
161                                         unsigned size)
162 {
163     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
164     PCIDevice *pdev;
165     uint64_t val;
166 
167     if (size != 4) {
168         phb_error(phb, "rc_config_read invalid size %d\n", size);
169         return ~0ull;
170     }
171 
172     pdev = pci_find_device(pci->bus, 0, 0);
173     assert(pdev);
174 
175     val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
176     return bswap32(val);
177 }
178 
179 static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
180 {
181     uint64_t base, start, size, mbe0, mbe1;
182     MemoryRegion *parent;
183     char name[64];
184 
185     /* Unmap first */
186     if (memory_region_is_mapped(&phb->mr_mmio[index])) {
187         /* Should we destroy it in RCU friendly way... ? */
188         memory_region_del_subregion(phb->mr_mmio[index].container,
189                                     &phb->mr_mmio[index]);
190     }
191 
192     /* Get table entry */
193     mbe0 = phb->ioda_MBT[(index << 1)];
194     mbe1 = phb->ioda_MBT[(index << 1) + 1];
195 
196     if (!(mbe0 & IODA3_MBT0_ENABLE)) {
197         return;
198     }
199 
200     /* Grab geometry from registers */
201     base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
202     size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
203     size |= 0xff00000000000000ull;
204     size = ~size + 1;
205 
206     /* Calculate PCI side start address based on M32/M64 window type */
207     if (mbe0 & IODA3_MBT0_TYPE_M32) {
208         start = phb->regs[PHB_M32_START_ADDR >> 3];
209         if ((start + size) > 0x100000000ull) {
210             phb_error(phb, "M32 set beyond 4GB boundary !");
211             size = 0x100000000 - start;
212         }
213     } else {
214         start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
215     }
216 
217     /* TODO: Figure out how to implemet/decode AOMASK */
218 
219     /* Check if it matches an enabled MMIO region in the PEC stack */
220     if (memory_region_is_mapped(&phb->stack->mmbar0) &&
221         base >= phb->stack->mmio0_base &&
222         (base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
223         parent = &phb->stack->mmbar0;
224         base -= phb->stack->mmio0_base;
225     } else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
226         base >= phb->stack->mmio1_base &&
227         (base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
228         parent = &phb->stack->mmbar1;
229         base -= phb->stack->mmio1_base;
230     } else {
231         phb_error(phb, "PHB MBAR %d out of parent bounds", index);
232         return;
233     }
234 
235     /* Create alias (better name ?) */
236     snprintf(name, sizeof(name), "phb4-mbar%d", index);
237     memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
238                              &phb->pci_mmio, start, size);
239     memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
240 }
241 
242 static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
243 {
244     uint64_t i;
245     uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
246         PNV_PHB4_MIN_MMIO_WINDOWS;
247 
248     for (i = 0; i < num_windows; i++) {
249         pnv_phb4_check_mbt(phb, i);
250     }
251 }
252 
253 static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
254                                       unsigned *out_table, unsigned *out_idx)
255 {
256     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
257     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
258     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
259     unsigned int mask;
260     uint64_t *tptr = NULL;
261 
262     switch (table) {
263     case IODA3_TBL_LIST:
264         tptr = phb->ioda_LIST;
265         mask = 7;
266         break;
267     case IODA3_TBL_MIST:
268         tptr = phb->ioda_MIST;
269         mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
270         mask -= 1;
271         break;
272     case IODA3_TBL_RCAM:
273         mask = phb->big_phb ? 127 : 63;
274         break;
275     case IODA3_TBL_MRT:
276         mask = phb->big_phb ? 15 : 7;
277         break;
278     case IODA3_TBL_PESTA:
279     case IODA3_TBL_PESTB:
280         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
281         mask -= 1;
282         break;
283     case IODA3_TBL_TVT:
284         tptr = phb->ioda_TVT;
285         mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
286         mask -= 1;
287         break;
288     case IODA3_TBL_TCR:
289     case IODA3_TBL_TDR:
290         mask = phb->big_phb ? 1023 : 511;
291         break;
292     case IODA3_TBL_MBT:
293         tptr = phb->ioda_MBT;
294         mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
295         mask -= 1;
296         break;
297     case IODA3_TBL_MDT:
298         tptr = phb->ioda_MDT;
299         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
300         mask -= 1;
301         break;
302     case IODA3_TBL_PEEV:
303         tptr = phb->ioda_PEEV;
304         mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
305         mask -= 1;
306         break;
307     default:
308         phb_error(phb, "invalid IODA table %d", table);
309         return NULL;
310     }
311     index &= mask;
312     if (out_idx) {
313         *out_idx = index;
314     }
315     if (out_table) {
316         *out_table = table;
317     }
318     if (tptr) {
319         tptr += index;
320     }
321     if (adreg & PHB_IODA_AD_AUTOINC) {
322         index = (index + 1) & mask;
323         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
324     }
325 
326     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
327     return tptr;
328 }
329 
330 static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
331 {
332     unsigned table, idx;
333     uint64_t *tptr;
334 
335     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
336     if (!tptr) {
337         /* Special PESTA case */
338         if (table == IODA3_TBL_PESTA) {
339             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
340         } else if (table == IODA3_TBL_PESTB) {
341             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
342         }
343         /* Return 0 on unsupported tables, not ff's */
344         return 0;
345     }
346     return *tptr;
347 }
348 
349 static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
350 {
351     unsigned table, idx;
352     uint64_t *tptr;
353 
354     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
355     if (!tptr) {
356         /* Special PESTA case */
357         if (table == IODA3_TBL_PESTA) {
358             phb->ioda_PEST_AB[idx] &= ~1;
359             phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
360         } else if (table == IODA3_TBL_PESTB) {
361             phb->ioda_PEST_AB[idx] &= ~2;
362             phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
363         }
364         return;
365     }
366 
367     /* Handle side effects */
368     switch (table) {
369     case IODA3_TBL_LIST:
370         break;
371     case IODA3_TBL_MIST: {
372         /* Special mask for MIST partial write */
373         uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
374         uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
375         uint64_t v = *tptr;
376         if (mmask == 0) {
377             mmask = 0xf;
378         }
379         if (mmask & 8) {
380             v &= 0x0000ffffffffffffull;
381             v |= 0xcfff000000000000ull & val;
382         }
383         if (mmask & 4) {
384             v &= 0xffff0000ffffffffull;
385             v |= 0x0000cfff00000000ull & val;
386         }
387         if (mmask & 2) {
388             v &= 0xffffffff0000ffffull;
389             v |= 0x00000000cfff0000ull & val;
390         }
391         if (mmask & 1) {
392             v &= 0xffffffffffff0000ull;
393             v |= 0x000000000000cfffull & val;
394         }
395         *tptr = v;
396         break;
397     }
398     case IODA3_TBL_MBT:
399         *tptr = val;
400 
401         /* Copy accross the valid bit to the other half */
402         phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
403         phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
404 
405         /* Update mappings */
406         pnv_phb4_check_mbt(phb, idx >> 1);
407         break;
408     default:
409         *tptr = val;
410     }
411 }
412 
413 static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
414 {
415     PnvPhb4DMASpace *ds;
416 
417     /* Always invalidate all for now ... */
418     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
419         ds->pe_num = PHB_INVALID_PE;
420     }
421 }
422 
423 static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
424 {
425     uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
426 
427     if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
428         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
429             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
430                                         0xffff0000, &ds->msi32_mr);
431         }
432     } else {
433         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
434             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
435                                         &ds->msi32_mr);
436         }
437     }
438 
439     if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
440         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
441             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
442                                         (1ull << 60), &ds->msi64_mr);
443         }
444     } else {
445         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
446             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
447                                         &ds->msi64_mr);
448         }
449     }
450 }
451 
452 static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
453 {
454     PnvPhb4DMASpace *ds;
455 
456     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
457         pnv_phb4_update_msi_regions(ds);
458     }
459 }
460 
461 static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
462 {
463     int shift, flags, i, lsi_base;
464     XiveSource *xsrc = &phb->xsrc;
465 
466     /* The XIVE source characteristics can be set at run time */
467     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
468         shift = XIVE_ESB_64K;
469     } else {
470         shift = XIVE_ESB_4K;
471     }
472     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
473         flags = XIVE_SRC_STORE_EOI;
474     } else {
475         flags = 0;
476     }
477 
478     phb->xsrc.esb_shift = shift;
479     phb->xsrc.esb_flags = flags;
480 
481     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
482     lsi_base <<= 3;
483 
484     /* TODO: handle reset values of PHB_LSI_SRC_ID */
485     if (!lsi_base) {
486         return;
487     }
488 
489     /* TODO: need a xive_source_irq_reset_lsi() */
490     bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
491 
492     for (i = 0; i < xsrc->nr_irqs; i++) {
493         bool msi = (i < lsi_base || i >= (lsi_base + 8));
494         if (!msi) {
495             xive_source_irq_set_lsi(xsrc, i);
496         }
497     }
498 }
499 
500 static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
501                                unsigned size)
502 {
503     PnvPHB4 *phb = PNV_PHB4(opaque);
504     bool changed;
505 
506     /* Special case outbound configuration data */
507     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
508         pnv_phb4_config_write(phb, off & 0x3, size, val);
509         return;
510     }
511 
512     /* Special case RC configuration space */
513     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
514         pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
515         return;
516     }
517 
518     /* Other registers are 64-bit only */
519     if (size != 8 || off & 0x7) {
520         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
521                    off, size);
522         return;
523     }
524 
525     /* Handle masking */
526     switch (off) {
527     case PHB_LSI_SOURCE_ID:
528         val &= PHB_LSI_SRC_ID;
529         break;
530     case PHB_M64_UPPER_BITS:
531         val &= 0xff00000000000000ull;
532         break;
533     /* TCE Kill */
534     case PHB_TCE_KILL:
535         /* Clear top 3 bits which HW does to indicate successful queuing */
536         val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
537         break;
538     case PHB_Q_DMA_R:
539         /*
540          * This is enough logic to make SW happy but we aren't
541          * actually quiescing the DMAs
542          */
543         if (val & PHB_Q_DMA_R_AUTORESET) {
544             val = 0;
545         } else {
546             val &= PHB_Q_DMA_R_QUIESCE_DMA;
547         }
548         break;
549     /* LEM stuff */
550     case PHB_LEM_FIR_AND_MASK:
551         phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
552         return;
553     case PHB_LEM_FIR_OR_MASK:
554         phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
555         return;
556     case PHB_LEM_ERROR_AND_MASK:
557         phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
558         return;
559     case PHB_LEM_ERROR_OR_MASK:
560         phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
561         return;
562     case PHB_LEM_WOF:
563         val = 0;
564         break;
565     /* TODO: More regs ..., maybe create a table with masks... */
566 
567     /* Read only registers */
568     case PHB_CPU_LOADSTORE_STATUS:
569     case PHB_ETU_ERR_SUMMARY:
570     case PHB_PHB4_GEN_CAP:
571     case PHB_PHB4_TCE_CAP:
572     case PHB_PHB4_IRQ_CAP:
573     case PHB_PHB4_EEH_CAP:
574         return;
575     }
576 
577     /* Record whether it changed */
578     changed = phb->regs[off >> 3] != val;
579 
580     /* Store in register cache first */
581     phb->regs[off >> 3] = val;
582 
583     /* Handle side effects */
584     switch (off) {
585     case PHB_PHB4_CONFIG:
586         if (changed) {
587             pnv_phb4_update_all_msi_regions(phb);
588         }
589         break;
590     case PHB_M32_START_ADDR:
591     case PHB_M64_UPPER_BITS:
592         if (changed) {
593             pnv_phb4_check_all_mbt(phb);
594         }
595         break;
596 
597     /* IODA table accesses */
598     case PHB_IODA_DATA0:
599         pnv_phb4_ioda_write(phb, val);
600         break;
601 
602     /* RTC invalidation */
603     case PHB_RTC_INVALIDATE:
604         pnv_phb4_rtc_invalidate(phb, val);
605         break;
606 
607     /* PHB Control (Affects XIVE source) */
608     case PHB_CTRLR:
609     case PHB_LSI_SOURCE_ID:
610         pnv_phb4_update_xsrc(phb);
611         break;
612 
613     /* Silent simple writes */
614     case PHB_ASN_CMPM:
615     case PHB_CONFIG_ADDRESS:
616     case PHB_IODA_ADDR:
617     case PHB_TCE_KILL:
618     case PHB_TCE_SPEC_CTL:
619     case PHB_PEST_BAR:
620     case PHB_PELTV_BAR:
621     case PHB_RTT_BAR:
622     case PHB_LEM_FIR_ACCUM:
623     case PHB_LEM_ERROR_MASK:
624     case PHB_LEM_ACTION0:
625     case PHB_LEM_ACTION1:
626     case PHB_TCE_TAG_ENABLE:
627     case PHB_INT_NOTIFY_ADDR:
628     case PHB_INT_NOTIFY_INDEX:
629     case PHB_DMARD_SYNC:
630        break;
631 
632     /* Noise on anything else */
633     default:
634         qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
635                       off, val);
636     }
637 }
638 
639 static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
640 {
641     PnvPHB4 *phb = PNV_PHB4(opaque);
642     uint64_t val;
643 
644     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
645         return pnv_phb4_config_read(phb, off & 0x3, size);
646     }
647 
648     /* Special case RC configuration space */
649     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
650         return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
651     }
652 
653     /* Other registers are 64-bit only */
654     if (size != 8 || off & 0x7) {
655         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
656                    off, size);
657         return ~0ull;
658     }
659 
660     /* Default read from cache */
661     val = phb->regs[off >> 3];
662 
663     switch (off) {
664     case PHB_VERSION:
665         return phb->version;
666 
667         /* Read-only */
668     case PHB_PHB4_GEN_CAP:
669         return 0xe4b8000000000000ull;
670     case PHB_PHB4_TCE_CAP:
671         return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
672     case PHB_PHB4_IRQ_CAP:
673         return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
674     case PHB_PHB4_EEH_CAP:
675         return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
676 
677     /* IODA table accesses */
678     case PHB_IODA_DATA0:
679         return pnv_phb4_ioda_read(phb);
680 
681     /* Link training always appears trained */
682     case PHB_PCIE_DLP_TRAIN_CTL:
683         /* TODO: Do something sensible with speed ? */
684         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
685 
686     /* DMA read sync: make it look like it's complete */
687     case PHB_DMARD_SYNC:
688         return PHB_DMARD_SYNC_COMPLETE;
689 
690     /* Silent simple reads */
691     case PHB_LSI_SOURCE_ID:
692     case PHB_CPU_LOADSTORE_STATUS:
693     case PHB_ASN_CMPM:
694     case PHB_PHB4_CONFIG:
695     case PHB_M32_START_ADDR:
696     case PHB_CONFIG_ADDRESS:
697     case PHB_IODA_ADDR:
698     case PHB_RTC_INVALIDATE:
699     case PHB_TCE_KILL:
700     case PHB_TCE_SPEC_CTL:
701     case PHB_PEST_BAR:
702     case PHB_PELTV_BAR:
703     case PHB_RTT_BAR:
704     case PHB_M64_UPPER_BITS:
705     case PHB_CTRLR:
706     case PHB_LEM_FIR_ACCUM:
707     case PHB_LEM_ERROR_MASK:
708     case PHB_LEM_ACTION0:
709     case PHB_LEM_ACTION1:
710     case PHB_TCE_TAG_ENABLE:
711     case PHB_INT_NOTIFY_ADDR:
712     case PHB_INT_NOTIFY_INDEX:
713     case PHB_Q_DMA_R:
714     case PHB_ETU_ERR_SUMMARY:
715         break;
716 
717     /* Noise on anything else */
718     default:
719         qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
720                       off, val);
721     }
722     return val;
723 }
724 
725 static const MemoryRegionOps pnv_phb4_reg_ops = {
726     .read = pnv_phb4_reg_read,
727     .write = pnv_phb4_reg_write,
728     .valid.min_access_size = 1,
729     .valid.max_access_size = 8,
730     .impl.min_access_size = 1,
731     .impl.max_access_size = 8,
732     .endianness = DEVICE_BIG_ENDIAN,
733 };
734 
735 static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
736 {
737     PnvPHB4 *phb = PNV_PHB4(opaque);
738     uint32_t reg = addr >> 3;
739     uint64_t val;
740     hwaddr offset;
741 
742     switch (reg) {
743     case PHB_SCOM_HV_IND_ADDR:
744         return phb->scom_hv_ind_addr_reg;
745 
746     case PHB_SCOM_HV_IND_DATA:
747         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
748             phb_error(phb, "Invalid indirect address");
749             return ~0ull;
750         }
751         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
752         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
753         val = pnv_phb4_reg_read(phb, offset, size);
754         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
755             offset += size;
756             offset &= 0x3fff;
757             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
758                                                  phb->scom_hv_ind_addr_reg,
759                                                  offset);
760         }
761         return val;
762     case PHB_SCOM_ETU_LEM_FIR:
763     case PHB_SCOM_ETU_LEM_FIR_AND:
764     case PHB_SCOM_ETU_LEM_FIR_OR:
765     case PHB_SCOM_ETU_LEM_FIR_MSK:
766     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
767     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
768     case PHB_SCOM_ETU_LEM_ACT0:
769     case PHB_SCOM_ETU_LEM_ACT1:
770     case PHB_SCOM_ETU_LEM_WOF:
771         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
772         return pnv_phb4_reg_read(phb, offset, size);
773     case PHB_SCOM_ETU_PMON_CONFIG:
774     case PHB_SCOM_ETU_PMON_CTR0:
775     case PHB_SCOM_ETU_PMON_CTR1:
776     case PHB_SCOM_ETU_PMON_CTR2:
777     case PHB_SCOM_ETU_PMON_CTR3:
778         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
779         return pnv_phb4_reg_read(phb, offset, size);
780 
781     default:
782         qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
783         return ~0ull;
784     }
785 }
786 
787 static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
788                                  uint64_t val, unsigned size)
789 {
790     PnvPHB4 *phb = PNV_PHB4(opaque);
791     uint32_t reg = addr >> 3;
792     hwaddr offset;
793 
794     switch (reg) {
795     case PHB_SCOM_HV_IND_ADDR:
796         phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
797         break;
798     case PHB_SCOM_HV_IND_DATA:
799         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
800             phb_error(phb, "Invalid indirect address");
801             break;
802         }
803         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
804         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
805         pnv_phb4_reg_write(phb, offset, val, size);
806         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
807             offset += size;
808             offset &= 0x3fff;
809             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
810                                                  phb->scom_hv_ind_addr_reg,
811                                                  offset);
812         }
813         break;
814     case PHB_SCOM_ETU_LEM_FIR:
815     case PHB_SCOM_ETU_LEM_FIR_AND:
816     case PHB_SCOM_ETU_LEM_FIR_OR:
817     case PHB_SCOM_ETU_LEM_FIR_MSK:
818     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
819     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
820     case PHB_SCOM_ETU_LEM_ACT0:
821     case PHB_SCOM_ETU_LEM_ACT1:
822     case PHB_SCOM_ETU_LEM_WOF:
823         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
824         pnv_phb4_reg_write(phb, offset, val, size);
825         break;
826     case PHB_SCOM_ETU_PMON_CONFIG:
827     case PHB_SCOM_ETU_PMON_CTR0:
828     case PHB_SCOM_ETU_PMON_CTR1:
829     case PHB_SCOM_ETU_PMON_CTR2:
830     case PHB_SCOM_ETU_PMON_CTR3:
831         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
832         pnv_phb4_reg_write(phb, offset, val, size);
833         break;
834     default:
835         qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
836                       "=%"PRIx64"\n", addr, val);
837     }
838 }
839 
840 const MemoryRegionOps pnv_phb4_xscom_ops = {
841     .read = pnv_phb4_xscom_read,
842     .write = pnv_phb4_xscom_write,
843     .valid.min_access_size = 8,
844     .valid.max_access_size = 8,
845     .impl.min_access_size = 8,
846     .impl.max_access_size = 8,
847     .endianness = DEVICE_BIG_ENDIAN,
848 };
849 
850 static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
851 {
852     /* Check that out properly ... */
853     return irq_num & 3;
854 }
855 
856 static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
857 {
858     PnvPHB4 *phb = PNV_PHB4(opaque);
859     uint32_t lsi_base;
860 
861     /* LSI only ... */
862     if (irq_num > 3) {
863         phb_error(phb, "IRQ %x is not an LSI", irq_num);
864     }
865     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
866     lsi_base <<= 3;
867     qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
868 }
869 
870 static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
871 {
872     uint64_t rtt, addr;
873     uint16_t rte;
874     int bus_num;
875     int num_PEs;
876 
877     /* Already resolved ? */
878     if (ds->pe_num != PHB_INVALID_PE) {
879         return true;
880     }
881 
882     /* We need to lookup the RTT */
883     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
884     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
885         phb_error(ds->phb, "DMA with RTT BAR disabled !");
886         /* Set error bits ? fence ? ... */
887         return false;
888     }
889 
890     /* Read RTE */
891     bus_num = pci_bus_num(ds->bus);
892     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
893     addr += 2 * PCI_BUILD_BDF(bus_num, ds->devfn);
894     if (dma_memory_read(&address_space_memory, addr, &rte, sizeof(rte))) {
895         phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
896         /* Set error bits ? fence ? ... */
897         return false;
898     }
899     rte = be16_to_cpu(rte);
900 
901     /* Fail upon reading of invalid PE# */
902     num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
903     if (rte >= num_PEs) {
904         phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
905         rte &= num_PEs - 1;
906     }
907     ds->pe_num = rte;
908     return true;
909 }
910 
911 static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
912                                    bool is_write, uint64_t tve,
913                                    IOMMUTLBEntry *tlb)
914 {
915     uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
916     int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
917     uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
918     uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
919 
920     /* Invalid levels */
921     if (lev > 4) {
922         phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
923         return;
924     }
925 
926     /* Invalid entry */
927     if (tts == 0) {
928         phb_error(ds->phb, "Access to invalid TVE");
929         return;
930     }
931 
932     /* IO Page Size of 0 means untranslated, else use TCEs */
933     if (tps == 0) {
934         /* TODO: Handle boundaries */
935 
936         /* Use 4k pages like q35 ... for now */
937         tlb->iova = addr & 0xfffffffffffff000ull;
938         tlb->translated_addr = addr & 0x0003fffffffff000ull;
939         tlb->addr_mask = 0xfffull;
940         tlb->perm = IOMMU_RW;
941     } else {
942         uint32_t tce_shift, tbl_shift, sh;
943         uint64_t base, taddr, tce, tce_mask;
944 
945         /* Address bits per bottom level TCE entry */
946         tce_shift = tps + 11;
947 
948         /* Address bits per table level */
949         tbl_shift = tts + 8;
950 
951         /* Top level table base address */
952         base = tta << 12;
953 
954         /* Total shift to first level */
955         sh = tbl_shift * lev + tce_shift;
956 
957         /* TODO: Limit to support IO page sizes */
958 
959         /* TODO: Multi-level untested */
960         while ((lev--) >= 0) {
961             /* Grab the TCE address */
962             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
963             if (dma_memory_read(&address_space_memory, taddr, &tce,
964                                 sizeof(tce))) {
965                 phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
966                 return;
967             }
968             tce = be64_to_cpu(tce);
969 
970             /* Check permission for indirect TCE */
971             if ((lev >= 0) && !(tce & 3)) {
972                 phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
973                 phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
974                            is_write ? 'W' : 'R', tve);
975                 phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
976                            tta, lev, tts, tps);
977                 return;
978             }
979             sh -= tbl_shift;
980             base = tce & ~0xfffull;
981         }
982 
983         /* We exit the loop with TCE being the final TCE */
984         tce_mask = ~((1ull << tce_shift) - 1);
985         tlb->iova = addr & tce_mask;
986         tlb->translated_addr = tce & tce_mask;
987         tlb->addr_mask = ~tce_mask;
988         tlb->perm = tce & 3;
989         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
990             phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
991             phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
992                        is_write ? 'W' : 'R', tve);
993             phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
994                        tta, lev, tts, tps);
995         }
996     }
997 }
998 
999 static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
1000                                               hwaddr addr,
1001                                               IOMMUAccessFlags flag,
1002                                               int iommu_idx)
1003 {
1004     PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
1005     int tve_sel;
1006     uint64_t tve, cfg;
1007     IOMMUTLBEntry ret = {
1008         .target_as = &address_space_memory,
1009         .iova = addr,
1010         .translated_addr = 0,
1011         .addr_mask = ~(hwaddr)0,
1012         .perm = IOMMU_NONE,
1013     };
1014 
1015     /* Resolve PE# */
1016     if (!pnv_phb4_resolve_pe(ds)) {
1017         phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1018                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1019         return ret;
1020     }
1021 
1022     /* Check top bits */
1023     switch (addr >> 60) {
1024     case 00:
1025         /* DMA or 32-bit MSI ? */
1026         cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
1027         if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
1028             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
1029             phb_error(ds->phb, "xlate on 32-bit MSI region");
1030             return ret;
1031         }
1032         /* Choose TVE XXX Use PHB4 Control Register */
1033         tve_sel = (addr >> 59) & 1;
1034         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
1035         pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
1036         break;
1037     case 01:
1038         phb_error(ds->phb, "xlate on 64-bit MSI region");
1039         break;
1040     default:
1041         phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
1042     }
1043     return ret;
1044 }
1045 
1046 #define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
1047 DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, PNV_PHB4_IOMMU_MEMORY_REGION,
1048                          TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
1049 
1050 static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
1051                                                     void *data)
1052 {
1053     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1054 
1055     imrc->translate = pnv_phb4_translate_iommu;
1056 }
1057 
1058 static const TypeInfo pnv_phb4_iommu_memory_region_info = {
1059     .parent = TYPE_IOMMU_MEMORY_REGION,
1060     .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1061     .class_init = pnv_phb4_iommu_memory_region_class_init,
1062 };
1063 
1064 /*
1065  * MSI/MSIX memory region implementation.
1066  * The handler handles both MSI and MSIX.
1067  */
1068 static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
1069                                uint64_t data, unsigned size)
1070 {
1071     PnvPhb4DMASpace *ds = opaque;
1072     PnvPHB4 *phb = ds->phb;
1073 
1074     uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
1075 
1076     /* Resolve PE# */
1077     if (!pnv_phb4_resolve_pe(ds)) {
1078         phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1079                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1080         return;
1081     }
1082 
1083     /* TODO: Check it doesn't collide with LSIs */
1084     if (src >= phb->xsrc.nr_irqs) {
1085         phb_error(phb, "MSI %d out of bounds", src);
1086         return;
1087     }
1088 
1089     /* TODO: check PE/MSI assignement */
1090 
1091     qemu_irq_pulse(phb->qirqs[src]);
1092 }
1093 
1094 /* There is no .read as the read result is undefined by PCI spec */
1095 static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
1096 {
1097     PnvPhb4DMASpace *ds = opaque;
1098 
1099     phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
1100     return -1;
1101 }
1102 
1103 static const MemoryRegionOps pnv_phb4_msi_ops = {
1104     .read = pnv_phb4_msi_read,
1105     .write = pnv_phb4_msi_write,
1106     .endianness = DEVICE_LITTLE_ENDIAN
1107 };
1108 
1109 static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
1110 {
1111     PnvPhb4DMASpace *ds;
1112 
1113     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
1114         if (ds->bus == bus && ds->devfn == devfn) {
1115             break;
1116         }
1117     }
1118     return ds;
1119 }
1120 
1121 static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1122 {
1123     PnvPHB4 *phb = opaque;
1124     PnvPhb4DMASpace *ds;
1125     char name[32];
1126 
1127     ds = pnv_phb4_dma_find(phb, bus, devfn);
1128 
1129     if (ds == NULL) {
1130         ds = g_malloc0(sizeof(PnvPhb4DMASpace));
1131         ds->bus = bus;
1132         ds->devfn = devfn;
1133         ds->pe_num = PHB_INVALID_PE;
1134         ds->phb = phb;
1135         snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
1136                  phb->phb_id);
1137         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
1138                                  TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1139                                  OBJECT(phb), name, UINT64_MAX);
1140         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
1141                            name);
1142         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1143                               ds, "msi32", 0x10000);
1144         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1145                               ds, "msi64", 0x100000);
1146         pnv_phb4_update_msi_regions(ds);
1147 
1148         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
1149     }
1150     return &ds->dma_as;
1151 }
1152 
1153 static void pnv_phb4_instance_init(Object *obj)
1154 {
1155     PnvPHB4 *phb = PNV_PHB4(obj);
1156 
1157     QLIST_INIT(&phb->dma_spaces);
1158 
1159     /* XIVE interrupt source object */
1160     object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE);
1161 
1162     /* Root Port */
1163     object_initialize_child(obj, "root", &phb->root, TYPE_PNV_PHB4_ROOT_PORT);
1164 
1165     qdev_prop_set_int32(DEVICE(&phb->root), "addr", PCI_DEVFN(0, 0));
1166     qdev_prop_set_bit(DEVICE(&phb->root), "multifunction", false);
1167 }
1168 
1169 static void pnv_phb4_realize(DeviceState *dev, Error **errp)
1170 {
1171     PnvPHB4 *phb = PNV_PHB4(dev);
1172     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
1173     XiveSource *xsrc = &phb->xsrc;
1174     int nr_irqs;
1175     char name[32];
1176 
1177     assert(phb->stack);
1178 
1179     /* Set the "big_phb" flag */
1180     phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
1181 
1182     /* Controller Registers */
1183     snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
1184              phb->phb_id);
1185     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
1186                           name, 0x2000);
1187 
1188     /*
1189      * PHB4 doesn't support IO space. However, qemu gets very upset if
1190      * we don't have an IO region to anchor IO BARs onto so we just
1191      * initialize one which we never hook up to anything
1192      */
1193 
1194     snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
1195              phb->phb_id);
1196     memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
1197 
1198     snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
1199              phb->phb_id);
1200     memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
1201                        PCI_MMIO_TOTAL_SIZE);
1202 
1203     pci->bus = pci_register_root_bus(dev, "root-bus",
1204                                      pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
1205                                      &phb->pci_mmio, &phb->pci_io,
1206                                      0, 4, TYPE_PNV_PHB4_ROOT_BUS);
1207     pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
1208 
1209     /* Add a single Root port */
1210     qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", phb->chip_id);
1211     qdev_prop_set_uint16(DEVICE(&phb->root), "slot", phb->phb_id);
1212     qdev_realize(DEVICE(&phb->root), BUS(pci->bus), &error_fatal);
1213 
1214     /* Setup XIVE Source */
1215     if (phb->big_phb) {
1216         nr_irqs = PNV_PHB4_MAX_INTs;
1217     } else {
1218         nr_irqs = PNV_PHB4_MAX_INTs >> 1;
1219     }
1220     object_property_set_int(OBJECT(xsrc), "nr-irqs", nr_irqs, &error_fatal);
1221     object_property_set_link(OBJECT(xsrc), "xive", OBJECT(phb), &error_fatal);
1222     if (!qdev_realize(DEVICE(xsrc), NULL, errp)) {
1223         return;
1224     }
1225 
1226     pnv_phb4_update_xsrc(phb);
1227 
1228     phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
1229 }
1230 
1231 static void pnv_phb4_reset(DeviceState *dev)
1232 {
1233     PnvPHB4 *phb = PNV_PHB4(dev);
1234     PCIDevice *root_dev = PCI_DEVICE(&phb->root);
1235 
1236     /*
1237      * Configure PCI device id at reset using a property.
1238      */
1239     pci_config_set_vendor_id(root_dev->config, PCI_VENDOR_ID_IBM);
1240     pci_config_set_device_id(root_dev->config, phb->device_id);
1241 }
1242 
1243 static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
1244                                           PCIBus *rootbus)
1245 {
1246     PnvPHB4 *phb = PNV_PHB4(host_bridge);
1247 
1248     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1249              phb->chip_id, phb->phb_id);
1250     return phb->bus_path;
1251 }
1252 
1253 static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno)
1254 {
1255     PnvPHB4 *phb = PNV_PHB4(xf);
1256     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1257     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1258     uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
1259     MemTxResult result;
1260 
1261     trace_pnv_phb4_xive_notify(notif_port, data);
1262 
1263     address_space_stq_be(&address_space_memory, notif_port, data,
1264                          MEMTXATTRS_UNSPECIFIED, &result);
1265     if (result != MEMTX_OK) {
1266         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
1267         return;
1268     }
1269 }
1270 
1271 static Property pnv_phb4_properties[] = {
1272         DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
1273         DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
1274         DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
1275         DEFINE_PROP_UINT16("device-id", PnvPHB4, device_id, 0),
1276         DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
1277                          PnvPhb4PecStack *),
1278         DEFINE_PROP_END_OF_LIST(),
1279 };
1280 
1281 static void pnv_phb4_class_init(ObjectClass *klass, void *data)
1282 {
1283     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1284     DeviceClass *dc = DEVICE_CLASS(klass);
1285     XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
1286 
1287     hc->root_bus_path   = pnv_phb4_root_bus_path;
1288     dc->realize         = pnv_phb4_realize;
1289     device_class_set_props(dc, pnv_phb4_properties);
1290     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1291     dc->user_creatable  = false;
1292     dc->reset           = pnv_phb4_reset;
1293 
1294     xfc->notify         = pnv_phb4_xive_notify;
1295 }
1296 
1297 static const TypeInfo pnv_phb4_type_info = {
1298     .name          = TYPE_PNV_PHB4,
1299     .parent        = TYPE_PCIE_HOST_BRIDGE,
1300     .instance_init = pnv_phb4_instance_init,
1301     .instance_size = sizeof(PnvPHB4),
1302     .class_init    = pnv_phb4_class_init,
1303     .interfaces = (InterfaceInfo[]) {
1304             { TYPE_XIVE_NOTIFIER },
1305             { },
1306     }
1307 };
1308 
1309 static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
1310 {
1311     BusClass *k = BUS_CLASS(klass);
1312 
1313     /*
1314      * PHB4 has only a single root complex. Enforce the limit on the
1315      * parent bus
1316      */
1317     k->max_dev = 1;
1318 }
1319 
1320 static const TypeInfo pnv_phb4_root_bus_info = {
1321     .name = TYPE_PNV_PHB4_ROOT_BUS,
1322     .parent = TYPE_PCIE_BUS,
1323     .class_init = pnv_phb4_root_bus_class_init,
1324     .interfaces = (InterfaceInfo[]) {
1325         { INTERFACE_PCIE_DEVICE },
1326         { }
1327     },
1328 };
1329 
1330 static void pnv_phb4_root_port_reset(DeviceState *dev)
1331 {
1332     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1333     PCIDevice *d = PCI_DEVICE(dev);
1334     uint8_t *conf = d->config;
1335 
1336     rpc->parent_reset(dev);
1337 
1338     pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
1339                                PCI_IO_RANGE_MASK & 0xff);
1340     pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
1341                                  PCI_IO_RANGE_MASK & 0xff);
1342     pci_set_word(conf + PCI_MEMORY_BASE, 0);
1343     pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
1344     pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
1345     pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
1346     pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
1347     pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
1348 }
1349 
1350 static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
1351 {
1352     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1353     Error *local_err = NULL;
1354 
1355     rpc->parent_realize(dev, &local_err);
1356     if (local_err) {
1357         error_propagate(errp, local_err);
1358         return;
1359     }
1360 }
1361 
1362 static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
1363 {
1364     DeviceClass *dc = DEVICE_CLASS(klass);
1365     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1366     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1367 
1368     dc->desc     = "IBM PHB4 PCIE Root Port";
1369     dc->user_creatable = false;
1370 
1371     device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
1372                                     &rpc->parent_realize);
1373     device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
1374                                   &rpc->parent_reset);
1375 
1376     k->vendor_id = PCI_VENDOR_ID_IBM;
1377     k->device_id = PNV_PHB4_DEVICE_ID;
1378     k->revision  = 0;
1379 
1380     rpc->exp_offset = 0x48;
1381     rpc->aer_offset = 0x100;
1382 
1383     dc->reset = &pnv_phb4_root_port_reset;
1384 }
1385 
1386 static const TypeInfo pnv_phb4_root_port_info = {
1387     .name          = TYPE_PNV_PHB4_ROOT_PORT,
1388     .parent        = TYPE_PCIE_ROOT_PORT,
1389     .instance_size = sizeof(PnvPHB4RootPort),
1390     .class_init    = pnv_phb4_root_port_class_init,
1391 };
1392 
1393 static void pnv_phb4_register_types(void)
1394 {
1395     type_register_static(&pnv_phb4_root_bus_info);
1396     type_register_static(&pnv_phb4_root_port_info);
1397     type_register_static(&pnv_phb4_type_info);
1398     type_register_static(&pnv_phb4_iommu_memory_region_info);
1399 }
1400 
1401 type_init(pnv_phb4_register_types);
1402 
1403 void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
1404 {
1405     PnvPHB4 *phb = &stack->phb;
1406 
1407     /* Unmap first always */
1408     if (memory_region_is_mapped(&phb->mr_regs)) {
1409         memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
1410     }
1411     if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
1412         memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
1413     }
1414 
1415     /* Map registers if enabled */
1416     if (memory_region_is_mapped(&stack->phbbar)) {
1417         memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
1418     }
1419 
1420     /* Map ESB if enabled */
1421     if (memory_region_is_mapped(&stack->intbar)) {
1422         memory_region_add_subregion(&stack->intbar, 0, &phb->xsrc.esb_mmio);
1423     }
1424 
1425     /* Check/update m32 */
1426     pnv_phb4_check_all_mbt(phb);
1427 }
1428 
1429 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
1430 {
1431     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1432 
1433     monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x\n",
1434                    phb->chip_id, phb->phb_id,
1435                    offset, offset + phb->xsrc.nr_irqs - 1);
1436     xive_source_pic_print_info(&phb->xsrc, 0, mon);
1437 }
1438