xref: /qemu/hw/pci-host/pnv_phb4.c (revision d45c8332)
1 /*
2  * QEMU PowerPC PowerNV (POWER9) PHB4 model
3  *
4  * Copyright (c) 2018-2020, IBM Corporation.
5  *
6  * This code is licensed under the GPL version 2 or later. See the
7  * COPYING file in the top-level directory.
8  */
9 #include "qemu/osdep.h"
10 #include "qemu/log.h"
11 #include "qapi/visitor.h"
12 #include "qapi/error.h"
13 #include "monitor/monitor.h"
14 #include "target/ppc/cpu.h"
15 #include "hw/pci-host/pnv_phb4_regs.h"
16 #include "hw/pci-host/pnv_phb4.h"
17 #include "hw/pci/pcie_host.h"
18 #include "hw/pci/pcie_port.h"
19 #include "hw/ppc/pnv.h"
20 #include "hw/ppc/pnv_xscom.h"
21 #include "hw/irq.h"
22 #include "hw/qdev-properties.h"
23 #include "qom/object.h"
24 #include "trace.h"
25 
/*
 * Report a guest error against a PHB4. The message is prefixed with the
 * chip id and PHB id of @phb, and a newline is appended after @fmt.
 */
#define phb_error(phb, fmt, ...)                                        \
    qemu_log_mask(LOG_GUEST_ERROR,                                      \
                  "phb4[%d:%d]: " fmt "\n",                             \
                  (phb)->chip_id,                                       \
                  (phb)->phb_id,                                        \
                  ## __VA_ARGS__)

/*
 * Report a guest error against a PEC. The message is prefixed with the
 * chip id and index of @pec, and a newline is appended after @fmt.
 */
#define phb_pec_error(pec, fmt, ...)                                    \
    qemu_log_mask(LOG_GUEST_ERROR,                                      \
                  "phb4_pec[%d:%d]: " fmt "\n",                         \
                  (pec)->chip_id,                                       \
                  (pec)->index,                                         \
                  ## __VA_ARGS__)
33 
34 /*
35  * QEMU version of the GETFIELD/SETFIELD macros
36  *
37  * These are common with the PnvXive model.
38  */
/*
 * Extract the field selected by @mask from @word and return it shifted
 * down so that the lowest set bit of @mask lands on bit 0.
 */
static inline uint64_t GETFIELD(uint64_t mask, uint64_t word)
{
    const uint64_t field = word & mask;

    return field >> ctz64(mask);
}
43 
/*
 * Return @word with the field selected by @mask replaced by @value.
 * @value is shifted up to the lowest set bit of @mask and truncated to
 * the mask; bits of @word outside @mask are preserved.
 */
static inline uint64_t SETFIELD(uint64_t mask, uint64_t word,
                                uint64_t value)
{
    const uint64_t kept = word & ~mask;
    const uint64_t placed = (value << ctz64(mask)) & mask;

    return kept | placed;
}
49 
50 static PCIDevice *pnv_phb4_find_cfg_dev(PnvPHB4 *phb)
51 {
52     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
53     uint64_t addr = phb->regs[PHB_CONFIG_ADDRESS >> 3];
54     uint8_t bus, devfn;
55 
56     if (!(addr >> 63)) {
57         return NULL;
58     }
59     bus = (addr >> 52) & 0xff;
60     devfn = (addr >> 44) & 0xff;
61 
62     /* We don't access the root complex this way */
63     if (bus == 0 && devfn == 0) {
64         return NULL;
65     }
66     return pci_find_device(pci->bus, bus, devfn);
67 }
68 
69 /*
70  * The CONFIG_DATA register expects little endian accesses, but as the
71  * region is big endian, we have to swap the value.
72  */
73 static void pnv_phb4_config_write(PnvPHB4 *phb, unsigned off,
74                                   unsigned size, uint64_t val)
75 {
76     uint32_t cfg_addr, limit;
77     PCIDevice *pdev;
78 
79     pdev = pnv_phb4_find_cfg_dev(phb);
80     if (!pdev) {
81         return;
82     }
83     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
84     cfg_addr |= off;
85     limit = pci_config_size(pdev);
86     if (limit <= cfg_addr) {
87         /*
88          * conventional pci device can be behind pcie-to-pci bridge.
89          * 256 <= addr < 4K has no effects.
90          */
91         return;
92     }
93     switch (size) {
94     case 1:
95         break;
96     case 2:
97         val = bswap16(val);
98         break;
99     case 4:
100         val = bswap32(val);
101         break;
102     default:
103         g_assert_not_reached();
104     }
105     pci_host_config_write_common(pdev, cfg_addr, limit, val, size);
106 }
107 
108 static uint64_t pnv_phb4_config_read(PnvPHB4 *phb, unsigned off,
109                                      unsigned size)
110 {
111     uint32_t cfg_addr, limit;
112     PCIDevice *pdev;
113     uint64_t val;
114 
115     pdev = pnv_phb4_find_cfg_dev(phb);
116     if (!pdev) {
117         return ~0ull;
118     }
119     cfg_addr = (phb->regs[PHB_CONFIG_ADDRESS >> 3] >> 32) & 0xffc;
120     cfg_addr |= off;
121     limit = pci_config_size(pdev);
122     if (limit <= cfg_addr) {
123         /*
124          * conventional pci device can be behind pcie-to-pci bridge.
125          * 256 <= addr < 4K has no effects.
126          */
127         return ~0ull;
128     }
129     val = pci_host_config_read_common(pdev, cfg_addr, limit, size);
130     switch (size) {
131     case 1:
132         return val;
133     case 2:
134         return bswap16(val);
135     case 4:
136         return bswap32(val);
137     default:
138         g_assert_not_reached();
139     }
140 }
141 
142 /*
143  * Root complex register accesses are memory mapped.
144  */
145 static void pnv_phb4_rc_config_write(PnvPHB4 *phb, unsigned off,
146                                      unsigned size, uint64_t val)
147 {
148     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
149     PCIDevice *pdev;
150 
151     if (size != 4) {
152         phb_error(phb, "rc_config_write invalid size %d\n", size);
153         return;
154     }
155 
156     pdev = pci_find_device(pci->bus, 0, 0);
157     if (!pdev) {
158         phb_error(phb, "rc_config_write device not found\n");
159         return;
160     }
161 
162     pci_host_config_write_common(pdev, off, PHB_RC_CONFIG_SIZE,
163                                  bswap32(val), 4);
164 }
165 
166 static uint64_t pnv_phb4_rc_config_read(PnvPHB4 *phb, unsigned off,
167                                         unsigned size)
168 {
169     PCIHostState *pci = PCI_HOST_BRIDGE(phb);
170     PCIDevice *pdev;
171     uint64_t val;
172 
173     if (size != 4) {
174         phb_error(phb, "rc_config_read invalid size %d\n", size);
175         return ~0ull;
176     }
177 
178     pdev = pci_find_device(pci->bus, 0, 0);
179     if (!pdev) {
180         phb_error(phb, "rc_config_read device not found\n");
181         return ~0ull;
182     }
183 
184     val = pci_host_config_read_common(pdev, off, PHB_RC_CONFIG_SIZE, 4);
185     return bswap32(val);
186 }
187 
188 static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t index)
189 {
190     uint64_t base, start, size, mbe0, mbe1;
191     MemoryRegion *parent;
192     char name[64];
193 
194     /* Unmap first */
195     if (memory_region_is_mapped(&phb->mr_mmio[index])) {
196         /* Should we destroy it in RCU friendly way... ? */
197         memory_region_del_subregion(phb->mr_mmio[index].container,
198                                     &phb->mr_mmio[index]);
199     }
200 
201     /* Get table entry */
202     mbe0 = phb->ioda_MBT[(index << 1)];
203     mbe1 = phb->ioda_MBT[(index << 1) + 1];
204 
205     if (!(mbe0 & IODA3_MBT0_ENABLE)) {
206         return;
207     }
208 
209     /* Grab geometry from registers */
210     base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbe0) << 12;
211     size = GETFIELD(IODA3_MBT1_MASK, mbe1) << 12;
212     size |= 0xff00000000000000ull;
213     size = ~size + 1;
214 
215     /* Calculate PCI side start address based on M32/M64 window type */
216     if (mbe0 & IODA3_MBT0_TYPE_M32) {
217         start = phb->regs[PHB_M32_START_ADDR >> 3];
218         if ((start + size) > 0x100000000ull) {
219             phb_error(phb, "M32 set beyond 4GB boundary !");
220             size = 0x100000000 - start;
221         }
222     } else {
223         start = base | (phb->regs[PHB_M64_UPPER_BITS >> 3]);
224     }
225 
226     /* TODO: Figure out how to implemet/decode AOMASK */
227 
228     /* Check if it matches an enabled MMIO region in the PEC stack */
229     if (memory_region_is_mapped(&phb->mmbar0) &&
230         base >= phb->mmio0_base &&
231         (base + size) <= (phb->mmio0_base + phb->mmio0_size)) {
232         parent = &phb->mmbar0;
233         base -= phb->mmio0_base;
234     } else if (memory_region_is_mapped(&phb->mmbar1) &&
235         base >= phb->mmio1_base &&
236         (base + size) <= (phb->mmio1_base + phb->mmio1_size)) {
237         parent = &phb->mmbar1;
238         base -= phb->mmio1_base;
239     } else {
240         phb_error(phb, "PHB MBAR %d out of parent bounds", index);
241         return;
242     }
243 
244     /* Create alias (better name ?) */
245     snprintf(name, sizeof(name), "phb4-mbar%d", index);
246     memory_region_init_alias(&phb->mr_mmio[index], OBJECT(phb), name,
247                              &phb->pci_mmio, start, size);
248     memory_region_add_subregion(parent, base, &phb->mr_mmio[index]);
249 }
250 
251 static void pnv_phb4_check_all_mbt(PnvPHB4 *phb)
252 {
253     uint64_t i;
254     uint32_t num_windows = phb->big_phb ? PNV_PHB4_MAX_MMIO_WINDOWS :
255         PNV_PHB4_MIN_MMIO_WINDOWS;
256 
257     for (i = 0; i < num_windows; i++) {
258         pnv_phb4_check_mbt(phb, i);
259     }
260 }
261 
262 static uint64_t *pnv_phb4_ioda_access(PnvPHB4 *phb,
263                                       unsigned *out_table, unsigned *out_idx)
264 {
265     uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
266     unsigned int index = GETFIELD(PHB_IODA_AD_TADR, adreg);
267     unsigned int table = GETFIELD(PHB_IODA_AD_TSEL, adreg);
268     unsigned int mask;
269     uint64_t *tptr = NULL;
270 
271     switch (table) {
272     case IODA3_TBL_LIST:
273         tptr = phb->ioda_LIST;
274         mask = 7;
275         break;
276     case IODA3_TBL_MIST:
277         tptr = phb->ioda_MIST;
278         mask = phb->big_phb ? PNV_PHB4_MAX_MIST : (PNV_PHB4_MAX_MIST >> 1);
279         mask -= 1;
280         break;
281     case IODA3_TBL_RCAM:
282         mask = phb->big_phb ? 127 : 63;
283         break;
284     case IODA3_TBL_MRT:
285         mask = phb->big_phb ? 15 : 7;
286         break;
287     case IODA3_TBL_PESTA:
288     case IODA3_TBL_PESTB:
289         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
290         mask -= 1;
291         break;
292     case IODA3_TBL_TVT:
293         tptr = phb->ioda_TVT;
294         mask = phb->big_phb ? PNV_PHB4_MAX_TVEs : (PNV_PHB4_MAX_TVEs >> 1);
295         mask -= 1;
296         break;
297     case IODA3_TBL_TCR:
298     case IODA3_TBL_TDR:
299         mask = phb->big_phb ? 1023 : 511;
300         break;
301     case IODA3_TBL_MBT:
302         tptr = phb->ioda_MBT;
303         mask = phb->big_phb ? PNV_PHB4_MAX_MBEs : (PNV_PHB4_MAX_MBEs >> 1);
304         mask -= 1;
305         break;
306     case IODA3_TBL_MDT:
307         tptr = phb->ioda_MDT;
308         mask = phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
309         mask -= 1;
310         break;
311     case IODA3_TBL_PEEV:
312         tptr = phb->ioda_PEEV;
313         mask = phb->big_phb ? PNV_PHB4_MAX_PEEVs : (PNV_PHB4_MAX_PEEVs >> 1);
314         mask -= 1;
315         break;
316     default:
317         phb_error(phb, "invalid IODA table %d", table);
318         return NULL;
319     }
320     index &= mask;
321     if (out_idx) {
322         *out_idx = index;
323     }
324     if (out_table) {
325         *out_table = table;
326     }
327     if (tptr) {
328         tptr += index;
329     }
330     if (adreg & PHB_IODA_AD_AUTOINC) {
331         index = (index + 1) & mask;
332         adreg = SETFIELD(PHB_IODA_AD_TADR, adreg, index);
333     }
334 
335     phb->regs[PHB_IODA_ADDR >> 3] = adreg;
336     return tptr;
337 }
338 
339 static uint64_t pnv_phb4_ioda_read(PnvPHB4 *phb)
340 {
341     unsigned table, idx;
342     uint64_t *tptr;
343 
344     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
345     if (!tptr) {
346         /* Special PESTA case */
347         if (table == IODA3_TBL_PESTA) {
348             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 1)) << 63;
349         } else if (table == IODA3_TBL_PESTB) {
350             return ((uint64_t)(phb->ioda_PEST_AB[idx] & 2)) << 62;
351         }
352         /* Return 0 on unsupported tables, not ff's */
353         return 0;
354     }
355     return *tptr;
356 }
357 
358 static void pnv_phb4_ioda_write(PnvPHB4 *phb, uint64_t val)
359 {
360     unsigned table, idx;
361     uint64_t *tptr;
362 
363     tptr = pnv_phb4_ioda_access(phb, &table, &idx);
364     if (!tptr) {
365         /* Special PESTA case */
366         if (table == IODA3_TBL_PESTA) {
367             phb->ioda_PEST_AB[idx] &= ~1;
368             phb->ioda_PEST_AB[idx] |= (val >> 63) & 1;
369         } else if (table == IODA3_TBL_PESTB) {
370             phb->ioda_PEST_AB[idx] &= ~2;
371             phb->ioda_PEST_AB[idx] |= (val >> 62) & 2;
372         }
373         return;
374     }
375 
376     /* Handle side effects */
377     switch (table) {
378     case IODA3_TBL_LIST:
379         break;
380     case IODA3_TBL_MIST: {
381         /* Special mask for MIST partial write */
382         uint64_t adreg = phb->regs[PHB_IODA_ADDR >> 3];
383         uint32_t mmask = GETFIELD(PHB_IODA_AD_MIST_PWV, adreg);
384         uint64_t v = *tptr;
385         if (mmask == 0) {
386             mmask = 0xf;
387         }
388         if (mmask & 8) {
389             v &= 0x0000ffffffffffffull;
390             v |= 0xcfff000000000000ull & val;
391         }
392         if (mmask & 4) {
393             v &= 0xffff0000ffffffffull;
394             v |= 0x0000cfff00000000ull & val;
395         }
396         if (mmask & 2) {
397             v &= 0xffffffff0000ffffull;
398             v |= 0x00000000cfff0000ull & val;
399         }
400         if (mmask & 1) {
401             v &= 0xffffffffffff0000ull;
402             v |= 0x000000000000cfffull & val;
403         }
404         *tptr = v;
405         break;
406     }
407     case IODA3_TBL_MBT:
408         *tptr = val;
409 
410         /* Copy accross the valid bit to the other half */
411         phb->ioda_MBT[idx ^ 1] &= 0x7fffffffffffffffull;
412         phb->ioda_MBT[idx ^ 1] |= 0x8000000000000000ull & val;
413 
414         /* Update mappings */
415         pnv_phb4_check_mbt(phb, idx >> 1);
416         break;
417     default:
418         *tptr = val;
419     }
420 }
421 
422 static void pnv_phb4_rtc_invalidate(PnvPHB4 *phb, uint64_t val)
423 {
424     PnvPhb4DMASpace *ds;
425 
426     /* Always invalidate all for now ... */
427     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
428         ds->pe_num = PHB_INVALID_PE;
429     }
430 }
431 
432 static void pnv_phb4_update_msi_regions(PnvPhb4DMASpace *ds)
433 {
434     uint64_t cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
435 
436     if (cfg & PHB_PHB4C_32BIT_MSI_EN) {
437         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
438             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
439                                         0xffff0000, &ds->msi32_mr);
440         }
441     } else {
442         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi32_mr))) {
443             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
444                                         &ds->msi32_mr);
445         }
446     }
447 
448     if (cfg & PHB_PHB4C_64BIT_MSI_EN) {
449         if (!memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
450             memory_region_add_subregion(MEMORY_REGION(&ds->dma_mr),
451                                         (1ull << 60), &ds->msi64_mr);
452         }
453     } else {
454         if (memory_region_is_mapped(MEMORY_REGION(&ds->msi64_mr))) {
455             memory_region_del_subregion(MEMORY_REGION(&ds->dma_mr),
456                                         &ds->msi64_mr);
457         }
458     }
459 }
460 
461 static void pnv_phb4_update_all_msi_regions(PnvPHB4 *phb)
462 {
463     PnvPhb4DMASpace *ds;
464 
465     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
466         pnv_phb4_update_msi_regions(ds);
467     }
468 }
469 
470 static void pnv_phb4_update_xsrc(PnvPHB4 *phb)
471 {
472     int shift, flags, i, lsi_base;
473     XiveSource *xsrc = &phb->xsrc;
474 
475     /* The XIVE source characteristics can be set at run time */
476     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PGSZ_64K) {
477         shift = XIVE_ESB_64K;
478     } else {
479         shift = XIVE_ESB_4K;
480     }
481     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_STORE_EOI) {
482         flags = XIVE_SRC_STORE_EOI;
483     } else {
484         flags = 0;
485     }
486 
487     /*
488      * When the PQ disable configuration bit is set, the check on the
489      * PQ state bits is disabled on the PHB side (for MSI only) and it
490      * is performed on the IC side instead.
491      */
492     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_PQ_DISABLE) {
493         flags |= XIVE_SRC_PQ_DISABLE;
494     }
495 
496     phb->xsrc.esb_shift = shift;
497     phb->xsrc.esb_flags = flags;
498 
499     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
500     lsi_base <<= 3;
501 
502     /* TODO: handle reset values of PHB_LSI_SRC_ID */
503     if (!lsi_base) {
504         return;
505     }
506 
507     /* TODO: need a xive_source_irq_reset_lsi() */
508     bitmap_zero(xsrc->lsi_map, xsrc->nr_irqs);
509 
510     for (i = 0; i < xsrc->nr_irqs; i++) {
511         bool msi = (i < lsi_base || i >= (lsi_base + 8));
512         if (!msi) {
513             xive_source_irq_set_lsi(xsrc, i);
514         }
515     }
516 }
517 
518 static void pnv_phb4_reg_write(void *opaque, hwaddr off, uint64_t val,
519                                unsigned size)
520 {
521     PnvPHB4 *phb = PNV_PHB4(opaque);
522     bool changed;
523 
524     /* Special case outbound configuration data */
525     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
526         pnv_phb4_config_write(phb, off & 0x3, size, val);
527         return;
528     }
529 
530     /* Special case RC configuration space */
531     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
532         pnv_phb4_rc_config_write(phb, off & 0x7ff, size, val);
533         return;
534     }
535 
536     /* Other registers are 64-bit only */
537     if (size != 8 || off & 0x7) {
538         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
539                    off, size);
540         return;
541     }
542 
543     /* Handle masking */
544     switch (off) {
545     case PHB_LSI_SOURCE_ID:
546         val &= PHB_LSI_SRC_ID;
547         break;
548     case PHB_M64_UPPER_BITS:
549         val &= 0xff00000000000000ull;
550         break;
551     /* TCE Kill */
552     case PHB_TCE_KILL:
553         /* Clear top 3 bits which HW does to indicate successful queuing */
554         val &= ~(PHB_TCE_KILL_ALL | PHB_TCE_KILL_PE | PHB_TCE_KILL_ONE);
555         break;
556     case PHB_Q_DMA_R:
557         /*
558          * This is enough logic to make SW happy but we aren't
559          * actually quiescing the DMAs
560          */
561         if (val & PHB_Q_DMA_R_AUTORESET) {
562             val = 0;
563         } else {
564             val &= PHB_Q_DMA_R_QUIESCE_DMA;
565         }
566         break;
567     /* LEM stuff */
568     case PHB_LEM_FIR_AND_MASK:
569         phb->regs[PHB_LEM_FIR_ACCUM >> 3] &= val;
570         return;
571     case PHB_LEM_FIR_OR_MASK:
572         phb->regs[PHB_LEM_FIR_ACCUM >> 3] |= val;
573         return;
574     case PHB_LEM_ERROR_AND_MASK:
575         phb->regs[PHB_LEM_ERROR_MASK >> 3] &= val;
576         return;
577     case PHB_LEM_ERROR_OR_MASK:
578         phb->regs[PHB_LEM_ERROR_MASK >> 3] |= val;
579         return;
580     case PHB_LEM_WOF:
581         val = 0;
582         break;
583     /* TODO: More regs ..., maybe create a table with masks... */
584 
585     /* Read only registers */
586     case PHB_CPU_LOADSTORE_STATUS:
587     case PHB_ETU_ERR_SUMMARY:
588     case PHB_PHB4_GEN_CAP:
589     case PHB_PHB4_TCE_CAP:
590     case PHB_PHB4_IRQ_CAP:
591     case PHB_PHB4_EEH_CAP:
592         return;
593     }
594 
595     /* Record whether it changed */
596     changed = phb->regs[off >> 3] != val;
597 
598     /* Store in register cache first */
599     phb->regs[off >> 3] = val;
600 
601     /* Handle side effects */
602     switch (off) {
603     case PHB_PHB4_CONFIG:
604         if (changed) {
605             pnv_phb4_update_all_msi_regions(phb);
606         }
607         break;
608     case PHB_M32_START_ADDR:
609     case PHB_M64_UPPER_BITS:
610         if (changed) {
611             pnv_phb4_check_all_mbt(phb);
612         }
613         break;
614 
615     /* IODA table accesses */
616     case PHB_IODA_DATA0:
617         pnv_phb4_ioda_write(phb, val);
618         break;
619 
620     /* RTC invalidation */
621     case PHB_RTC_INVALIDATE:
622         pnv_phb4_rtc_invalidate(phb, val);
623         break;
624 
625     /* PHB Control (Affects XIVE source) */
626     case PHB_CTRLR:
627     case PHB_LSI_SOURCE_ID:
628         pnv_phb4_update_xsrc(phb);
629         break;
630 
631     /* Silent simple writes */
632     case PHB_ASN_CMPM:
633     case PHB_CONFIG_ADDRESS:
634     case PHB_IODA_ADDR:
635     case PHB_TCE_KILL:
636     case PHB_TCE_SPEC_CTL:
637     case PHB_PEST_BAR:
638     case PHB_PELTV_BAR:
639     case PHB_RTT_BAR:
640     case PHB_LEM_FIR_ACCUM:
641     case PHB_LEM_ERROR_MASK:
642     case PHB_LEM_ACTION0:
643     case PHB_LEM_ACTION1:
644     case PHB_TCE_TAG_ENABLE:
645     case PHB_INT_NOTIFY_ADDR:
646     case PHB_INT_NOTIFY_INDEX:
647     case PHB_DMARD_SYNC:
648        break;
649 
650     /* Noise on anything else */
651     default:
652         qemu_log_mask(LOG_UNIMP, "phb4: reg_write 0x%"PRIx64"=%"PRIx64"\n",
653                       off, val);
654     }
655 }
656 
657 static uint64_t pnv_phb4_reg_read(void *opaque, hwaddr off, unsigned size)
658 {
659     PnvPHB4 *phb = PNV_PHB4(opaque);
660     uint64_t val;
661 
662     if ((off & 0xfffc) == PHB_CONFIG_DATA) {
663         return pnv_phb4_config_read(phb, off & 0x3, size);
664     }
665 
666     /* Special case RC configuration space */
667     if ((off & 0xf800) == PHB_RC_CONFIG_BASE) {
668         return pnv_phb4_rc_config_read(phb, off & 0x7ff, size);
669     }
670 
671     /* Other registers are 64-bit only */
672     if (size != 8 || off & 0x7) {
673         phb_error(phb, "Invalid register access, offset: 0x%"PRIx64" size: %d",
674                    off, size);
675         return ~0ull;
676     }
677 
678     /* Default read from cache */
679     val = phb->regs[off >> 3];
680 
681     switch (off) {
682     case PHB_VERSION:
683         return PNV_PHB4_PEC_GET_CLASS(phb->pec)->version;
684 
685         /* Read-only */
686     case PHB_PHB4_GEN_CAP:
687         return 0xe4b8000000000000ull;
688     case PHB_PHB4_TCE_CAP:
689         return phb->big_phb ? 0x4008440000000400ull : 0x2008440000000200ull;
690     case PHB_PHB4_IRQ_CAP:
691         return phb->big_phb ? 0x0800000000001000ull : 0x0800000000000800ull;
692     case PHB_PHB4_EEH_CAP:
693         return phb->big_phb ? 0x2000000000000000ull : 0x1000000000000000ull;
694 
695     /* IODA table accesses */
696     case PHB_IODA_DATA0:
697         return pnv_phb4_ioda_read(phb);
698 
699     /* Link training always appears trained */
700     case PHB_PCIE_DLP_TRAIN_CTL:
701         /* TODO: Do something sensible with speed ? */
702         return PHB_PCIE_DLP_INBAND_PRESENCE | PHB_PCIE_DLP_TL_LINKACT;
703 
704     /* DMA read sync: make it look like it's complete */
705     case PHB_DMARD_SYNC:
706         return PHB_DMARD_SYNC_COMPLETE;
707 
708     /* Silent simple reads */
709     case PHB_LSI_SOURCE_ID:
710     case PHB_CPU_LOADSTORE_STATUS:
711     case PHB_ASN_CMPM:
712     case PHB_PHB4_CONFIG:
713     case PHB_M32_START_ADDR:
714     case PHB_CONFIG_ADDRESS:
715     case PHB_IODA_ADDR:
716     case PHB_RTC_INVALIDATE:
717     case PHB_TCE_KILL:
718     case PHB_TCE_SPEC_CTL:
719     case PHB_PEST_BAR:
720     case PHB_PELTV_BAR:
721     case PHB_RTT_BAR:
722     case PHB_M64_UPPER_BITS:
723     case PHB_CTRLR:
724     case PHB_LEM_FIR_ACCUM:
725     case PHB_LEM_ERROR_MASK:
726     case PHB_LEM_ACTION0:
727     case PHB_LEM_ACTION1:
728     case PHB_TCE_TAG_ENABLE:
729     case PHB_INT_NOTIFY_ADDR:
730     case PHB_INT_NOTIFY_INDEX:
731     case PHB_Q_DMA_R:
732     case PHB_ETU_ERR_SUMMARY:
733         break;
734 
735     /* Noise on anything else */
736     default:
737         qemu_log_mask(LOG_UNIMP, "phb4: reg_read 0x%"PRIx64"=%"PRIx64"\n",
738                       off, val);
739     }
740     return val;
741 }
742 
743 static const MemoryRegionOps pnv_phb4_reg_ops = {
744     .read = pnv_phb4_reg_read,
745     .write = pnv_phb4_reg_write,
746     .valid.min_access_size = 1,
747     .valid.max_access_size = 8,
748     .impl.min_access_size = 1,
749     .impl.max_access_size = 8,
750     .endianness = DEVICE_BIG_ENDIAN,
751 };
752 
753 static uint64_t pnv_phb4_xscom_read(void *opaque, hwaddr addr, unsigned size)
754 {
755     PnvPHB4 *phb = PNV_PHB4(opaque);
756     uint32_t reg = addr >> 3;
757     uint64_t val;
758     hwaddr offset;
759 
760     switch (reg) {
761     case PHB_SCOM_HV_IND_ADDR:
762         return phb->scom_hv_ind_addr_reg;
763 
764     case PHB_SCOM_HV_IND_DATA:
765         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
766             phb_error(phb, "Invalid indirect address");
767             return ~0ull;
768         }
769         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
770         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
771         val = pnv_phb4_reg_read(phb, offset, size);
772         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
773             offset += size;
774             offset &= 0x3fff;
775             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
776                                                  phb->scom_hv_ind_addr_reg,
777                                                  offset);
778         }
779         return val;
780     case PHB_SCOM_ETU_LEM_FIR:
781     case PHB_SCOM_ETU_LEM_FIR_AND:
782     case PHB_SCOM_ETU_LEM_FIR_OR:
783     case PHB_SCOM_ETU_LEM_FIR_MSK:
784     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
785     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
786     case PHB_SCOM_ETU_LEM_ACT0:
787     case PHB_SCOM_ETU_LEM_ACT1:
788     case PHB_SCOM_ETU_LEM_WOF:
789         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
790         return pnv_phb4_reg_read(phb, offset, size);
791     case PHB_SCOM_ETU_PMON_CONFIG:
792     case PHB_SCOM_ETU_PMON_CTR0:
793     case PHB_SCOM_ETU_PMON_CTR1:
794     case PHB_SCOM_ETU_PMON_CTR2:
795     case PHB_SCOM_ETU_PMON_CTR3:
796         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
797         return pnv_phb4_reg_read(phb, offset, size);
798 
799     default:
800         qemu_log_mask(LOG_UNIMP, "phb4: xscom_read 0x%"HWADDR_PRIx"\n", addr);
801         return ~0ull;
802     }
803 }
804 
805 static void pnv_phb4_xscom_write(void *opaque, hwaddr addr,
806                                  uint64_t val, unsigned size)
807 {
808     PnvPHB4 *phb = PNV_PHB4(opaque);
809     uint32_t reg = addr >> 3;
810     hwaddr offset;
811 
812     switch (reg) {
813     case PHB_SCOM_HV_IND_ADDR:
814         phb->scom_hv_ind_addr_reg = val & 0xe000000000001fff;
815         break;
816     case PHB_SCOM_HV_IND_DATA:
817         if (!(phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_VALID)) {
818             phb_error(phb, "Invalid indirect address");
819             break;
820         }
821         size = (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_4B) ? 4 : 8;
822         offset = GETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR, phb->scom_hv_ind_addr_reg);
823         pnv_phb4_reg_write(phb, offset, val, size);
824         if (phb->scom_hv_ind_addr_reg & PHB_SCOM_HV_IND_ADDR_AUTOINC) {
825             offset += size;
826             offset &= 0x3fff;
827             phb->scom_hv_ind_addr_reg = SETFIELD(PHB_SCOM_HV_IND_ADDR_ADDR,
828                                                  phb->scom_hv_ind_addr_reg,
829                                                  offset);
830         }
831         break;
832     case PHB_SCOM_ETU_LEM_FIR:
833     case PHB_SCOM_ETU_LEM_FIR_AND:
834     case PHB_SCOM_ETU_LEM_FIR_OR:
835     case PHB_SCOM_ETU_LEM_FIR_MSK:
836     case PHB_SCOM_ETU_LEM_ERR_MSK_AND:
837     case PHB_SCOM_ETU_LEM_ERR_MSK_OR:
838     case PHB_SCOM_ETU_LEM_ACT0:
839     case PHB_SCOM_ETU_LEM_ACT1:
840     case PHB_SCOM_ETU_LEM_WOF:
841         offset = ((reg - PHB_SCOM_ETU_LEM_FIR) << 3) + PHB_LEM_FIR_ACCUM;
842         pnv_phb4_reg_write(phb, offset, val, size);
843         break;
844     case PHB_SCOM_ETU_PMON_CONFIG:
845     case PHB_SCOM_ETU_PMON_CTR0:
846     case PHB_SCOM_ETU_PMON_CTR1:
847     case PHB_SCOM_ETU_PMON_CTR2:
848     case PHB_SCOM_ETU_PMON_CTR3:
849         offset = ((reg - PHB_SCOM_ETU_PMON_CONFIG) << 3) + PHB_PERFMON_CONFIG;
850         pnv_phb4_reg_write(phb, offset, val, size);
851         break;
852     default:
853         qemu_log_mask(LOG_UNIMP, "phb4: xscom_write 0x%"HWADDR_PRIx
854                       "=%"PRIx64"\n", addr, val);
855     }
856 }
857 
858 const MemoryRegionOps pnv_phb4_xscom_ops = {
859     .read = pnv_phb4_xscom_read,
860     .write = pnv_phb4_xscom_write,
861     .valid.min_access_size = 8,
862     .valid.max_access_size = 8,
863     .impl.min_access_size = 8,
864     .impl.max_access_size = 8,
865     .endianness = DEVICE_BIG_ENDIAN,
866 };
867 
868 static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, hwaddr addr,
869                                             unsigned size)
870 {
871     PnvPHB4 *phb = PNV_PHB4(opaque);
872     uint32_t reg = addr >> 3;
873 
874     /* TODO: add list of allowed registers and error out if not */
875     return phb->nest_regs[reg];
876 }
877 
878 /*
879  * Return the 'stack_no' of a PHB4. 'stack_no' is the order
880  * the PHB4 occupies in the PEC. This is the reverse of what
881  * pnv_phb4_pec_get_phb_id() does.
882  *
883  * E.g. a phb with phb_id = 4 and pec->index = 1 (PEC1) will
884  * be the second phb (stack_no = 1) of the PEC.
885  */
886 static int pnv_phb4_get_phb_stack_no(PnvPHB4 *phb)
887 {
888     PnvPhb4PecState *pec = phb->pec;
889     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
890     int index = pec->index;
891     int stack_no = phb->phb_id;
892 
893     while (index--) {
894         stack_no -= pecc->num_phbs[index];
895     }
896 
897     return stack_no;
898 }
899 
900 static void pnv_phb4_update_regions(PnvPHB4 *phb)
901 {
902     /* Unmap first always */
903     if (memory_region_is_mapped(&phb->mr_regs)) {
904         memory_region_del_subregion(&phb->phbbar, &phb->mr_regs);
905     }
906     if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
907         memory_region_del_subregion(&phb->intbar, &phb->xsrc.esb_mmio);
908     }
909 
910     /* Map registers if enabled */
911     if (memory_region_is_mapped(&phb->phbbar)) {
912         memory_region_add_subregion(&phb->phbbar, 0, &phb->mr_regs);
913     }
914 
915     /* Map ESB if enabled */
916     if (memory_region_is_mapped(&phb->intbar)) {
917         memory_region_add_subregion(&phb->intbar, 0, &phb->xsrc.esb_mmio);
918     }
919 
920     /* Check/update m32 */
921     pnv_phb4_check_all_mbt(phb);
922 }
923 
924 static void pnv_pec_phb_update_map(PnvPHB4 *phb)
925 {
926     PnvPhb4PecState *pec = phb->pec;
927     MemoryRegion *sysmem = get_system_memory();
928     uint64_t bar_en = phb->nest_regs[PEC_NEST_STK_BAR_EN];
929     int stack_no = pnv_phb4_get_phb_stack_no(phb);
930     uint64_t bar, mask, size;
931     char name[64];
932 
933     /*
934      * NOTE: This will really not work well if those are remapped
935      * after the PHB has created its sub regions. We could do better
936      * if we had a way to resize regions but we don't really care
937      * that much in practice as the stuff below really only happens
938      * once early during boot
939      */
940 
941     /* Handle unmaps */
942     if (memory_region_is_mapped(&phb->mmbar0) &&
943         !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
944         memory_region_del_subregion(sysmem, &phb->mmbar0);
945     }
946     if (memory_region_is_mapped(&phb->mmbar1) &&
947         !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
948         memory_region_del_subregion(sysmem, &phb->mmbar1);
949     }
950     if (memory_region_is_mapped(&phb->phbbar) &&
951         !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
952         memory_region_del_subregion(sysmem, &phb->phbbar);
953     }
954     if (memory_region_is_mapped(&phb->intbar) &&
955         !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
956         memory_region_del_subregion(sysmem, &phb->intbar);
957     }
958 
959     /* Update PHB */
960     pnv_phb4_update_regions(phb);
961 
962     /* Handle maps */
963     if (!memory_region_is_mapped(&phb->mmbar0) &&
964         (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
965         bar = phb->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
966         mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
967         size = ((~mask) >> 8) + 1;
968         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio0",
969                  pec->chip_id, pec->index, stack_no);
970         memory_region_init(&phb->mmbar0, OBJECT(phb), name, size);
971         memory_region_add_subregion(sysmem, bar, &phb->mmbar0);
972         phb->mmio0_base = bar;
973         phb->mmio0_size = size;
974     }
975     if (!memory_region_is_mapped(&phb->mmbar1) &&
976         (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
977         bar = phb->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
978         mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
979         size = ((~mask) >> 8) + 1;
980         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio1",
981                  pec->chip_id, pec->index, stack_no);
982         memory_region_init(&phb->mmbar1, OBJECT(phb), name, size);
983         memory_region_add_subregion(sysmem, bar, &phb->mmbar1);
984         phb->mmio1_base = bar;
985         phb->mmio1_size = size;
986     }
987     if (!memory_region_is_mapped(&phb->phbbar) &&
988         (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
989         bar = phb->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
990         size = PNV_PHB4_NUM_REGS << 3;
991         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d",
992                  pec->chip_id, pec->index, stack_no);
993         memory_region_init(&phb->phbbar, OBJECT(phb), name, size);
994         memory_region_add_subregion(sysmem, bar, &phb->phbbar);
995     }
996     if (!memory_region_is_mapped(&phb->intbar) &&
997         (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
998         bar = phb->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
999         size = PNV_PHB4_MAX_INTs << 16;
1000         snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-int",
1001                  phb->pec->chip_id, phb->pec->index, stack_no);
1002         memory_region_init(&phb->intbar, OBJECT(phb), name, size);
1003         memory_region_add_subregion(sysmem, bar, &phb->intbar);
1004     }
1005 
1006     /* Update PHB */
1007     pnv_phb4_update_regions(phb);
1008 }
1009 
1010 static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
1011                                          uint64_t val, unsigned size)
1012 {
1013     PnvPHB4 *phb = PNV_PHB4(opaque);
1014     PnvPhb4PecState *pec = phb->pec;
1015     uint32_t reg = addr >> 3;
1016 
1017     switch (reg) {
1018     case PEC_NEST_STK_PCI_NEST_FIR:
1019         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] = val;
1020         break;
1021     case PEC_NEST_STK_PCI_NEST_FIR_CLR:
1022         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] &= val;
1023         break;
1024     case PEC_NEST_STK_PCI_NEST_FIR_SET:
1025         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR] |= val;
1026         break;
1027     case PEC_NEST_STK_PCI_NEST_FIR_MSK:
1028         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] = val;
1029         break;
1030     case PEC_NEST_STK_PCI_NEST_FIR_MSKC:
1031         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] &= val;
1032         break;
1033     case PEC_NEST_STK_PCI_NEST_FIR_MSKS:
1034         phb->nest_regs[PEC_NEST_STK_PCI_NEST_FIR_MSK] |= val;
1035         break;
1036     case PEC_NEST_STK_PCI_NEST_FIR_ACT0:
1037     case PEC_NEST_STK_PCI_NEST_FIR_ACT1:
1038         phb->nest_regs[reg] = val;
1039         break;
1040     case PEC_NEST_STK_PCI_NEST_FIR_WOF:
1041         phb->nest_regs[reg] = 0;
1042         break;
1043     case PEC_NEST_STK_ERR_REPORT_0:
1044     case PEC_NEST_STK_ERR_REPORT_1:
1045     case PEC_NEST_STK_PBCQ_GNRL_STATUS:
1046         /* Flag error ? */
1047         break;
1048     case PEC_NEST_STK_PBCQ_MODE:
1049         phb->nest_regs[reg] = val & 0xff00000000000000ull;
1050         break;
1051     case PEC_NEST_STK_MMIO_BAR0:
1052     case PEC_NEST_STK_MMIO_BAR0_MASK:
1053     case PEC_NEST_STK_MMIO_BAR1:
1054     case PEC_NEST_STK_MMIO_BAR1_MASK:
1055         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] &
1056             (PEC_NEST_STK_BAR_EN_MMIO0 |
1057              PEC_NEST_STK_BAR_EN_MMIO1)) {
1058             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1059         }
1060         phb->nest_regs[reg] = val & 0xffffffffff000000ull;
1061         break;
1062     case PEC_NEST_STK_PHB_REGS_BAR:
1063         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_PHB) {
1064             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1065         }
1066         phb->nest_regs[reg] = val & 0xffffffffffc00000ull;
1067         break;
1068     case PEC_NEST_STK_INT_BAR:
1069         if (phb->nest_regs[PEC_NEST_STK_BAR_EN] & PEC_NEST_STK_BAR_EN_INT) {
1070             phb_pec_error(pec, "Changing enabled BAR unsupported\n");
1071         }
1072         phb->nest_regs[reg] = val & 0xfffffff000000000ull;
1073         break;
1074     case PEC_NEST_STK_BAR_EN:
1075         phb->nest_regs[reg] = val & 0xf000000000000000ull;
1076         pnv_pec_phb_update_map(phb);
1077         break;
1078     case PEC_NEST_STK_DATA_FRZ_TYPE:
1079     case PEC_NEST_STK_PBCQ_TUN_BAR:
1080         /* Not used for now */
1081         phb->nest_regs[reg] = val;
1082         break;
1083     default:
1084         qemu_log_mask(LOG_UNIMP, "phb4_pec: nest_xscom_write 0x%"HWADDR_PRIx
1085                       "=%"PRIx64"\n", addr, val);
1086     }
1087 }
1088 
1089 static const MemoryRegionOps pnv_pec_stk_nest_xscom_ops = {
1090     .read = pnv_pec_stk_nest_xscom_read,
1091     .write = pnv_pec_stk_nest_xscom_write,
1092     .valid.min_access_size = 8,
1093     .valid.max_access_size = 8,
1094     .impl.min_access_size = 8,
1095     .impl.max_access_size = 8,
1096     .endianness = DEVICE_BIG_ENDIAN,
1097 };
1098 
1099 static uint64_t pnv_pec_stk_pci_xscom_read(void *opaque, hwaddr addr,
1100                                            unsigned size)
1101 {
1102     PnvPHB4 *phb = PNV_PHB4(opaque);
1103     uint32_t reg = addr >> 3;
1104 
1105     /* TODO: add list of allowed registers and error out if not */
1106     return phb->pci_regs[reg];
1107 }
1108 
1109 static void pnv_pec_stk_pci_xscom_write(void *opaque, hwaddr addr,
1110                                         uint64_t val, unsigned size)
1111 {
1112     PnvPHB4 *phb = PNV_PHB4(opaque);
1113     uint32_t reg = addr >> 3;
1114 
1115     switch (reg) {
1116     case PEC_PCI_STK_PCI_FIR:
1117         phb->pci_regs[reg] = val;
1118         break;
1119     case PEC_PCI_STK_PCI_FIR_CLR:
1120         phb->pci_regs[PEC_PCI_STK_PCI_FIR] &= val;
1121         break;
1122     case PEC_PCI_STK_PCI_FIR_SET:
1123         phb->pci_regs[PEC_PCI_STK_PCI_FIR] |= val;
1124         break;
1125     case PEC_PCI_STK_PCI_FIR_MSK:
1126         phb->pci_regs[reg] = val;
1127         break;
1128     case PEC_PCI_STK_PCI_FIR_MSKC:
1129         phb->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] &= val;
1130         break;
1131     case PEC_PCI_STK_PCI_FIR_MSKS:
1132         phb->pci_regs[PEC_PCI_STK_PCI_FIR_MSK] |= val;
1133         break;
1134     case PEC_PCI_STK_PCI_FIR_ACT0:
1135     case PEC_PCI_STK_PCI_FIR_ACT1:
1136         phb->pci_regs[reg] = val;
1137         break;
1138     case PEC_PCI_STK_PCI_FIR_WOF:
1139         phb->pci_regs[reg] = 0;
1140         break;
1141     case PEC_PCI_STK_ETU_RESET:
1142         phb->pci_regs[reg] = val & 0x8000000000000000ull;
1143         /* TODO: Implement reset */
1144         break;
1145     case PEC_PCI_STK_PBAIB_ERR_REPORT:
1146         break;
1147     case PEC_PCI_STK_PBAIB_TX_CMD_CRED:
1148     case PEC_PCI_STK_PBAIB_TX_DAT_CRED:
1149         phb->pci_regs[reg] = val;
1150         break;
1151     default:
1152         qemu_log_mask(LOG_UNIMP, "phb4_pec_stk: pci_xscom_write 0x%"HWADDR_PRIx
1153                       "=%"PRIx64"\n", addr, val);
1154     }
1155 }
1156 
1157 static const MemoryRegionOps pnv_pec_stk_pci_xscom_ops = {
1158     .read = pnv_pec_stk_pci_xscom_read,
1159     .write = pnv_pec_stk_pci_xscom_write,
1160     .valid.min_access_size = 8,
1161     .valid.max_access_size = 8,
1162     .impl.min_access_size = 8,
1163     .impl.max_access_size = 8,
1164     .endianness = DEVICE_BIG_ENDIAN,
1165 };
1166 
1167 static int pnv_phb4_map_irq(PCIDevice *pci_dev, int irq_num)
1168 {
1169     /* Check that out properly ... */
1170     return irq_num & 3;
1171 }
1172 
1173 static void pnv_phb4_set_irq(void *opaque, int irq_num, int level)
1174 {
1175     PnvPHB4 *phb = PNV_PHB4(opaque);
1176     uint32_t lsi_base;
1177 
1178     /* LSI only ... */
1179     if (irq_num > 3) {
1180         phb_error(phb, "IRQ %x is not an LSI", irq_num);
1181     }
1182     lsi_base = GETFIELD(PHB_LSI_SRC_ID, phb->regs[PHB_LSI_SOURCE_ID >> 3]);
1183     lsi_base <<= 3;
1184     qemu_set_irq(phb->qirqs[lsi_base + irq_num], level);
1185 }
1186 
1187 static bool pnv_phb4_resolve_pe(PnvPhb4DMASpace *ds)
1188 {
1189     uint64_t rtt, addr;
1190     uint16_t rte;
1191     int bus_num;
1192     int num_PEs;
1193 
1194     /* Already resolved ? */
1195     if (ds->pe_num != PHB_INVALID_PE) {
1196         return true;
1197     }
1198 
1199     /* We need to lookup the RTT */
1200     rtt = ds->phb->regs[PHB_RTT_BAR >> 3];
1201     if (!(rtt & PHB_RTT_BAR_ENABLE)) {
1202         phb_error(ds->phb, "DMA with RTT BAR disabled !");
1203         /* Set error bits ? fence ? ... */
1204         return false;
1205     }
1206 
1207     /* Read RTE */
1208     bus_num = pci_bus_num(ds->bus);
1209     addr = rtt & PHB_RTT_BASE_ADDRESS_MASK;
1210     addr += 2 * PCI_BUILD_BDF(bus_num, ds->devfn);
1211     if (dma_memory_read(&address_space_memory, addr, &rte,
1212                         sizeof(rte), MEMTXATTRS_UNSPECIFIED)) {
1213         phb_error(ds->phb, "Failed to read RTT entry at 0x%"PRIx64, addr);
1214         /* Set error bits ? fence ? ... */
1215         return false;
1216     }
1217     rte = be16_to_cpu(rte);
1218 
1219     /* Fail upon reading of invalid PE# */
1220     num_PEs = ds->phb->big_phb ? PNV_PHB4_MAX_PEs : (PNV_PHB4_MAX_PEs >> 1);
1221     if (rte >= num_PEs) {
1222         phb_error(ds->phb, "RTE for RID 0x%x invalid (%04x", ds->devfn, rte);
1223         rte &= num_PEs - 1;
1224     }
1225     ds->pe_num = rte;
1226     return true;
1227 }
1228 
1229 static void pnv_phb4_translate_tve(PnvPhb4DMASpace *ds, hwaddr addr,
1230                                    bool is_write, uint64_t tve,
1231                                    IOMMUTLBEntry *tlb)
1232 {
1233     uint64_t tta = GETFIELD(IODA3_TVT_TABLE_ADDR, tve);
1234     int32_t  lev = GETFIELD(IODA3_TVT_NUM_LEVELS, tve);
1235     uint32_t tts = GETFIELD(IODA3_TVT_TCE_TABLE_SIZE, tve);
1236     uint32_t tps = GETFIELD(IODA3_TVT_IO_PSIZE, tve);
1237 
1238     /* Invalid levels */
1239     if (lev > 4) {
1240         phb_error(ds->phb, "Invalid #levels in TVE %d", lev);
1241         return;
1242     }
1243 
1244     /* Invalid entry */
1245     if (tts == 0) {
1246         phb_error(ds->phb, "Access to invalid TVE");
1247         return;
1248     }
1249 
1250     /* IO Page Size of 0 means untranslated, else use TCEs */
1251     if (tps == 0) {
1252         /* TODO: Handle boundaries */
1253 
1254         /* Use 4k pages like q35 ... for now */
1255         tlb->iova = addr & 0xfffffffffffff000ull;
1256         tlb->translated_addr = addr & 0x0003fffffffff000ull;
1257         tlb->addr_mask = 0xfffull;
1258         tlb->perm = IOMMU_RW;
1259     } else {
1260         uint32_t tce_shift, tbl_shift, sh;
1261         uint64_t base, taddr, tce, tce_mask;
1262 
1263         /* Address bits per bottom level TCE entry */
1264         tce_shift = tps + 11;
1265 
1266         /* Address bits per table level */
1267         tbl_shift = tts + 8;
1268 
1269         /* Top level table base address */
1270         base = tta << 12;
1271 
1272         /* Total shift to first level */
1273         sh = tbl_shift * lev + tce_shift;
1274 
1275         /* TODO: Limit to support IO page sizes */
1276 
1277         /* TODO: Multi-level untested */
1278         do {
1279             lev--;
1280 
1281             /* Grab the TCE address */
1282             taddr = base | (((addr >> sh) & ((1ul << tbl_shift) - 1)) << 3);
1283             if (dma_memory_read(&address_space_memory, taddr, &tce,
1284                                 sizeof(tce), MEMTXATTRS_UNSPECIFIED)) {
1285                 phb_error(ds->phb, "Failed to read TCE at 0x%"PRIx64, taddr);
1286                 return;
1287             }
1288             tce = be64_to_cpu(tce);
1289 
1290             /* Check permission for indirect TCE */
1291             if ((lev >= 0) && !(tce & 3)) {
1292                 phb_error(ds->phb, "Invalid indirect TCE at 0x%"PRIx64, taddr);
1293                 phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
1294                            is_write ? 'W' : 'R', tve);
1295                 phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
1296                            tta, lev, tts, tps);
1297                 return;
1298             }
1299             sh -= tbl_shift;
1300             base = tce & ~0xfffull;
1301         } while (lev >= 0);
1302 
1303         /* We exit the loop with TCE being the final TCE */
1304         if ((is_write & !(tce & 2)) || ((!is_write) && !(tce & 1))) {
1305             phb_error(ds->phb, "TCE access fault at 0x%"PRIx64, taddr);
1306             phb_error(ds->phb, " xlate %"PRIx64":%c TVE=%"PRIx64, addr,
1307                        is_write ? 'W' : 'R', tve);
1308             phb_error(ds->phb, " tta=%"PRIx64" lev=%d tts=%d tps=%d",
1309                        tta, lev, tts, tps);
1310             return;
1311         }
1312         tce_mask = ~((1ull << tce_shift) - 1);
1313         tlb->iova = addr & tce_mask;
1314         tlb->translated_addr = tce & tce_mask;
1315         tlb->addr_mask = ~tce_mask;
1316         tlb->perm = tce & 3;
1317     }
1318 }
1319 
1320 static IOMMUTLBEntry pnv_phb4_translate_iommu(IOMMUMemoryRegion *iommu,
1321                                               hwaddr addr,
1322                                               IOMMUAccessFlags flag,
1323                                               int iommu_idx)
1324 {
1325     PnvPhb4DMASpace *ds = container_of(iommu, PnvPhb4DMASpace, dma_mr);
1326     int tve_sel;
1327     uint64_t tve, cfg;
1328     IOMMUTLBEntry ret = {
1329         .target_as = &address_space_memory,
1330         .iova = addr,
1331         .translated_addr = 0,
1332         .addr_mask = ~(hwaddr)0,
1333         .perm = IOMMU_NONE,
1334     };
1335 
1336     /* Resolve PE# */
1337     if (!pnv_phb4_resolve_pe(ds)) {
1338         phb_error(ds->phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1339                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1340         return ret;
1341     }
1342 
1343     /* Check top bits */
1344     switch (addr >> 60) {
1345     case 00:
1346         /* DMA or 32-bit MSI ? */
1347         cfg = ds->phb->regs[PHB_PHB4_CONFIG >> 3];
1348         if ((cfg & PHB_PHB4C_32BIT_MSI_EN) &&
1349             ((addr & 0xffffffffffff0000ull) == 0xffff0000ull)) {
1350             phb_error(ds->phb, "xlate on 32-bit MSI region");
1351             return ret;
1352         }
1353         /* Choose TVE XXX Use PHB4 Control Register */
1354         tve_sel = (addr >> 59) & 1;
1355         tve = ds->phb->ioda_TVT[ds->pe_num * 2 + tve_sel];
1356         pnv_phb4_translate_tve(ds, addr, flag & IOMMU_WO, tve, &ret);
1357         break;
1358     case 01:
1359         phb_error(ds->phb, "xlate on 64-bit MSI region");
1360         break;
1361     default:
1362         phb_error(ds->phb, "xlate on unsupported address 0x%"PRIx64, addr);
1363     }
1364     return ret;
1365 }
1366 
/*
 * QOM type name of the PHB4 IOMMU memory region, and the instance
 * checker PNV_PHB4_IOMMU_MEMORY_REGION() for that type.
 */
#define TYPE_PNV_PHB4_IOMMU_MEMORY_REGION "pnv-phb4-iommu-memory-region"
DECLARE_INSTANCE_CHECKER(IOMMUMemoryRegion, PNV_PHB4_IOMMU_MEMORY_REGION,
                         TYPE_PNV_PHB4_IOMMU_MEMORY_REGION)
1370 
1371 static void pnv_phb4_iommu_memory_region_class_init(ObjectClass *klass,
1372                                                     void *data)
1373 {
1374     IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
1375 
1376     imrc->translate = pnv_phb4_translate_iommu;
1377 }
1378 
1379 static const TypeInfo pnv_phb4_iommu_memory_region_info = {
1380     .parent = TYPE_IOMMU_MEMORY_REGION,
1381     .name = TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1382     .class_init = pnv_phb4_iommu_memory_region_class_init,
1383 };
1384 
1385 /*
1386  * Return the index/phb-id of a PHB4 that belongs to a
1387  * pec->stacks[stack_index] stack.
1388  */
1389 int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int stack_index)
1390 {
1391     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
1392     int index = pec->index;
1393     int offset = 0;
1394 
1395     while (index--) {
1396         offset += pecc->num_phbs[index];
1397     }
1398 
1399     return offset + stack_index;
1400 }
1401 
1402 /*
1403  * MSI/MSIX memory region implementation.
1404  * The handler handles both MSI and MSIX.
1405  */
1406 static void pnv_phb4_msi_write(void *opaque, hwaddr addr,
1407                                uint64_t data, unsigned size)
1408 {
1409     PnvPhb4DMASpace *ds = opaque;
1410     PnvPHB4 *phb = ds->phb;
1411 
1412     uint32_t src = ((addr >> 4) & 0xffff) | (data & 0x1f);
1413 
1414     /* Resolve PE# */
1415     if (!pnv_phb4_resolve_pe(ds)) {
1416         phb_error(phb, "Failed to resolve PE# for bus @%p (%d) devfn 0x%x",
1417                    ds->bus, pci_bus_num(ds->bus), ds->devfn);
1418         return;
1419     }
1420 
1421     /* TODO: Check it doesn't collide with LSIs */
1422     if (src >= phb->xsrc.nr_irqs) {
1423         phb_error(phb, "MSI %d out of bounds", src);
1424         return;
1425     }
1426 
1427     /* TODO: check PE/MSI assignement */
1428 
1429     qemu_irq_pulse(phb->qirqs[src]);
1430 }
1431 
1432 /* There is no .read as the read result is undefined by PCI spec */
1433 static uint64_t pnv_phb4_msi_read(void *opaque, hwaddr addr, unsigned size)
1434 {
1435     PnvPhb4DMASpace *ds = opaque;
1436 
1437     phb_error(ds->phb, "Invalid MSI read @ 0x%" HWADDR_PRIx, addr);
1438     return -1;
1439 }
1440 
1441 static const MemoryRegionOps pnv_phb4_msi_ops = {
1442     .read = pnv_phb4_msi_read,
1443     .write = pnv_phb4_msi_write,
1444     .endianness = DEVICE_LITTLE_ENDIAN
1445 };
1446 
1447 static PnvPhb4DMASpace *pnv_phb4_dma_find(PnvPHB4 *phb, PCIBus *bus, int devfn)
1448 {
1449     PnvPhb4DMASpace *ds;
1450 
1451     QLIST_FOREACH(ds, &phb->dma_spaces, list) {
1452         if (ds->bus == bus && ds->devfn == devfn) {
1453             break;
1454         }
1455     }
1456     return ds;
1457 }
1458 
1459 static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void *opaque, int devfn)
1460 {
1461     PnvPHB4 *phb = opaque;
1462     PnvPhb4DMASpace *ds;
1463     char name[32];
1464 
1465     ds = pnv_phb4_dma_find(phb, bus, devfn);
1466 
1467     if (ds == NULL) {
1468         ds = g_new0(PnvPhb4DMASpace, 1);
1469         ds->bus = bus;
1470         ds->devfn = devfn;
1471         ds->pe_num = PHB_INVALID_PE;
1472         ds->phb = phb;
1473         snprintf(name, sizeof(name), "phb4-%d.%d-iommu", phb->chip_id,
1474                  phb->phb_id);
1475         memory_region_init_iommu(&ds->dma_mr, sizeof(ds->dma_mr),
1476                                  TYPE_PNV_PHB4_IOMMU_MEMORY_REGION,
1477                                  OBJECT(phb), name, UINT64_MAX);
1478         address_space_init(&ds->dma_as, MEMORY_REGION(&ds->dma_mr),
1479                            name);
1480         memory_region_init_io(&ds->msi32_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1481                               ds, "msi32", 0x10000);
1482         memory_region_init_io(&ds->msi64_mr, OBJECT(phb), &pnv_phb4_msi_ops,
1483                               ds, "msi64", 0x100000);
1484         pnv_phb4_update_msi_regions(ds);
1485 
1486         QLIST_INSERT_HEAD(&phb->dma_spaces, ds, list);
1487     }
1488     return &ds->dma_as;
1489 }
1490 
1491 static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
1492 {
1493     PnvPhb4PecState *pec = phb->pec;
1494     PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
1495     int stack_no = pnv_phb4_get_phb_stack_no(phb);
1496     uint32_t pec_nest_base;
1497     uint32_t pec_pci_base;
1498     char name[64];
1499 
1500     assert(pec);
1501 
1502     /* Initialize the XSCOM regions for the stack registers */
1503     snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-phb-%d",
1504              pec->chip_id, pec->index, stack_no);
1505     pnv_xscom_region_init(&phb->nest_regs_mr, OBJECT(phb),
1506                           &pnv_pec_stk_nest_xscom_ops, phb, name,
1507                           PHB4_PEC_NEST_STK_REGS_COUNT);
1508 
1509     snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
1510              pec->chip_id, pec->index, stack_no);
1511     pnv_xscom_region_init(&phb->pci_regs_mr, OBJECT(phb),
1512                           &pnv_pec_stk_pci_xscom_ops, phb, name,
1513                           PHB4_PEC_PCI_STK_REGS_COUNT);
1514 
1515     /* PHB pass-through */
1516     snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
1517              pec->chip_id, pec->index, stack_no);
1518     pnv_xscom_region_init(&phb->phb_regs_mr, OBJECT(phb),
1519                           &pnv_phb4_xscom_ops, phb, name, 0x40);
1520 
1521     pec_nest_base = pecc->xscom_nest_base(pec);
1522     pec_pci_base = pecc->xscom_pci_base(pec);
1523 
1524     /* Populate the XSCOM address space. */
1525     pnv_xscom_add_subregion(pec->chip,
1526                             pec_nest_base + 0x40 * (stack_no + 1),
1527                             &phb->nest_regs_mr);
1528     pnv_xscom_add_subregion(pec->chip,
1529                             pec_pci_base + 0x40 * (stack_no + 1),
1530                             &phb->pci_regs_mr);
1531     pnv_xscom_add_subregion(pec->chip,
1532                             pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
1533                             0x40 * stack_no,
1534                             &phb->phb_regs_mr);
1535 }
1536 
1537 static void pnv_phb4_instance_init(Object *obj)
1538 {
1539     PnvPHB4 *phb = PNV_PHB4(obj);
1540 
1541     QLIST_INIT(&phb->dma_spaces);
1542 
1543     /* XIVE interrupt source object */
1544     object_initialize_child(obj, "source", &phb->xsrc, TYPE_XIVE_SOURCE);
1545 }
1546 
1547 static void pnv_phb4_realize(DeviceState *dev, Error **errp)
1548 {
1549     PnvPHB4 *phb = PNV_PHB4(dev);
1550     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
1551     XiveSource *xsrc = &phb->xsrc;
1552     int nr_irqs;
1553     char name[32];
1554 
1555     /* Set the "big_phb" flag */
1556     phb->big_phb = phb->phb_id == 0 || phb->phb_id == 3;
1557 
1558     /* Controller Registers */
1559     snprintf(name, sizeof(name), "phb4-%d.%d-regs", phb->chip_id,
1560              phb->phb_id);
1561     memory_region_init_io(&phb->mr_regs, OBJECT(phb), &pnv_phb4_reg_ops, phb,
1562                           name, 0x2000);
1563 
1564     /*
1565      * PHB4 doesn't support IO space. However, qemu gets very upset if
1566      * we don't have an IO region to anchor IO BARs onto so we just
1567      * initialize one which we never hook up to anything
1568      */
1569 
1570     snprintf(name, sizeof(name), "phb4-%d.%d-pci-io", phb->chip_id,
1571              phb->phb_id);
1572     memory_region_init(&phb->pci_io, OBJECT(phb), name, 0x10000);
1573 
1574     snprintf(name, sizeof(name), "phb4-%d.%d-pci-mmio", phb->chip_id,
1575              phb->phb_id);
1576     memory_region_init(&phb->pci_mmio, OBJECT(phb), name,
1577                        PCI_MMIO_TOTAL_SIZE);
1578 
1579     pci->bus = pci_register_root_bus(dev, dev->id,
1580                                      pnv_phb4_set_irq, pnv_phb4_map_irq, phb,
1581                                      &phb->pci_mmio, &phb->pci_io,
1582                                      0, 4, TYPE_PNV_PHB4_ROOT_BUS);
1583     pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb);
1584     pci->bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
1585 
1586     /* Setup XIVE Source */
1587     if (phb->big_phb) {
1588         nr_irqs = PNV_PHB4_MAX_INTs;
1589     } else {
1590         nr_irqs = PNV_PHB4_MAX_INTs >> 1;
1591     }
1592     object_property_set_int(OBJECT(xsrc), "nr-irqs", nr_irqs, &error_fatal);
1593     object_property_set_link(OBJECT(xsrc), "xive", OBJECT(phb), &error_fatal);
1594     if (!qdev_realize(DEVICE(xsrc), NULL, errp)) {
1595         return;
1596     }
1597 
1598     pnv_phb4_update_xsrc(phb);
1599 
1600     phb->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
1601 
1602     pnv_phb4_xscom_realize(phb);
1603 }
1604 
1605 static const char *pnv_phb4_root_bus_path(PCIHostState *host_bridge,
1606                                           PCIBus *rootbus)
1607 {
1608     PnvPHB4 *phb = PNV_PHB4(host_bridge);
1609 
1610     snprintf(phb->bus_path, sizeof(phb->bus_path), "00%02x:%02x",
1611              phb->chip_id, phb->phb_id);
1612     return phb->bus_path;
1613 }
1614 
1615 /*
1616  * Address base trigger mode (POWER10)
1617  *
1618  * Trigger directly the IC ESB page
1619  */
1620 static void pnv_phb4_xive_notify_abt(PnvPHB4 *phb, uint32_t srcno,
1621                                      bool pq_checked)
1622 {
1623     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1624     uint64_t data = 0; /* trigger data : don't care */
1625     hwaddr addr;
1626     MemTxResult result;
1627     int esb_shift;
1628 
1629     if (notif_port & PHB_INT_NOTIFY_ADDR_64K) {
1630         esb_shift = 16;
1631     } else {
1632         esb_shift = 12;
1633     }
1634 
1635     /* Compute the address of the IC ESB management page */
1636     addr = (notif_port & ~PHB_INT_NOTIFY_ADDR_64K);
1637     addr |= (1ull << (esb_shift + 1)) * srcno;
1638     addr |= (1ull << esb_shift);
1639 
1640     /*
1641      * When the PQ state bits are checked on the PHB, the associated
1642      * PQ state bits on the IC should be ignored. Use the unconditional
1643      * trigger offset to inject a trigger on the IC. This is always
1644      * the case for LSIs
1645      */
1646     if (pq_checked) {
1647         addr |= XIVE_ESB_INJECT;
1648     }
1649 
1650     trace_pnv_phb4_xive_notify_ic(addr, data);
1651 
1652     address_space_stq_be(&address_space_memory, addr, data,
1653                          MEMTXATTRS_UNSPECIFIED, &result);
1654     if (result != MEMTX_OK) {
1655         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", addr);
1656         return;
1657     }
1658 }
1659 
1660 static void pnv_phb4_xive_notify_ic(PnvPHB4 *phb, uint32_t srcno,
1661                                     bool pq_checked)
1662 {
1663     uint64_t notif_port = phb->regs[PHB_INT_NOTIFY_ADDR >> 3];
1664     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1665     uint64_t data = offset | srcno;
1666     MemTxResult result;
1667 
1668     if (pq_checked) {
1669         data |= XIVE_TRIGGER_PQ;
1670     }
1671 
1672     trace_pnv_phb4_xive_notify_ic(notif_port, data);
1673 
1674     address_space_stq_be(&address_space_memory, notif_port, data,
1675                          MEMTXATTRS_UNSPECIFIED, &result);
1676     if (result != MEMTX_OK) {
1677         phb_error(phb, "trigger failed @%"HWADDR_PRIx "\n", notif_port);
1678         return;
1679     }
1680 }
1681 
1682 static void pnv_phb4_xive_notify(XiveNotifier *xf, uint32_t srcno,
1683                                  bool pq_checked)
1684 {
1685     PnvPHB4 *phb = PNV_PHB4(xf);
1686 
1687     if (phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_ABT_MODE) {
1688         pnv_phb4_xive_notify_abt(phb, srcno, pq_checked);
1689     } else {
1690         pnv_phb4_xive_notify_ic(phb, srcno, pq_checked);
1691     }
1692 }
1693 
1694 static Property pnv_phb4_properties[] = {
1695         DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
1696         DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
1697         DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
1698                          PnvPhb4PecState *),
1699         DEFINE_PROP_END_OF_LIST(),
1700 };
1701 
1702 static void pnv_phb4_class_init(ObjectClass *klass, void *data)
1703 {
1704     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
1705     DeviceClass *dc = DEVICE_CLASS(klass);
1706     XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass);
1707 
1708     hc->root_bus_path   = pnv_phb4_root_bus_path;
1709     dc->realize         = pnv_phb4_realize;
1710     device_class_set_props(dc, pnv_phb4_properties);
1711     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
1712     dc->user_creatable  = false;
1713 
1714     xfc->notify         = pnv_phb4_xive_notify;
1715 }
1716 
1717 static const TypeInfo pnv_phb4_type_info = {
1718     .name          = TYPE_PNV_PHB4,
1719     .parent        = TYPE_PCIE_HOST_BRIDGE,
1720     .instance_init = pnv_phb4_instance_init,
1721     .instance_size = sizeof(PnvPHB4),
1722     .class_init    = pnv_phb4_class_init,
1723     .interfaces = (InterfaceInfo[]) {
1724             { TYPE_XIVE_NOTIFIER },
1725             { },
1726     }
1727 };
1728 
1729 static const TypeInfo pnv_phb5_type_info = {
1730     .name          = TYPE_PNV_PHB5,
1731     .parent        = TYPE_PNV_PHB4,
1732     .instance_size = sizeof(PnvPHB4),
1733 };
1734 
1735 static void pnv_phb4_root_bus_class_init(ObjectClass *klass, void *data)
1736 {
1737     BusClass *k = BUS_CLASS(klass);
1738 
1739     /*
1740      * PHB4 has only a single root complex. Enforce the limit on the
1741      * parent bus
1742      */
1743     k->max_dev = 1;
1744 }
1745 
1746 static const TypeInfo pnv_phb4_root_bus_info = {
1747     .name = TYPE_PNV_PHB4_ROOT_BUS,
1748     .parent = TYPE_PCIE_BUS,
1749     .class_init = pnv_phb4_root_bus_class_init,
1750     .interfaces = (InterfaceInfo[]) {
1751         { INTERFACE_PCIE_DEVICE },
1752         { }
1753     },
1754 };
1755 
1756 static void pnv_phb4_root_port_reset(DeviceState *dev)
1757 {
1758     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1759     PCIDevice *d = PCI_DEVICE(dev);
1760     uint8_t *conf = d->config;
1761 
1762     rpc->parent_reset(dev);
1763 
1764     pci_byte_test_and_set_mask(conf + PCI_IO_BASE,
1765                                PCI_IO_RANGE_MASK & 0xff);
1766     pci_byte_test_and_clear_mask(conf + PCI_IO_LIMIT,
1767                                  PCI_IO_RANGE_MASK & 0xff);
1768     pci_set_word(conf + PCI_MEMORY_BASE, 0);
1769     pci_set_word(conf + PCI_MEMORY_LIMIT, 0xfff0);
1770     pci_set_word(conf + PCI_PREF_MEMORY_BASE, 0x1);
1771     pci_set_word(conf + PCI_PREF_MEMORY_LIMIT, 0xfff1);
1772     pci_set_long(conf + PCI_PREF_BASE_UPPER32, 0x1); /* Hack */
1773     pci_set_long(conf + PCI_PREF_LIMIT_UPPER32, 0xffffffff);
1774 }
1775 
1776 static void pnv_phb4_root_port_realize(DeviceState *dev, Error **errp)
1777 {
1778     PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(dev);
1779     PCIDevice *pci = PCI_DEVICE(dev);
1780     PCIBus *bus = pci_get_bus(pci);
1781     PnvPHB4 *phb = NULL;
1782     Error *local_err = NULL;
1783 
1784     phb = (PnvPHB4 *) object_dynamic_cast(OBJECT(bus->qbus.parent),
1785                                           TYPE_PNV_PHB4);
1786 
1787     if (!phb) {
1788         error_setg(errp, "%s must be connected to pnv-phb4 buses", dev->id);
1789         return;
1790     }
1791 
1792     /* Set unique chassis/slot values for the root port */
1793     qdev_prop_set_uint8(&pci->qdev, "chassis", phb->chip_id);
1794     qdev_prop_set_uint16(&pci->qdev, "slot", phb->phb_id);
1795 
1796     rpc->parent_realize(dev, &local_err);
1797     if (local_err) {
1798         error_propagate(errp, local_err);
1799         return;
1800     }
1801 }
1802 
1803 static void pnv_phb4_root_port_class_init(ObjectClass *klass, void *data)
1804 {
1805     DeviceClass *dc = DEVICE_CLASS(klass);
1806     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1807     PCIERootPortClass *rpc = PCIE_ROOT_PORT_CLASS(klass);
1808 
1809     dc->desc     = "IBM PHB4 PCIE Root Port";
1810     dc->user_creatable = false;
1811 
1812     device_class_set_parent_realize(dc, pnv_phb4_root_port_realize,
1813                                     &rpc->parent_realize);
1814     device_class_set_parent_reset(dc, pnv_phb4_root_port_reset,
1815                                   &rpc->parent_reset);
1816 
1817     k->vendor_id = PCI_VENDOR_ID_IBM;
1818     k->device_id = PNV_PHB4_DEVICE_ID;
1819     k->revision  = 0;
1820 
1821     rpc->exp_offset = 0x48;
1822     rpc->aer_offset = 0x100;
1823 
1824     dc->reset = &pnv_phb4_root_port_reset;
1825 }
1826 
1827 static const TypeInfo pnv_phb4_root_port_info = {
1828     .name          = TYPE_PNV_PHB4_ROOT_PORT,
1829     .parent        = TYPE_PCIE_ROOT_PORT,
1830     .instance_size = sizeof(PnvPHB4RootPort),
1831     .class_init    = pnv_phb4_root_port_class_init,
1832 };
1833 
1834 static void pnv_phb5_root_port_class_init(ObjectClass *klass, void *data)
1835 {
1836     DeviceClass *dc = DEVICE_CLASS(klass);
1837     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
1838 
1839     dc->desc     = "IBM PHB5 PCIE Root Port";
1840     dc->user_creatable = false;
1841 
1842     k->vendor_id = PCI_VENDOR_ID_IBM;
1843     k->device_id = PNV_PHB5_DEVICE_ID;
1844 }
1845 
1846 static const TypeInfo pnv_phb5_root_port_info = {
1847     .name          = TYPE_PNV_PHB5_ROOT_PORT,
1848     .parent        = TYPE_PNV_PHB4_ROOT_PORT,
1849     .instance_size = sizeof(PnvPHB4RootPort),
1850     .class_init    = pnv_phb5_root_port_class_init,
1851 };
1852 
1853 static void pnv_phb4_register_types(void)
1854 {
1855     type_register_static(&pnv_phb4_root_bus_info);
1856     type_register_static(&pnv_phb5_root_port_info);
1857     type_register_static(&pnv_phb4_root_port_info);
1858     type_register_static(&pnv_phb4_type_info);
1859     type_register_static(&pnv_phb5_type_info);
1860     type_register_static(&pnv_phb4_iommu_memory_region_info);
1861 }
1862 
1863 type_init(pnv_phb4_register_types);
1864 
1865 void pnv_phb4_pic_print_info(PnvPHB4 *phb, Monitor *mon)
1866 {
1867     uint64_t notif_port =
1868         phb->regs[PHB_INT_NOTIFY_ADDR >> 3] & ~PHB_INT_NOTIFY_ADDR_64K;
1869     uint32_t offset = phb->regs[PHB_INT_NOTIFY_INDEX >> 3];
1870     bool abt = !!(phb->regs[PHB_CTRLR >> 3] & PHB_CTRLR_IRQ_ABT_MODE);
1871 
1872     monitor_printf(mon, "PHB4[%x:%x] Source %08x .. %08x %s @%"HWADDR_PRIx"\n",
1873                    phb->chip_id, phb->phb_id,
1874                    offset, offset + phb->xsrc.nr_irqs - 1,
1875                    abt ? "ABT" : "",
1876                    notif_port);
1877     xive_source_pic_print_info(&phb->xsrc, 0, mon);
1878 }
1879