xref: /qemu/hw/ppc/spapr_pci.c (revision 72ac97cd)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 #include "qemu/error-report.h"
36 
37 #include "hw/pci/pci_bus.h"
38 
/*
 * Function codes checked by the ibm,change-msi handler in this file.
 * Values copied from the kernel arch/powerpc/platforms/pseries/msi.c
 */
enum {
    RTAS_QUERY_FN       = 0,
    RTAS_CHANGE_FN      = 1,
    RTAS_RESET_FN       = 2,
    RTAS_CHANGE_MSI_FN  = 3,
    RTAS_CHANGE_MSIX_FN = 4,
};

/* Interrupt types to return on RTAS_CHANGE_* */
enum {
    RTAS_TYPE_MSI  = 1,
    RTAS_TYPE_MSIX = 2,
};
49 
50 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
51 {
52     sPAPRPHBState *sphb;
53 
54     QLIST_FOREACH(sphb, &spapr->phbs, list) {
55         if (sphb->buid != buid) {
56             continue;
57         }
58         return sphb;
59     }
60 
61     return NULL;
62 }
63 
64 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
65                            uint32_t config_addr)
66 {
67     sPAPRPHBState *sphb = find_phb(spapr, buid);
68     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
69     int bus_num = (config_addr >> 16) & 0xFF;
70     int devfn = (config_addr >> 8) & 0xFF;
71 
72     if (!phb) {
73         return NULL;
74     }
75 
76     return pci_find_device(phb->bus, bus_num, devfn);
77 }
78 
/*
 * Convert an RTAS config address argument into a config space offset.
 *
 * The low byte of @arg supplies offset bits 0..7; bits 28..31 of @arg are
 * moved down to offset bits 8..11, which is how extended config space
 * addresses are encoded.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_bits = (arg >> 20) & 0xf00;
    uint32_t low_bits = arg & 0xff;

    return ext_bits | low_bits;
}
84 
85 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
86                                    uint32_t addr, uint32_t size,
87                                    target_ulong rets)
88 {
89     PCIDevice *pci_dev;
90     uint32_t val;
91 
92     if ((size != 1) && (size != 2) && (size != 4)) {
93         /* access must be 1, 2 or 4 bytes */
94         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
95         return;
96     }
97 
98     pci_dev = find_dev(spapr, buid, addr);
99     addr = rtas_pci_cfgaddr(addr);
100 
101     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
102         /* Access must be to a valid device, within bounds and
103          * naturally aligned */
104         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
105         return;
106     }
107 
108     val = pci_host_config_read_common(pci_dev, addr,
109                                       pci_config_size(pci_dev), size);
110 
111     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
112     rtas_st(rets, 1, val);
113 }
114 
115 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
116                                      uint32_t token, uint32_t nargs,
117                                      target_ulong args,
118                                      uint32_t nret, target_ulong rets)
119 {
120     uint64_t buid;
121     uint32_t size, addr;
122 
123     if ((nargs != 4) || (nret != 2)) {
124         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
125         return;
126     }
127 
128     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
129     size = rtas_ld(args, 3);
130     addr = rtas_ld(args, 0);
131 
132     finish_read_pci_config(spapr, buid, addr, size, rets);
133 }
134 
135 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
136                                  uint32_t token, uint32_t nargs,
137                                  target_ulong args,
138                                  uint32_t nret, target_ulong rets)
139 {
140     uint32_t size, addr;
141 
142     if ((nargs != 2) || (nret != 2)) {
143         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
144         return;
145     }
146 
147     size = rtas_ld(args, 1);
148     addr = rtas_ld(args, 0);
149 
150     finish_read_pci_config(spapr, 0, addr, size, rets);
151 }
152 
153 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
154                                     uint32_t addr, uint32_t size,
155                                     uint32_t val, target_ulong rets)
156 {
157     PCIDevice *pci_dev;
158 
159     if ((size != 1) && (size != 2) && (size != 4)) {
160         /* access must be 1, 2 or 4 bytes */
161         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
162         return;
163     }
164 
165     pci_dev = find_dev(spapr, buid, addr);
166     addr = rtas_pci_cfgaddr(addr);
167 
168     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
169         /* Access must be to a valid device, within bounds and
170          * naturally aligned */
171         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
172         return;
173     }
174 
175     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
176                                  val, size);
177 
178     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
179 }
180 
181 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
182                                       uint32_t token, uint32_t nargs,
183                                       target_ulong args,
184                                       uint32_t nret, target_ulong rets)
185 {
186     uint64_t buid;
187     uint32_t val, size, addr;
188 
189     if ((nargs != 5) || (nret != 1)) {
190         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
191         return;
192     }
193 
194     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
195     val = rtas_ld(args, 4);
196     size = rtas_ld(args, 3);
197     addr = rtas_ld(args, 0);
198 
199     finish_write_pci_config(spapr, buid, addr, size, val, rets);
200 }
201 
202 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
203                                   uint32_t token, uint32_t nargs,
204                                   target_ulong args,
205                                   uint32_t nret, target_ulong rets)
206 {
207     uint32_t val, size, addr;
208 
209     if ((nargs != 3) || (nret != 1)) {
210         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
211         return;
212     }
213 
214 
215     val = rtas_ld(args, 2);
216     size = rtas_ld(args, 1);
217     addr = rtas_ld(args, 0);
218 
219     finish_write_pci_config(spapr, 0, addr, size, val, rets);
220 }
221 
222 /*
223  * Find an entry with config_addr or returns the empty one if not found AND
224  * alloc_new is set.
225  * At the moment the msi_table entries are never released so there is
226  * no point to look till the end of the list if we need to find the free entry.
227  */
228 static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
229                              bool alloc_new)
230 {
231     int i;
232 
233     for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
234         if (!phb->msi_table[i].nvec) {
235             break;
236         }
237         if (phb->msi_table[i].config_addr == config_addr) {
238             return i;
239         }
240     }
241     if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
242         trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
243         return i;
244     }
245 
246     return -1;
247 }
248 
249 /*
250  * Set MSI/MSIX message data.
251  * This is required for msi_notify()/msix_notify() which
252  * will write at the addresses via spapr_msi_write().
253  */
254 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
255                              unsigned first_irq, unsigned req_num)
256 {
257     unsigned i;
258     MSIMessage msg = { .address = addr, .data = first_irq };
259 
260     if (!msix) {
261         msi_set_message(pdev, msg);
262         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
263         return;
264     }
265 
266     for (i = 0; i < req_num; ++i, ++msg.data) {
267         msix_set_message(pdev, i, msg);
268         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
269     }
270 }
271 
272 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
273                                 uint32_t token, uint32_t nargs,
274                                 target_ulong args, uint32_t nret,
275                                 target_ulong rets)
276 {
277     uint32_t config_addr = rtas_ld(args, 0);
278     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
279     unsigned int func = rtas_ld(args, 3);
280     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
281     unsigned int seq_num = rtas_ld(args, 5);
282     unsigned int ret_intr_type;
283     int ndev, irq;
284     sPAPRPHBState *phb = NULL;
285     PCIDevice *pdev = NULL;
286 
287     switch (func) {
288     case RTAS_CHANGE_MSI_FN:
289     case RTAS_CHANGE_FN:
290         ret_intr_type = RTAS_TYPE_MSI;
291         break;
292     case RTAS_CHANGE_MSIX_FN:
293         ret_intr_type = RTAS_TYPE_MSIX;
294         break;
295     default:
296         error_report("rtas_ibm_change_msi(%u) is not implemented", func);
297         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
298         return;
299     }
300 
301     /* Fins sPAPRPHBState */
302     phb = find_phb(spapr, buid);
303     if (phb) {
304         pdev = find_dev(spapr, buid, config_addr);
305     }
306     if (!phb || !pdev) {
307         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
308         return;
309     }
310 
311     /* Releasing MSIs */
312     if (!req_num) {
313         ndev = spapr_msicfg_find(phb, config_addr, false);
314         if (ndev < 0) {
315             trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
316             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
317             return;
318         }
319         trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
320         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
321         rtas_st(rets, 1, 0);
322         return;
323     }
324 
325     /* Enabling MSI */
326 
327     /* Find a device number in the map to add or reuse the existing one */
328     ndev = spapr_msicfg_find(phb, config_addr, true);
329     if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
330         error_report("No free entry for a new MSI device");
331         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
332         return;
333     }
334     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
335 
336     /* Check if there is an old config and MSI number has not changed */
337     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
338         /* Unexpected behaviour */
339         error_report("Cannot reuse MSI config for device#%d", ndev);
340         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
341         return;
342     }
343 
344     /* There is no cached config, allocate MSIs */
345     if (!phb->msi_table[ndev].nvec) {
346         int max_irqs = 0;
347         if (ret_intr_type == RTAS_TYPE_MSI) {
348             max_irqs = msi_nr_vectors_allocated(pdev);
349         } else if (ret_intr_type == RTAS_TYPE_MSIX) {
350             max_irqs = pdev->msix_entries_nr;
351         }
352         if (!max_irqs) {
353             error_report("Requested interrupt type %d is not enabled for device#%d",
354                          ret_intr_type, ndev);
355             rtas_st(rets, 0, -1); /* Hardware error */
356             return;
357         }
358         if (req_num > max_irqs) {
359             req_num = max_irqs;
360         }
361         irq = spapr_allocate_irq_block(req_num, false,
362                                        ret_intr_type == RTAS_TYPE_MSI);
363         if (irq < 0) {
364             error_report("Cannot allocate MSIs for device#%d", ndev);
365             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
366             return;
367         }
368         phb->msi_table[ndev].irq = irq;
369         phb->msi_table[ndev].nvec = req_num;
370         phb->msi_table[ndev].config_addr = config_addr;
371     }
372 
373     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
374     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
375                      phb->msi_table[ndev].irq, req_num);
376 
377     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
378     rtas_st(rets, 1, req_num);
379     rtas_st(rets, 2, ++seq_num);
380     rtas_st(rets, 3, ret_intr_type);
381 
382     trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
383 }
384 
385 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
386                                                    sPAPREnvironment *spapr,
387                                                    uint32_t token,
388                                                    uint32_t nargs,
389                                                    target_ulong args,
390                                                    uint32_t nret,
391                                                    target_ulong rets)
392 {
393     uint32_t config_addr = rtas_ld(args, 0);
394     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
395     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
396     int ndev;
397     sPAPRPHBState *phb = NULL;
398 
399     /* Fins sPAPRPHBState */
400     phb = find_phb(spapr, buid);
401     if (!phb) {
402         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
403         return;
404     }
405 
406     /* Find device descriptor and start IRQ */
407     ndev = spapr_msicfg_find(phb, config_addr, false);
408     if (ndev < 0) {
409         trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
410         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
411         return;
412     }
413 
414     intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
415     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
416                                                            intr_src_num);
417 
418     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
419     rtas_st(rets, 1, intr_src_num);
420     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
421 }
422 
423 static int pci_spapr_swizzle(int slot, int pin)
424 {
425     return (slot + pin) % PCI_NUM_PINS;
426 }
427 
428 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
429 {
430     /*
431      * Here we need to convert pci_dev + irq_num to some unique value
432      * which is less than number of IRQs on the specific bus (4).  We
433      * use standard PCI swizzling, that is (slot number + pin number)
434      * % 4.
435      */
436     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
437 }
438 
439 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
440 {
441     /*
442      * Here we use the number returned by pci_spapr_map_irq to find a
443      * corresponding qemu_irq.
444      */
445     sPAPRPHBState *phb = opaque;
446 
447     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
448     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
449 }
450 
451 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
452 {
453     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
454     PCIINTxRoute route;
455 
456     route.mode = PCI_INTX_ENABLED;
457     route.irq = sphb->lsi_table[pin].irq;
458 
459     return route;
460 }
461 
462 /*
463  * MSI/MSIX memory region implementation.
464  * The handler handles both MSI and MSIX.
465  * For MSI-X, the vector number is encoded as a part of the address,
466  * data is set to 0.
467  * For MSI, the vector number is encoded in least bits in data.
468  */
469 static void spapr_msi_write(void *opaque, hwaddr addr,
470                             uint64_t data, unsigned size)
471 {
472     uint32_t irq = data;
473 
474     trace_spapr_pci_msi_write(addr, data, irq);
475 
476     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
477 }
478 
479 static const MemoryRegionOps spapr_msi_ops = {
480     /* There is no .read as the read result is undefined by PCI spec */
481     .read = NULL,
482     .write = spapr_msi_write,
483     .endianness = DEVICE_LITTLE_ENDIAN
484 };
485 
486 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
487 {
488     uint64_t window_size = 4096;
489 
490     /*
491      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
492      * we need to allocate some memory to catch those writes coming
493      * from msi_notify()/msix_notify().
494      * As MSIMessage:addr is going to be the same and MSIMessage:data
495      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
496      * be used.
497      *
498      * For KVM we want to ensure that this memory is a full page so that
499      * our memory slot is of page size granularity.
500      */
501 #ifdef CONFIG_KVM
502     if (kvm_enabled()) {
503         window_size = getpagesize();
504     }
505 #endif
506 
507     spapr->msi_win_addr = addr;
508     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
509                           "msi", window_size);
510     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
511                                 &spapr->msiwindow);
512 }
513 
514 /*
515  * PHB PCI device
516  */
517 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
518 {
519     sPAPRPHBState *phb = opaque;
520 
521     return &phb->iommu_as;
522 }
523 
524 static void spapr_phb_realize(DeviceState *dev, Error **errp)
525 {
526     SysBusDevice *s = SYS_BUS_DEVICE(dev);
527     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
528     PCIHostState *phb = PCI_HOST_BRIDGE(s);
529     char *namebuf;
530     int i;
531     PCIBus *bus;
532 
533     if (sphb->index != -1) {
534         hwaddr windows_base;
535 
536         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
537             || (sphb->mem_win_addr != -1)
538             || (sphb->io_win_addr != -1)) {
539             error_setg(errp, "Either \"index\" or other parameters must"
540                        " be specified for PAPR PHB, not both");
541             return;
542         }
543 
544         sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
545         sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
546 
547         windows_base = SPAPR_PCI_WINDOW_BASE
548             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
549         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
550         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
551     }
552 
553     if (sphb->buid == -1) {
554         error_setg(errp, "BUID not specified for PHB");
555         return;
556     }
557 
558     if (sphb->dma_liobn == -1) {
559         error_setg(errp, "LIOBN not specified for PHB");
560         return;
561     }
562 
563     if (sphb->mem_win_addr == -1) {
564         error_setg(errp, "Memory window address not specified for PHB");
565         return;
566     }
567 
568     if (sphb->io_win_addr == -1) {
569         error_setg(errp, "IO window address not specified for PHB");
570         return;
571     }
572 
573     if (find_phb(spapr, sphb->buid)) {
574         error_setg(errp, "PCI host bridges must have unique BUIDs");
575         return;
576     }
577 
578     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
579 
580     namebuf = alloca(strlen(sphb->dtbusname) + 32);
581 
582     /* Initialize memory regions */
583     sprintf(namebuf, "%s.mmio", sphb->dtbusname);
584     memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
585 
586     sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
587     memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
588                              namebuf, &sphb->memspace,
589                              SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
590     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
591                                 &sphb->memwindow);
592 
593     /* Initialize IO regions */
594     sprintf(namebuf, "%s.io", sphb->dtbusname);
595     memory_region_init(&sphb->iospace, OBJECT(sphb),
596                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
597 
598     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
599     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
600                              &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
601     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
602                                 &sphb->iowindow);
603 
604     bus = pci_register_bus(dev, NULL,
605                            pci_spapr_set_irq, pci_spapr_map_irq, sphb,
606                            &sphb->memspace, &sphb->iospace,
607                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
608     phb->bus = bus;
609 
610     sphb->dma_window_start = 0;
611     sphb->dma_window_size = 0x40000000;
612     sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
613                                      sphb->dma_window_size);
614     if (!sphb->tcet) {
615         error_setg(errp, "Unable to create TCE table for %s",
616                    sphb->dtbusname);
617         return;
618     }
619     address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
620                        sphb->dtbusname);
621 
622     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
623 
624     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
625 
626     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
627 
628     /* Initialize the LSI table */
629     for (i = 0; i < PCI_NUM_PINS; i++) {
630         uint32_t irq;
631 
632         irq = spapr_allocate_lsi(0);
633         if (!irq) {
634             error_setg(errp, "spapr_allocate_lsi failed");
635             return;
636         }
637 
638         sphb->lsi_table[i].irq = irq;
639     }
640 }
641 
642 static void spapr_phb_reset(DeviceState *qdev)
643 {
644     SysBusDevice *s = SYS_BUS_DEVICE(qdev);
645     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
646 
647     /* Reset the IOMMU state */
648     device_reset(DEVICE(sphb->tcet));
649 }
650 
/*
 * qdev properties of the PHB.
 *
 * A default of -1 marks a property as "not specified"; spapr_phb_realize()
 * checks for that value.  The window sizes default to the
 * SPAPR_PCI_*_WIN_SIZE constants.
 */
static Property spapr_phb_properties[] = {
    DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
    DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
    DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
    DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
    DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
                       SPAPR_PCI_MMIO_WIN_SIZE),
    DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
    DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
                       SPAPR_PCI_IO_WIN_SIZE),
    DEFINE_PROP_END_OF_LIST(),
};
663 
/*
 * Migration description of one LSI table entry.  The irq field uses the
 * _EQUAL variant.
 * NOTE(review): presumably this makes the incoming value a consistency
 * check against the destination's own allocation - confirm in vmstate.h.
 */
static const VMStateDescription vmstate_spapr_pci_lsi = {
    .name = "spapr_pci/lsi",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),

        VMSTATE_END_OF_LIST()
    },
};
674 
675 static const VMStateDescription vmstate_spapr_pci_msi = {
676     .name = "spapr_pci/lsi",
677     .version_id = 1,
678     .minimum_version_id = 1,
679     .fields = (VMStateField[]) {
680         VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
681         VMSTATE_UINT32(irq, struct spapr_pci_msi),
682         VMSTATE_UINT32(nvec, struct spapr_pci_msi),
683 
684         VMSTATE_END_OF_LIST()
685     },
686 };
687 
/*
 * Migration description of the PHB.
 *
 * The configuration fields (BUID, LIOBN, window addresses and sizes) use
 * the _EQUAL variants; the LSI and MSI tables are sent as arrays of the
 * sub-descriptions defined above.
 */
static const VMStateDescription vmstate_spapr_pci = {
    .name = "spapr_pci",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
        VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
        VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
        VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
                             vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
        VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
                             vmstate_spapr_pci_msi, struct spapr_pci_msi),

        VMSTATE_END_OF_LIST()
    },
};
707 
708 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
709                                            PCIBus *rootbus)
710 {
711     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
712 
713     return sphb->dtbusname;
714 }
715 
716 static void spapr_phb_class_init(ObjectClass *klass, void *data)
717 {
718     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
719     DeviceClass *dc = DEVICE_CLASS(klass);
720 
721     hc->root_bus_path = spapr_phb_root_bus_path;
722     dc->realize = spapr_phb_realize;
723     dc->props = spapr_phb_properties;
724     dc->reset = spapr_phb_reset;
725     dc->vmsd = &vmstate_spapr_pci;
726     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
727     dc->cannot_instantiate_with_device_add_yet = false;
728 }
729 
/* QOM type description of the sPAPR PHB, derived from TYPE_PCI_HOST_BRIDGE */
static const TypeInfo spapr_phb_info = {
    .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
    .parent        = TYPE_PCI_HOST_BRIDGE,
    .instance_size = sizeof(sPAPRPHBState),
    .class_init    = spapr_phb_class_init,
};
736 
737 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
738 {
739     DeviceState *dev;
740 
741     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
742     qdev_prop_set_uint32(dev, "index", index);
743     qdev_init_nofail(dev);
744 
745     return PCI_HOST_BRIDGE(dev);
746 }
747 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) places the low @l bits of @x at bit position @p.  The
 * arithmetic is done in uint32_t so that a field reaching bit 31 (b_n)
 * does not shift into the sign bit of an int, which is undefined.
 */
#define b_x(x, p, l)    (((uint32_t)(x) & ((UINT32_C(1) << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
758 
/*
 * Add the device tree node describing @phb to @fdt.
 *
 * The node is created below the root (offset 0) and named "pci@<buid>".
 * It receives the PHB properties, a "ranges" property with one entry for
 * the IO window and one for the memory window, an interrupt map covering
 * every slot/pin combination, and the DMA window description.
 *
 * @xics_phandle is stored in every interrupt-map entry as the interrupt
 * parent.
 *
 * Returns 0 on success or the negative value returned by the failing
 * libfdt call.
 */
int spapr_populate_pci_dt(sPAPRPHBState *phb,
                          uint32_t xics_phandle,
                          void *fdt)
{
    int bus_off, i, j;
    char nodename[256];
    uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
    /* Packed so that the array matches the property layout byte for byte */
    struct {
        uint32_t hi;
        uint64_t child;
        uint64_t parent;
        uint64_t size;
    } QEMU_PACKED ranges[] = {
        {
            /* IO window: space code 1, bus address 0 */
            cpu_to_be32(b_ss(1)), cpu_to_be64(0),
            cpu_to_be64(phb->io_win_addr),
            cpu_to_be64(memory_region_size(&phb->iospace)),
        },
        {
            /* Memory window: space code 2 */
            cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
            cpu_to_be64(phb->mem_win_addr),
            cpu_to_be64(memory_region_size(&phb->memwindow)),
        },
    };
    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
    /* Match on the device number and on the interrupt pin cell only */
    uint32_t interrupt_map_mask[] = {
        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];

    /* Start populating the FDT */
    sprintf(nodename, "pci@%" PRIx64, phb->buid);
    bus_off = fdt_add_subnode(fdt, 0, nodename);
    if (bus_off < 0) {
        return bus_off;
    }

/* Evaluate a libfdt call and return its result from this function on error */
#define _FDT(exp) \
    do { \
        int ret = (exp);                                           \
        if (ret < 0) {                                             \
            return ret;                                            \
        }                                                          \
    } while (0)

    /* Write PHB properties */
    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));

    /* Build the interrupt-map; this must match what is done
     * in pci_spapr_map_irq
     */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
    for (i = 0; i < PCI_SLOT_MAX; i++) {
        for (j = 0; j < PCI_NUM_PINS; j++) {
            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
            int lsi_num = pci_spapr_swizzle(i, j);

            /* Cells 0-2: child unit address (slot i, function 0) */
            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
            irqmap[1] = 0;
            irqmap[2] = 0;
            /* Cell 3: interrupt pin, numbered from 1 */
            irqmap[3] = cpu_to_be32(j+1);
            /* Cells 4-6: interrupt parent and its specifier */
            irqmap[4] = cpu_to_be32(xics_phandle);
            irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
            /* NOTE(review): 0x8 presumably encodes the sense - confirm */
            irqmap[6] = cpu_to_be32(0x8);
        }
    }
    /* Write interrupt map */
    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
                     sizeof(interrupt_map)));

    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
                 phb->dma_liobn, phb->dma_window_start,
                 phb->dma_window_size);

    return 0;
}
844 
845 void spapr_pci_rtas_init(void)
846 {
847     spapr_rtas_register("read-pci-config", rtas_read_pci_config);
848     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
849     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
850     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
851     if (msi_supported) {
852         spapr_rtas_register("ibm,query-interrupt-source-number",
853                             rtas_ibm_query_interrupt_source_number);
854         spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
855     }
856 }
857 
/* Register the sPAPR PHB QOM type; hooked up through type_init() below */
static void spapr_pci_register_types(void)
{
    type_register_static(&spapr_phb_info);
}

type_init(spapr_pci_register_types)
864