xref: /qemu/hw/ppc/spapr_pci.c (revision bae3f92a)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 
36 #include "hw/pci/pci_bus.h"
37 
/*
 * Function codes accepted by rtas_ibm_change_msi() in its "func" argument.
 * Copied from the kernel arch/powerpc/platforms/pseries/msi.c
 */
#define RTAS_QUERY_FN           0   /* not handled by rtas_ibm_change_msi() */
#define RTAS_CHANGE_FN          1   /* handled like RTAS_CHANGE_MSI_FN */
#define RTAS_RESET_FN           2   /* not handled by rtas_ibm_change_msi() */
#define RTAS_CHANGE_MSI_FN      3   /* answered with RTAS_TYPE_MSI */
#define RTAS_CHANGE_MSIX_FN     4   /* answered with RTAS_TYPE_MSIX */

/* Interrupt types to return on RTAS_CHANGE_* */
#define RTAS_TYPE_MSI           1   /* plain MSI */
#define RTAS_TYPE_MSIX          2   /* MSI-X */
48 
49 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
50 {
51     sPAPRPHBState *sphb;
52 
53     QLIST_FOREACH(sphb, &spapr->phbs, list) {
54         if (sphb->buid != buid) {
55             continue;
56         }
57         return sphb;
58     }
59 
60     return NULL;
61 }
62 
63 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
64                            uint32_t config_addr)
65 {
66     sPAPRPHBState *sphb = find_phb(spapr, buid);
67     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
68     int bus_num = (config_addr >> 16) & 0xFF;
69     int devfn = (config_addr >> 8) & 0xFF;
70 
71     if (!phb) {
72         return NULL;
73     }
74 
75     return pci_find_device(phb->bus, bus_num, devfn);
76 }
77 
/*
 * Extract the config-space register offset from an RTAS config address.
 * Bits 31:28 of @arg supply offset bits 11:8 (the extended config space
 * part) and bits 7:0 of @arg supply offset bits 7:0.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_bits = (arg >> 20) & 0xf00;
    uint32_t low_bits = arg & 0xff;

    return ext_bits | low_bits;
}
83 
84 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
85                                    uint32_t addr, uint32_t size,
86                                    target_ulong rets)
87 {
88     PCIDevice *pci_dev;
89     uint32_t val;
90 
91     if ((size != 1) && (size != 2) && (size != 4)) {
92         /* access must be 1, 2 or 4 bytes */
93         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
94         return;
95     }
96 
97     pci_dev = find_dev(spapr, buid, addr);
98     addr = rtas_pci_cfgaddr(addr);
99 
100     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
101         /* Access must be to a valid device, within bounds and
102          * naturally aligned */
103         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
104         return;
105     }
106 
107     val = pci_host_config_read_common(pci_dev, addr,
108                                       pci_config_size(pci_dev), size);
109 
110     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
111     rtas_st(rets, 1, val);
112 }
113 
114 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
115                                      uint32_t token, uint32_t nargs,
116                                      target_ulong args,
117                                      uint32_t nret, target_ulong rets)
118 {
119     uint64_t buid;
120     uint32_t size, addr;
121 
122     if ((nargs != 4) || (nret != 2)) {
123         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
124         return;
125     }
126 
127     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
128     size = rtas_ld(args, 3);
129     addr = rtas_ld(args, 0);
130 
131     finish_read_pci_config(spapr, buid, addr, size, rets);
132 }
133 
134 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
135                                  uint32_t token, uint32_t nargs,
136                                  target_ulong args,
137                                  uint32_t nret, target_ulong rets)
138 {
139     uint32_t size, addr;
140 
141     if ((nargs != 2) || (nret != 2)) {
142         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
143         return;
144     }
145 
146     size = rtas_ld(args, 1);
147     addr = rtas_ld(args, 0);
148 
149     finish_read_pci_config(spapr, 0, addr, size, rets);
150 }
151 
152 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
153                                     uint32_t addr, uint32_t size,
154                                     uint32_t val, target_ulong rets)
155 {
156     PCIDevice *pci_dev;
157 
158     if ((size != 1) && (size != 2) && (size != 4)) {
159         /* access must be 1, 2 or 4 bytes */
160         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
161         return;
162     }
163 
164     pci_dev = find_dev(spapr, buid, addr);
165     addr = rtas_pci_cfgaddr(addr);
166 
167     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
168         /* Access must be to a valid device, within bounds and
169          * naturally aligned */
170         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
171         return;
172     }
173 
174     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
175                                  val, size);
176 
177     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
178 }
179 
180 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
181                                       uint32_t token, uint32_t nargs,
182                                       target_ulong args,
183                                       uint32_t nret, target_ulong rets)
184 {
185     uint64_t buid;
186     uint32_t val, size, addr;
187 
188     if ((nargs != 5) || (nret != 1)) {
189         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
190         return;
191     }
192 
193     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
194     val = rtas_ld(args, 4);
195     size = rtas_ld(args, 3);
196     addr = rtas_ld(args, 0);
197 
198     finish_write_pci_config(spapr, buid, addr, size, val, rets);
199 }
200 
201 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
202                                   uint32_t token, uint32_t nargs,
203                                   target_ulong args,
204                                   uint32_t nret, target_ulong rets)
205 {
206     uint32_t val, size, addr;
207 
208     if ((nargs != 3) || (nret != 1)) {
209         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
210         return;
211     }
212 
213 
214     val = rtas_ld(args, 2);
215     size = rtas_ld(args, 1);
216     addr = rtas_ld(args, 0);
217 
218     finish_write_pci_config(spapr, 0, addr, size, val, rets);
219 }
220 
221 /*
222  * Find an entry with config_addr or returns the empty one if not found AND
223  * alloc_new is set.
224  * At the moment the msi_table entries are never released so there is
225  * no point to look till the end of the list if we need to find the free entry.
226  */
227 static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
228                              bool alloc_new)
229 {
230     int i;
231 
232     for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
233         if (!phb->msi_table[i].nvec) {
234             break;
235         }
236         if (phb->msi_table[i].config_addr == config_addr) {
237             return i;
238         }
239     }
240     if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
241         trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
242         return i;
243     }
244 
245     return -1;
246 }
247 
248 /*
249  * Set MSI/MSIX message data.
250  * This is required for msi_notify()/msix_notify() which
251  * will write at the addresses via spapr_msi_write().
252  */
253 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
254                              unsigned first_irq, unsigned req_num)
255 {
256     unsigned i;
257     MSIMessage msg = { .address = addr, .data = first_irq };
258 
259     if (!msix) {
260         msi_set_message(pdev, msg);
261         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
262         return;
263     }
264 
265     for (i = 0; i < req_num; ++i, ++msg.data) {
266         msix_set_message(pdev, i, msg);
267         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
268     }
269 }
270 
271 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
272                                 uint32_t token, uint32_t nargs,
273                                 target_ulong args, uint32_t nret,
274                                 target_ulong rets)
275 {
276     uint32_t config_addr = rtas_ld(args, 0);
277     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
278     unsigned int func = rtas_ld(args, 3);
279     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
280     unsigned int seq_num = rtas_ld(args, 5);
281     unsigned int ret_intr_type;
282     int ndev, irq;
283     sPAPRPHBState *phb = NULL;
284     PCIDevice *pdev = NULL;
285 
286     switch (func) {
287     case RTAS_CHANGE_MSI_FN:
288     case RTAS_CHANGE_FN:
289         ret_intr_type = RTAS_TYPE_MSI;
290         break;
291     case RTAS_CHANGE_MSIX_FN:
292         ret_intr_type = RTAS_TYPE_MSIX;
293         break;
294     default:
295         fprintf(stderr, "rtas_ibm_change_msi(%u) is not implemented\n", func);
296         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
297         return;
298     }
299 
300     /* Fins sPAPRPHBState */
301     phb = find_phb(spapr, buid);
302     if (phb) {
303         pdev = find_dev(spapr, buid, config_addr);
304     }
305     if (!phb || !pdev) {
306         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
307         return;
308     }
309 
310     /* Releasing MSIs */
311     if (!req_num) {
312         ndev = spapr_msicfg_find(phb, config_addr, false);
313         if (ndev < 0) {
314             trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
315             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
316             return;
317         }
318         trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
319         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
320         rtas_st(rets, 1, 0);
321         return;
322     }
323 
324     /* Enabling MSI */
325 
326     /* Find a device number in the map to add or reuse the existing one */
327     ndev = spapr_msicfg_find(phb, config_addr, true);
328     if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
329         fprintf(stderr, "No free entry for a new MSI device\n");
330         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
331         return;
332     }
333     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
334 
335     /* Check if there is an old config and MSI number has not changed */
336     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
337         /* Unexpected behaviour */
338         fprintf(stderr, "Cannot reuse MSI config for device#%d", ndev);
339         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
340         return;
341     }
342 
343     /* There is no cached config, allocate MSIs */
344     if (!phb->msi_table[ndev].nvec) {
345         irq = spapr_allocate_irq_block(req_num, false,
346                                        ret_intr_type == RTAS_TYPE_MSI);
347         if (irq < 0) {
348             fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
349             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
350             return;
351         }
352         phb->msi_table[ndev].irq = irq;
353         phb->msi_table[ndev].nvec = req_num;
354         phb->msi_table[ndev].config_addr = config_addr;
355     }
356 
357     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
358     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
359                      phb->msi_table[ndev].irq, req_num);
360 
361     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
362     rtas_st(rets, 1, req_num);
363     rtas_st(rets, 2, ++seq_num);
364     rtas_st(rets, 3, ret_intr_type);
365 
366     trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
367 }
368 
369 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
370                                                    sPAPREnvironment *spapr,
371                                                    uint32_t token,
372                                                    uint32_t nargs,
373                                                    target_ulong args,
374                                                    uint32_t nret,
375                                                    target_ulong rets)
376 {
377     uint32_t config_addr = rtas_ld(args, 0);
378     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
379     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
380     int ndev;
381     sPAPRPHBState *phb = NULL;
382 
383     /* Fins sPAPRPHBState */
384     phb = find_phb(spapr, buid);
385     if (!phb) {
386         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
387         return;
388     }
389 
390     /* Find device descriptor and start IRQ */
391     ndev = spapr_msicfg_find(phb, config_addr, false);
392     if (ndev < 0) {
393         trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
394         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
395         return;
396     }
397 
398     intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
399     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
400                                                            intr_src_num);
401 
402     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
403     rtas_st(rets, 1, intr_src_num);
404     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
405 }
406 
407 static int pci_spapr_swizzle(int slot, int pin)
408 {
409     return (slot + pin) % PCI_NUM_PINS;
410 }
411 
412 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
413 {
414     /*
415      * Here we need to convert pci_dev + irq_num to some unique value
416      * which is less than number of IRQs on the specific bus (4).  We
417      * use standard PCI swizzling, that is (slot number + pin number)
418      * % 4.
419      */
420     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
421 }
422 
423 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
424 {
425     /*
426      * Here we use the number returned by pci_spapr_map_irq to find a
427      * corresponding qemu_irq.
428      */
429     sPAPRPHBState *phb = opaque;
430 
431     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
432     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
433 }
434 
435 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
436 {
437     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
438     PCIINTxRoute route;
439 
440     route.mode = PCI_INTX_ENABLED;
441     route.irq = sphb->lsi_table[pin].irq;
442 
443     return route;
444 }
445 
446 /*
447  * MSI/MSIX memory region implementation.
448  * The handler handles both MSI and MSIX.
449  * For MSI-X, the vector number is encoded as a part of the address,
450  * data is set to 0.
451  * For MSI, the vector number is encoded in least bits in data.
452  */
453 static void spapr_msi_write(void *opaque, hwaddr addr,
454                             uint64_t data, unsigned size)
455 {
456     uint32_t irq = data;
457 
458     trace_spapr_pci_msi_write(addr, data, irq);
459 
460     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
461 }
462 
463 static const MemoryRegionOps spapr_msi_ops = {
464     /* There is no .read as the read result is undefined by PCI spec */
465     .read = NULL,
466     .write = spapr_msi_write,
467     .endianness = DEVICE_LITTLE_ENDIAN
468 };
469 
470 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
471 {
472     /*
473      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
474      * we need to allocate some memory to catch those writes coming
475      * from msi_notify()/msix_notify().
476      * As MSIMessage:addr is going to be the same and MSIMessage:data
477      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
478      * be used.
479      */
480     spapr->msi_win_addr = addr;
481     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
482                           "msi", getpagesize());
483     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
484                                 &spapr->msiwindow);
485 }
486 
487 /*
488  * PHB PCI device
489  */
490 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
491 {
492     sPAPRPHBState *phb = opaque;
493 
494     return &phb->iommu_as;
495 }
496 
497 static int spapr_phb_init(SysBusDevice *s)
498 {
499     DeviceState *dev = DEVICE(s);
500     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
501     PCIHostState *phb = PCI_HOST_BRIDGE(s);
502     const char *busname;
503     char *namebuf;
504     int i;
505     PCIBus *bus;
506 
507     if (sphb->index != -1) {
508         hwaddr windows_base;
509 
510         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
511             || (sphb->mem_win_addr != -1)
512             || (sphb->io_win_addr != -1)) {
513             fprintf(stderr, "Either \"index\" or other parameters must"
514                     " be specified for PAPR PHB, not both\n");
515             return -1;
516         }
517 
518         sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
519         sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
520 
521         windows_base = SPAPR_PCI_WINDOW_BASE
522             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
523         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
524         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
525     }
526 
527     if (sphb->buid == -1) {
528         fprintf(stderr, "BUID not specified for PHB\n");
529         return -1;
530     }
531 
532     if (sphb->dma_liobn == -1) {
533         fprintf(stderr, "LIOBN not specified for PHB\n");
534         return -1;
535     }
536 
537     if (sphb->mem_win_addr == -1) {
538         fprintf(stderr, "Memory window address not specified for PHB\n");
539         return -1;
540     }
541 
542     if (sphb->io_win_addr == -1) {
543         fprintf(stderr, "IO window address not specified for PHB\n");
544         return -1;
545     }
546 
547     if (find_phb(spapr, sphb->buid)) {
548         fprintf(stderr, "PCI host bridges must have unique BUIDs\n");
549         return -1;
550     }
551 
552     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
553 
554     namebuf = alloca(strlen(sphb->dtbusname) + 32);
555 
556     /* Initialize memory regions */
557     sprintf(namebuf, "%s.mmio", sphb->dtbusname);
558     memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
559 
560     sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
561     memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
562                              namebuf, &sphb->memspace,
563                              SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
564     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
565                                 &sphb->memwindow);
566 
567     /* On ppc, we only have MMIO no specific IO space from the CPU
568      * perspective.  In theory we ought to be able to embed the PCI IO
569      * memory region direction in the system memory space.  However,
570      * if any of the IO BAR subregions use the old_portio mechanism,
571      * that won't be processed properly unless accessed from the
572      * system io address space.  This hack to bounce things via
573      * system_io works around the problem until all the users of
574      * old_portion are updated */
575     sprintf(namebuf, "%s.io", sphb->dtbusname);
576     memory_region_init(&sphb->iospace, OBJECT(sphb),
577                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
578     /* FIXME: fix to support multiple PHBs */
579     memory_region_add_subregion(get_system_io(), 0, &sphb->iospace);
580 
581     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
582     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
583                              get_system_io(), 0, SPAPR_PCI_IO_WIN_SIZE);
584     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
585                                 &sphb->iowindow);
586     /*
587      * Selecting a busname is more complex than you'd think, due to
588      * interacting constraints.  If the user has specified an id
589      * explicitly for the phb , then we want to use the qdev default
590      * of naming the bus based on the bridge device (so the user can
591      * then assign devices to it in the way they expect).  For the
592      * first / default PCI bus (index=0) we want to use just "pci"
593      * because libvirt expects there to be a bus called, simply,
594      * "pci".  Otherwise, we use the same name as in the device tree,
595      * since it's unique by construction, and makes the guest visible
596      * BUID clear.
597      */
598     if (dev->id) {
599         busname = NULL;
600     } else if (sphb->index == 0) {
601         busname = "pci";
602     } else {
603         busname = sphb->dtbusname;
604     }
605     bus = pci_register_bus(dev, busname,
606                            pci_spapr_set_irq, pci_spapr_map_irq, sphb,
607                            &sphb->memspace, &sphb->iospace,
608                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
609     phb->bus = bus;
610 
611     sphb->dma_window_start = 0;
612     sphb->dma_window_size = 0x40000000;
613     sphb->tcet = spapr_tce_new_table(dev, sphb->dma_liobn,
614                                      sphb->dma_window_size);
615     if (!sphb->tcet) {
616         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
617         return -1;
618     }
619     address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
620                        sphb->dtbusname);
621 
622     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
623 
624     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
625 
626     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
627 
628     /* Initialize the LSI table */
629     for (i = 0; i < PCI_NUM_PINS; i++) {
630         uint32_t irq;
631 
632         irq = spapr_allocate_lsi(0);
633         if (!irq) {
634             return -1;
635         }
636 
637         sphb->lsi_table[i].irq = irq;
638     }
639 
640     return 0;
641 }
642 
643 static void spapr_phb_reset(DeviceState *qdev)
644 {
645     SysBusDevice *s = SYS_BUS_DEVICE(qdev);
646     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
647 
648     /* Reset the IOMMU state */
649     device_reset(DEVICE(sphb->tcet));
650 }
651 
652 static Property spapr_phb_properties[] = {
653     DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
654     DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
655     DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
656     DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
657     DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
658                        SPAPR_PCI_MMIO_WIN_SIZE),
659     DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
660     DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
661                        SPAPR_PCI_IO_WIN_SIZE),
662     DEFINE_PROP_END_OF_LIST(),
663 };
664 
665 static const VMStateDescription vmstate_spapr_pci_lsi = {
666     .name = "spapr_pci/lsi",
667     .version_id = 1,
668     .minimum_version_id = 1,
669     .minimum_version_id_old = 1,
670     .fields      = (VMStateField []) {
671         VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),
672 
673         VMSTATE_END_OF_LIST()
674     },
675 };
676 
677 static const VMStateDescription vmstate_spapr_pci_msi = {
678     .name = "spapr_pci/lsi",
679     .version_id = 1,
680     .minimum_version_id = 1,
681     .minimum_version_id_old = 1,
682     .fields      = (VMStateField []) {
683         VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
684         VMSTATE_UINT32(irq, struct spapr_pci_msi),
685         VMSTATE_UINT32(nvec, struct spapr_pci_msi),
686 
687         VMSTATE_END_OF_LIST()
688     },
689 };
690 
691 static const VMStateDescription vmstate_spapr_pci = {
692     .name = "spapr_pci",
693     .version_id = 1,
694     .minimum_version_id = 1,
695     .minimum_version_id_old = 1,
696     .fields      = (VMStateField []) {
697         VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
698         VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
699         VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
700         VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
701         VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
702         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
703         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
704                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
705         VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
706                              vmstate_spapr_pci_msi, struct spapr_pci_msi),
707 
708         VMSTATE_END_OF_LIST()
709     },
710 };
711 
712 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
713                                            PCIBus *rootbus)
714 {
715     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
716 
717     return sphb->dtbusname;
718 }
719 
720 static void spapr_phb_class_init(ObjectClass *klass, void *data)
721 {
722     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
723     SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
724     DeviceClass *dc = DEVICE_CLASS(klass);
725 
726     hc->root_bus_path = spapr_phb_root_bus_path;
727     sdc->init = spapr_phb_init;
728     dc->props = spapr_phb_properties;
729     dc->reset = spapr_phb_reset;
730     dc->vmsd = &vmstate_spapr_pci;
731 }
732 
733 static const TypeInfo spapr_phb_info = {
734     .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
735     .parent        = TYPE_PCI_HOST_BRIDGE,
736     .instance_size = sizeof(sPAPRPHBState),
737     .class_init    = spapr_phb_class_init,
738 };
739 
740 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
741 {
742     DeviceState *dev;
743 
744     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
745     qdev_prop_set_uint32(dev, "index", index);
746     qdev_init_nofail(dev);
747 
748     return PCI_HOST_BRIDGE(dev);
749 }
750 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low l bits of x and places them at bit
 * position p.  The arithmetic is done in uint32_t so that a field that
 * reaches bit 31 (b_n) is not produced by shifting a signed int into its
 * sign bit, which would be undefined behaviour.
 */
#define b_x(x, p, l)    (((uint32_t)(x) & (((uint32_t)1 << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
761 
762 int spapr_populate_pci_dt(sPAPRPHBState *phb,
763                           uint32_t xics_phandle,
764                           void *fdt)
765 {
766     int bus_off, i, j;
767     char nodename[256];
768     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
769     struct {
770         uint32_t hi;
771         uint64_t child;
772         uint64_t parent;
773         uint64_t size;
774     } QEMU_PACKED ranges[] = {
775         {
776             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
777             cpu_to_be64(phb->io_win_addr),
778             cpu_to_be64(memory_region_size(&phb->iospace)),
779         },
780         {
781             cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
782             cpu_to_be64(phb->mem_win_addr),
783             cpu_to_be64(memory_region_size(&phb->memwindow)),
784         },
785     };
786     uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
787     uint32_t interrupt_map_mask[] = {
788         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
789     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
790 
791     /* Start populating the FDT */
792     sprintf(nodename, "pci@%" PRIx64, phb->buid);
793     bus_off = fdt_add_subnode(fdt, 0, nodename);
794     if (bus_off < 0) {
795         return bus_off;
796     }
797 
798 #define _FDT(exp) \
799     do { \
800         int ret = (exp);                                           \
801         if (ret < 0) {                                             \
802             return ret;                                            \
803         }                                                          \
804     } while (0)
805 
806     /* Write PHB properties */
807     _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
808     _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
809     _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
810     _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
811     _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
812     _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
813     _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
814     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
815     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
816     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
817 
818     /* Build the interrupt-map, this must matches what is done
819      * in pci_spapr_map_irq
820      */
821     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
822                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
823     for (i = 0; i < PCI_SLOT_MAX; i++) {
824         for (j = 0; j < PCI_NUM_PINS; j++) {
825             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
826             int lsi_num = pci_spapr_swizzle(i, j);
827 
828             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
829             irqmap[1] = 0;
830             irqmap[2] = 0;
831             irqmap[3] = cpu_to_be32(j+1);
832             irqmap[4] = cpu_to_be32(xics_phandle);
833             irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
834             irqmap[6] = cpu_to_be32(0x8);
835         }
836     }
837     /* Write interrupt map */
838     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
839                      sizeof(interrupt_map)));
840 
841     spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
842                  phb->dma_liobn, phb->dma_window_start,
843                  phb->dma_window_size);
844 
845     return 0;
846 }
847 
848 void spapr_pci_rtas_init(void)
849 {
850     spapr_rtas_register("read-pci-config", rtas_read_pci_config);
851     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
852     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
853     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
854     if (msi_supported) {
855         spapr_rtas_register("ibm,query-interrupt-source-number",
856                             rtas_ibm_query_interrupt_source_number);
857         spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
858     }
859 }
860 
861 static void spapr_pci_register_types(void)
862 {
863     type_register_static(&spapr_phb_info);
864 }
865 
866 type_init(spapr_pci_register_types)
867