xref: /qemu/hw/ppc/spapr_pci.c (revision d5932334)
1 /*
2  * QEMU sPAPR PCI host originated from Uninorth PCI host
3  *
4  * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
5  * Copyright (C) 2011 David Gibson, IBM Corporation.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 #include "hw/hw.h"
26 #include "hw/pci/pci.h"
27 #include "hw/pci/msi.h"
28 #include "hw/pci/msix.h"
29 #include "hw/pci/pci_host.h"
30 #include "hw/ppc/spapr.h"
31 #include "hw/pci-host/spapr.h"
32 #include "exec/address-spaces.h"
33 #include <libfdt.h>
34 #include "trace.h"
35 #include "qemu/error-report.h"
36 
37 #include "hw/pci/pci_bus.h"
38 
/*
 * Function codes for the "ibm,change-msi" RTAS call.
 * The values are copied from the kernel's
 * arch/powerpc/platforms/pseries/msi.c.
 */
#define RTAS_QUERY_FN           0
#define RTAS_CHANGE_FN          1
#define RTAS_RESET_FN           2
#define RTAS_CHANGE_MSI_FN      3
#define RTAS_CHANGE_MSIX_FN     4

/* Interrupt type reported back to the guest by the RTAS_CHANGE_* functions */
#define RTAS_TYPE_MSI           1
#define RTAS_TYPE_MSIX          2
49 
50 static sPAPRPHBState *find_phb(sPAPREnvironment *spapr, uint64_t buid)
51 {
52     sPAPRPHBState *sphb;
53 
54     QLIST_FOREACH(sphb, &spapr->phbs, list) {
55         if (sphb->buid != buid) {
56             continue;
57         }
58         return sphb;
59     }
60 
61     return NULL;
62 }
63 
64 static PCIDevice *find_dev(sPAPREnvironment *spapr, uint64_t buid,
65                            uint32_t config_addr)
66 {
67     sPAPRPHBState *sphb = find_phb(spapr, buid);
68     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
69     int bus_num = (config_addr >> 16) & 0xFF;
70     int devfn = (config_addr >> 8) & 0xFF;
71 
72     if (!phb) {
73         return NULL;
74     }
75 
76     return pci_find_device(phb->bus, bus_num, devfn);
77 }
78 
/*
 * Decode the config-space register offset from an RTAS config address.
 *
 * Bits 31:28 of @arg supply bits 11:8 of the offset (the extended config
 * space part) and bits 7:0 of @arg supply bits 7:0.
 */
static uint32_t rtas_pci_cfgaddr(uint32_t arg)
{
    uint32_t ext_part = (arg >> 20) & 0xf00;
    uint32_t low_part = arg & 0xff;

    return ext_part | low_part;
}
84 
85 static void finish_read_pci_config(sPAPREnvironment *spapr, uint64_t buid,
86                                    uint32_t addr, uint32_t size,
87                                    target_ulong rets)
88 {
89     PCIDevice *pci_dev;
90     uint32_t val;
91 
92     if ((size != 1) && (size != 2) && (size != 4)) {
93         /* access must be 1, 2 or 4 bytes */
94         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
95         return;
96     }
97 
98     pci_dev = find_dev(spapr, buid, addr);
99     addr = rtas_pci_cfgaddr(addr);
100 
101     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
102         /* Access must be to a valid device, within bounds and
103          * naturally aligned */
104         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
105         return;
106     }
107 
108     val = pci_host_config_read_common(pci_dev, addr,
109                                       pci_config_size(pci_dev), size);
110 
111     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
112     rtas_st(rets, 1, val);
113 }
114 
115 static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
116                                      uint32_t token, uint32_t nargs,
117                                      target_ulong args,
118                                      uint32_t nret, target_ulong rets)
119 {
120     uint64_t buid;
121     uint32_t size, addr;
122 
123     if ((nargs != 4) || (nret != 2)) {
124         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
125         return;
126     }
127 
128     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
129     size = rtas_ld(args, 3);
130     addr = rtas_ld(args, 0);
131 
132     finish_read_pci_config(spapr, buid, addr, size, rets);
133 }
134 
135 static void rtas_read_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
136                                  uint32_t token, uint32_t nargs,
137                                  target_ulong args,
138                                  uint32_t nret, target_ulong rets)
139 {
140     uint32_t size, addr;
141 
142     if ((nargs != 2) || (nret != 2)) {
143         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
144         return;
145     }
146 
147     size = rtas_ld(args, 1);
148     addr = rtas_ld(args, 0);
149 
150     finish_read_pci_config(spapr, 0, addr, size, rets);
151 }
152 
153 static void finish_write_pci_config(sPAPREnvironment *spapr, uint64_t buid,
154                                     uint32_t addr, uint32_t size,
155                                     uint32_t val, target_ulong rets)
156 {
157     PCIDevice *pci_dev;
158 
159     if ((size != 1) && (size != 2) && (size != 4)) {
160         /* access must be 1, 2 or 4 bytes */
161         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
162         return;
163     }
164 
165     pci_dev = find_dev(spapr, buid, addr);
166     addr = rtas_pci_cfgaddr(addr);
167 
168     if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) {
169         /* Access must be to a valid device, within bounds and
170          * naturally aligned */
171         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
172         return;
173     }
174 
175     pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev),
176                                  val, size);
177 
178     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
179 }
180 
181 static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
182                                       uint32_t token, uint32_t nargs,
183                                       target_ulong args,
184                                       uint32_t nret, target_ulong rets)
185 {
186     uint64_t buid;
187     uint32_t val, size, addr;
188 
189     if ((nargs != 5) || (nret != 1)) {
190         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
191         return;
192     }
193 
194     buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
195     val = rtas_ld(args, 4);
196     size = rtas_ld(args, 3);
197     addr = rtas_ld(args, 0);
198 
199     finish_write_pci_config(spapr, buid, addr, size, val, rets);
200 }
201 
202 static void rtas_write_pci_config(PowerPCCPU *cpu, sPAPREnvironment *spapr,
203                                   uint32_t token, uint32_t nargs,
204                                   target_ulong args,
205                                   uint32_t nret, target_ulong rets)
206 {
207     uint32_t val, size, addr;
208 
209     if ((nargs != 3) || (nret != 1)) {
210         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
211         return;
212     }
213 
214 
215     val = rtas_ld(args, 2);
216     size = rtas_ld(args, 1);
217     addr = rtas_ld(args, 0);
218 
219     finish_write_pci_config(spapr, 0, addr, size, val, rets);
220 }
221 
222 /*
223  * Find an entry with config_addr or returns the empty one if not found AND
224  * alloc_new is set.
225  * At the moment the msi_table entries are never released so there is
226  * no point to look till the end of the list if we need to find the free entry.
227  */
228 static int spapr_msicfg_find(sPAPRPHBState *phb, uint32_t config_addr,
229                              bool alloc_new)
230 {
231     int i;
232 
233     for (i = 0; i < SPAPR_MSIX_MAX_DEVS; ++i) {
234         if (!phb->msi_table[i].nvec) {
235             break;
236         }
237         if (phb->msi_table[i].config_addr == config_addr) {
238             return i;
239         }
240     }
241     if ((i < SPAPR_MSIX_MAX_DEVS) && alloc_new) {
242         trace_spapr_pci_msi("Allocating new MSI config", i, config_addr);
243         return i;
244     }
245 
246     return -1;
247 }
248 
249 /*
250  * Set MSI/MSIX message data.
251  * This is required for msi_notify()/msix_notify() which
252  * will write at the addresses via spapr_msi_write().
253  */
254 static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
255                              unsigned first_irq, unsigned req_num)
256 {
257     unsigned i;
258     MSIMessage msg = { .address = addr, .data = first_irq };
259 
260     if (!msix) {
261         msi_set_message(pdev, msg);
262         trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
263         return;
264     }
265 
266     for (i = 0; i < req_num; ++i, ++msg.data) {
267         msix_set_message(pdev, i, msg);
268         trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
269     }
270 }
271 
272 static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
273                                 uint32_t token, uint32_t nargs,
274                                 target_ulong args, uint32_t nret,
275                                 target_ulong rets)
276 {
277     uint32_t config_addr = rtas_ld(args, 0);
278     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
279     unsigned int func = rtas_ld(args, 3);
280     unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
281     unsigned int seq_num = rtas_ld(args, 5);
282     unsigned int ret_intr_type;
283     int ndev, irq, max_irqs = 0;
284     sPAPRPHBState *phb = NULL;
285     PCIDevice *pdev = NULL;
286 
287     switch (func) {
288     case RTAS_CHANGE_MSI_FN:
289     case RTAS_CHANGE_FN:
290         ret_intr_type = RTAS_TYPE_MSI;
291         break;
292     case RTAS_CHANGE_MSIX_FN:
293         ret_intr_type = RTAS_TYPE_MSIX;
294         break;
295     default:
296         error_report("rtas_ibm_change_msi(%u) is not implemented", func);
297         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
298         return;
299     }
300 
301     /* Fins sPAPRPHBState */
302     phb = find_phb(spapr, buid);
303     if (phb) {
304         pdev = find_dev(spapr, buid, config_addr);
305     }
306     if (!phb || !pdev) {
307         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
308         return;
309     }
310 
311     /* Releasing MSIs */
312     if (!req_num) {
313         ndev = spapr_msicfg_find(phb, config_addr, false);
314         if (ndev < 0) {
315             trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
316             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
317             return;
318         }
319         trace_spapr_pci_msi("Released MSIs", ndev, config_addr);
320         rtas_st(rets, 0, RTAS_OUT_SUCCESS);
321         rtas_st(rets, 1, 0);
322         return;
323     }
324 
325     /* Enabling MSI */
326 
327     /* Find a device number in the map to add or reuse the existing one */
328     ndev = spapr_msicfg_find(phb, config_addr, true);
329     if (ndev >= SPAPR_MSIX_MAX_DEVS || ndev < 0) {
330         error_report("No free entry for a new MSI device");
331         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
332         return;
333     }
334     trace_spapr_pci_msi("Configuring MSI", ndev, config_addr);
335 
336     /* Check if the device supports as many IRQs as requested */
337     if (ret_intr_type == RTAS_TYPE_MSI) {
338         max_irqs = msi_nr_vectors_allocated(pdev);
339     } else if (ret_intr_type == RTAS_TYPE_MSIX) {
340         max_irqs = pdev->msix_entries_nr;
341     }
342     if (!max_irqs) {
343         error_report("Requested interrupt type %d is not enabled for device#%d",
344                      ret_intr_type, ndev);
345         rtas_st(rets, 0, -1); /* Hardware error */
346         return;
347     }
348     /* Correct the number if the guest asked for too many */
349     if (req_num > max_irqs) {
350         req_num = max_irqs;
351     }
352 
353     /* Check if there is an old config and MSI number has not changed */
354     if (phb->msi_table[ndev].nvec && (req_num != phb->msi_table[ndev].nvec)) {
355         /* Unexpected behaviour */
356         error_report("Cannot reuse MSI config for device#%d", ndev);
357         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
358         return;
359     }
360 
361     /* There is no cached config, allocate MSIs */
362     if (!phb->msi_table[ndev].nvec) {
363         irq = spapr_allocate_irq_block(req_num, false,
364                                        ret_intr_type == RTAS_TYPE_MSI);
365         if (irq < 0) {
366             error_report("Cannot allocate MSIs for device#%d", ndev);
367             rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
368             return;
369         }
370         phb->msi_table[ndev].irq = irq;
371         phb->msi_table[ndev].nvec = req_num;
372         phb->msi_table[ndev].config_addr = config_addr;
373     }
374 
375     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
376     spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
377                      phb->msi_table[ndev].irq, req_num);
378 
379     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
380     rtas_st(rets, 1, req_num);
381     rtas_st(rets, 2, ++seq_num);
382     rtas_st(rets, 3, ret_intr_type);
383 
384     trace_spapr_pci_rtas_ibm_change_msi(func, req_num);
385 }
386 
387 static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
388                                                    sPAPREnvironment *spapr,
389                                                    uint32_t token,
390                                                    uint32_t nargs,
391                                                    target_ulong args,
392                                                    uint32_t nret,
393                                                    target_ulong rets)
394 {
395     uint32_t config_addr = rtas_ld(args, 0);
396     uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
397     unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3);
398     int ndev;
399     sPAPRPHBState *phb = NULL;
400 
401     /* Fins sPAPRPHBState */
402     phb = find_phb(spapr, buid);
403     if (!phb) {
404         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
405         return;
406     }
407 
408     /* Find device descriptor and start IRQ */
409     ndev = spapr_msicfg_find(phb, config_addr, false);
410     if (ndev < 0) {
411         trace_spapr_pci_msi("MSI has not been enabled", -1, config_addr);
412         rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
413         return;
414     }
415 
416     intr_src_num = phb->msi_table[ndev].irq + ioa_intr_num;
417     trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
418                                                            intr_src_num);
419 
420     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
421     rtas_st(rets, 1, intr_src_num);
422     rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */
423 }
424 
425 static int pci_spapr_swizzle(int slot, int pin)
426 {
427     return (slot + pin) % PCI_NUM_PINS;
428 }
429 
430 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
431 {
432     /*
433      * Here we need to convert pci_dev + irq_num to some unique value
434      * which is less than number of IRQs on the specific bus (4).  We
435      * use standard PCI swizzling, that is (slot number + pin number)
436      * % 4.
437      */
438     return pci_spapr_swizzle(PCI_SLOT(pci_dev->devfn), irq_num);
439 }
440 
441 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
442 {
443     /*
444      * Here we use the number returned by pci_spapr_map_irq to find a
445      * corresponding qemu_irq.
446      */
447     sPAPRPHBState *phb = opaque;
448 
449     trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq);
450     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
451 }
452 
453 static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
454 {
455     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
456     PCIINTxRoute route;
457 
458     route.mode = PCI_INTX_ENABLED;
459     route.irq = sphb->lsi_table[pin].irq;
460 
461     return route;
462 }
463 
464 /*
465  * MSI/MSIX memory region implementation.
466  * The handler handles both MSI and MSIX.
467  * For MSI-X, the vector number is encoded as a part of the address,
468  * data is set to 0.
469  * For MSI, the vector number is encoded in least bits in data.
470  */
471 static void spapr_msi_write(void *opaque, hwaddr addr,
472                             uint64_t data, unsigned size)
473 {
474     uint32_t irq = data;
475 
476     trace_spapr_pci_msi_write(addr, data, irq);
477 
478     qemu_irq_pulse(xics_get_qirq(spapr->icp, irq));
479 }
480 
481 static const MemoryRegionOps spapr_msi_ops = {
482     /* There is no .read as the read result is undefined by PCI spec */
483     .read = NULL,
484     .write = spapr_msi_write,
485     .endianness = DEVICE_LITTLE_ENDIAN
486 };
487 
488 void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
489 {
490     uint64_t window_size = 4096;
491 
492     /*
493      * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
494      * we need to allocate some memory to catch those writes coming
495      * from msi_notify()/msix_notify().
496      * As MSIMessage:addr is going to be the same and MSIMessage:data
497      * is going to be a VIRQ number, 4 bytes of the MSI MR will only
498      * be used.
499      *
500      * For KVM we want to ensure that this memory is a full page so that
501      * our memory slot is of page size granularity.
502      */
503 #ifdef CONFIG_KVM
504     if (kvm_enabled()) {
505         window_size = getpagesize();
506     }
507 #endif
508 
509     spapr->msi_win_addr = addr;
510     memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
511                           "msi", window_size);
512     memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
513                                 &spapr->msiwindow);
514 }
515 
516 /*
517  * PHB PCI device
518  */
519 static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
520 {
521     sPAPRPHBState *phb = opaque;
522 
523     return &phb->iommu_as;
524 }
525 
526 static void spapr_phb_realize(DeviceState *dev, Error **errp)
527 {
528     SysBusDevice *s = SYS_BUS_DEVICE(dev);
529     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
530     PCIHostState *phb = PCI_HOST_BRIDGE(s);
531     sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
532     char *namebuf;
533     int i;
534     PCIBus *bus;
535 
536     if (sphb->index != -1) {
537         hwaddr windows_base;
538 
539         if ((sphb->buid != -1) || (sphb->dma_liobn != -1)
540             || (sphb->mem_win_addr != -1)
541             || (sphb->io_win_addr != -1)) {
542             error_setg(errp, "Either \"index\" or other parameters must"
543                        " be specified for PAPR PHB, not both");
544             return;
545         }
546 
547         sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
548         sphb->dma_liobn = SPAPR_PCI_BASE_LIOBN + sphb->index;
549 
550         windows_base = SPAPR_PCI_WINDOW_BASE
551             + sphb->index * SPAPR_PCI_WINDOW_SPACING;
552         sphb->mem_win_addr = windows_base + SPAPR_PCI_MMIO_WIN_OFF;
553         sphb->io_win_addr = windows_base + SPAPR_PCI_IO_WIN_OFF;
554     }
555 
556     if (sphb->buid == -1) {
557         error_setg(errp, "BUID not specified for PHB");
558         return;
559     }
560 
561     if (sphb->dma_liobn == -1) {
562         error_setg(errp, "LIOBN not specified for PHB");
563         return;
564     }
565 
566     if (sphb->mem_win_addr == -1) {
567         error_setg(errp, "Memory window address not specified for PHB");
568         return;
569     }
570 
571     if (sphb->io_win_addr == -1) {
572         error_setg(errp, "IO window address not specified for PHB");
573         return;
574     }
575 
576     if (find_phb(spapr, sphb->buid)) {
577         error_setg(errp, "PCI host bridges must have unique BUIDs");
578         return;
579     }
580 
581     sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
582 
583     namebuf = alloca(strlen(sphb->dtbusname) + 32);
584 
585     /* Initialize memory regions */
586     sprintf(namebuf, "%s.mmio", sphb->dtbusname);
587     memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
588 
589     sprintf(namebuf, "%s.mmio-alias", sphb->dtbusname);
590     memory_region_init_alias(&sphb->memwindow, OBJECT(sphb),
591                              namebuf, &sphb->memspace,
592                              SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
593     memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
594                                 &sphb->memwindow);
595 
596     /* Initialize IO regions */
597     sprintf(namebuf, "%s.io", sphb->dtbusname);
598     memory_region_init(&sphb->iospace, OBJECT(sphb),
599                        namebuf, SPAPR_PCI_IO_WIN_SIZE);
600 
601     sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
602     memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
603                              &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
604     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
605                                 &sphb->iowindow);
606 
607     bus = pci_register_bus(dev, NULL,
608                            pci_spapr_set_irq, pci_spapr_map_irq, sphb,
609                            &sphb->memspace, &sphb->iospace,
610                            PCI_DEVFN(0, 0), PCI_NUM_PINS, TYPE_PCI_BUS);
611     phb->bus = bus;
612 
613     /*
614      * Initialize PHB address space.
615      * By default there will be at least one subregion for default
616      * 32bit DMA window.
617      * Later the guest might want to create another DMA window
618      * which will become another memory subregion.
619      */
620     sprintf(namebuf, "%s.iommu-root", sphb->dtbusname);
621 
622     memory_region_init(&sphb->iommu_root, OBJECT(sphb),
623                        namebuf, UINT64_MAX);
624     address_space_init(&sphb->iommu_as, &sphb->iommu_root,
625                        sphb->dtbusname);
626 
627     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
628 
629     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
630 
631     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
632 
633     /* Initialize the LSI table */
634     for (i = 0; i < PCI_NUM_PINS; i++) {
635         uint32_t irq;
636 
637         irq = spapr_allocate_lsi(0);
638         if (!irq) {
639             error_setg(errp, "spapr_allocate_lsi failed");
640             return;
641         }
642 
643         sphb->lsi_table[i].irq = irq;
644     }
645 
646     if (!info->finish_realize) {
647         error_setg(errp, "finish_realize not defined");
648         return;
649     }
650 
651     info->finish_realize(sphb, errp);
652 }
653 
654 static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
655 {
656     sPAPRTCETable *tcet;
657 
658     tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
659                                0,
660                                SPAPR_TCE_PAGE_SHIFT,
661                                0x40000000 >> SPAPR_TCE_PAGE_SHIFT);
662     if (!tcet) {
663         error_setg(errp, "Unable to create TCE table for %s",
664                    sphb->dtbusname);
665         return ;
666     }
667 
668     /* Register default 32bit DMA window */
669     memory_region_add_subregion(&sphb->iommu_root, 0,
670                                 spapr_tce_get_iommu(tcet));
671 }
672 
673 static int spapr_phb_children_reset(Object *child, void *opaque)
674 {
675     DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE);
676 
677     if (dev) {
678         device_reset(dev);
679     }
680 
681     return 0;
682 }
683 
684 static void spapr_phb_reset(DeviceState *qdev)
685 {
686     /* Reset the IOMMU state */
687     object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
688 }
689 
690 static Property spapr_phb_properties[] = {
691     DEFINE_PROP_INT32("index", sPAPRPHBState, index, -1),
692     DEFINE_PROP_UINT64("buid", sPAPRPHBState, buid, -1),
693     DEFINE_PROP_UINT32("liobn", sPAPRPHBState, dma_liobn, -1),
694     DEFINE_PROP_UINT64("mem_win_addr", sPAPRPHBState, mem_win_addr, -1),
695     DEFINE_PROP_UINT64("mem_win_size", sPAPRPHBState, mem_win_size,
696                        SPAPR_PCI_MMIO_WIN_SIZE),
697     DEFINE_PROP_UINT64("io_win_addr", sPAPRPHBState, io_win_addr, -1),
698     DEFINE_PROP_UINT64("io_win_size", sPAPRPHBState, io_win_size,
699                        SPAPR_PCI_IO_WIN_SIZE),
700     DEFINE_PROP_END_OF_LIST(),
701 };
702 
703 static const VMStateDescription vmstate_spapr_pci_lsi = {
704     .name = "spapr_pci/lsi",
705     .version_id = 1,
706     .minimum_version_id = 1,
707     .fields = (VMStateField[]) {
708         VMSTATE_UINT32_EQUAL(irq, struct spapr_pci_lsi),
709 
710         VMSTATE_END_OF_LIST()
711     },
712 };
713 
714 static const VMStateDescription vmstate_spapr_pci_msi = {
715     .name = "spapr_pci/lsi",
716     .version_id = 1,
717     .minimum_version_id = 1,
718     .fields = (VMStateField[]) {
719         VMSTATE_UINT32(config_addr, struct spapr_pci_msi),
720         VMSTATE_UINT32(irq, struct spapr_pci_msi),
721         VMSTATE_UINT32(nvec, struct spapr_pci_msi),
722 
723         VMSTATE_END_OF_LIST()
724     },
725 };
726 
727 static const VMStateDescription vmstate_spapr_pci = {
728     .name = "spapr_pci",
729     .version_id = 1,
730     .minimum_version_id = 1,
731     .fields = (VMStateField[]) {
732         VMSTATE_UINT64_EQUAL(buid, sPAPRPHBState),
733         VMSTATE_UINT32_EQUAL(dma_liobn, sPAPRPHBState),
734         VMSTATE_UINT64_EQUAL(mem_win_addr, sPAPRPHBState),
735         VMSTATE_UINT64_EQUAL(mem_win_size, sPAPRPHBState),
736         VMSTATE_UINT64_EQUAL(io_win_addr, sPAPRPHBState),
737         VMSTATE_UINT64_EQUAL(io_win_size, sPAPRPHBState),
738         VMSTATE_STRUCT_ARRAY(lsi_table, sPAPRPHBState, PCI_NUM_PINS, 0,
739                              vmstate_spapr_pci_lsi, struct spapr_pci_lsi),
740         VMSTATE_STRUCT_ARRAY(msi_table, sPAPRPHBState, SPAPR_MSIX_MAX_DEVS, 0,
741                              vmstate_spapr_pci_msi, struct spapr_pci_msi),
742 
743         VMSTATE_END_OF_LIST()
744     },
745 };
746 
747 static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
748                                            PCIBus *rootbus)
749 {
750     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge);
751 
752     return sphb->dtbusname;
753 }
754 
755 static void spapr_phb_class_init(ObjectClass *klass, void *data)
756 {
757     PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
758     DeviceClass *dc = DEVICE_CLASS(klass);
759     sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
760 
761     hc->root_bus_path = spapr_phb_root_bus_path;
762     dc->realize = spapr_phb_realize;
763     dc->props = spapr_phb_properties;
764     dc->reset = spapr_phb_reset;
765     dc->vmsd = &vmstate_spapr_pci;
766     set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
767     dc->cannot_instantiate_with_device_add_yet = false;
768     spc->finish_realize = spapr_phb_finish_realize;
769 }
770 
771 static const TypeInfo spapr_phb_info = {
772     .name          = TYPE_SPAPR_PCI_HOST_BRIDGE,
773     .parent        = TYPE_PCI_HOST_BRIDGE,
774     .instance_size = sizeof(sPAPRPHBState),
775     .class_init    = spapr_phb_class_init,
776     .class_size    = sizeof(sPAPRPHBClass),
777 };
778 
779 PCIHostState *spapr_create_phb(sPAPREnvironment *spapr, int index)
780 {
781     DeviceState *dev;
782 
783     dev = qdev_create(NULL, TYPE_SPAPR_PCI_HOST_BRIDGE);
784     qdev_prop_set_uint32(dev, "index", index);
785     qdev_init_nofail(dev);
786 
787     return PCI_HOST_BRIDGE(dev);
788 }
789 
/*
 * Macros to operate with address in OF binding to PCI.
 *
 * b_x(x, p, l) keeps the low l bits of x and places them at bit position p.
 * The mask is built from an unsigned constant so the result is unsigned and
 * placing a bit at position 31 (b_n) does not shift into the sign bit of a
 * signed int.
 */
#define b_x(x, p, l)    (((x) & ((1U << (l)) - 1)) << (p))
#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
#define b_ss(x)         b_x((x), 24, 2) /* the space code */
#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
#define b_fff(x)        b_x((x), 8, 3)  /* function number */
#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
800 
/*
 * Context handed to spapr_phb_children_dt(): the device tree blob being
 * built and the offset of the node that receives the properties.
 */
typedef struct sPAPRTCEDT {
    void *fdt;      /* device tree blob */
    int node_off;   /* offset of the target node within fdt */
} sPAPRTCEDT;
805 
806 static int spapr_phb_children_dt(Object *child, void *opaque)
807 {
808     sPAPRTCEDT *p = opaque;
809     sPAPRTCETable *tcet;
810 
811     tcet = (sPAPRTCETable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
812     if (!tcet) {
813         return 0;
814     }
815 
816     spapr_dma_dt(p->fdt, p->node_off, "ibm,dma-window",
817                  tcet->liobn, tcet->bus_offset,
818                  tcet->nb_table << tcet->page_shift);
819     /* Stop after the first window */
820 
821     return 1;
822 }
823 
824 int spapr_populate_pci_dt(sPAPRPHBState *phb,
825                           uint32_t xics_phandle,
826                           void *fdt)
827 {
828     int bus_off, i, j;
829     char nodename[256];
830     uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
831     struct {
832         uint32_t hi;
833         uint64_t child;
834         uint64_t parent;
835         uint64_t size;
836     } QEMU_PACKED ranges[] = {
837         {
838             cpu_to_be32(b_ss(1)), cpu_to_be64(0),
839             cpu_to_be64(phb->io_win_addr),
840             cpu_to_be64(memory_region_size(&phb->iospace)),
841         },
842         {
843             cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
844             cpu_to_be64(phb->mem_win_addr),
845             cpu_to_be64(memory_region_size(&phb->memwindow)),
846         },
847     };
848     uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
849     uint32_t interrupt_map_mask[] = {
850         cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
851     uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
852 
853     /* Start populating the FDT */
854     sprintf(nodename, "pci@%" PRIx64, phb->buid);
855     bus_off = fdt_add_subnode(fdt, 0, nodename);
856     if (bus_off < 0) {
857         return bus_off;
858     }
859 
860 #define _FDT(exp) \
861     do { \
862         int ret = (exp);                                           \
863         if (ret < 0) {                                             \
864             return ret;                                            \
865         }                                                          \
866     } while (0)
867 
868     /* Write PHB properties */
869     _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
870     _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
871     _FDT(fdt_setprop_cell(fdt, bus_off, "#address-cells", 0x3));
872     _FDT(fdt_setprop_cell(fdt, bus_off, "#size-cells", 0x2));
873     _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
874     _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
875     _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
876     _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof(ranges)));
877     _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
878     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
879     _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi", XICS_IRQS));
880 
881     /* Build the interrupt-map, this must matches what is done
882      * in pci_spapr_map_irq
883      */
884     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
885                      &interrupt_map_mask, sizeof(interrupt_map_mask)));
886     for (i = 0; i < PCI_SLOT_MAX; i++) {
887         for (j = 0; j < PCI_NUM_PINS; j++) {
888             uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
889             int lsi_num = pci_spapr_swizzle(i, j);
890 
891             irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
892             irqmap[1] = 0;
893             irqmap[2] = 0;
894             irqmap[3] = cpu_to_be32(j+1);
895             irqmap[4] = cpu_to_be32(xics_phandle);
896             irqmap[5] = cpu_to_be32(phb->lsi_table[lsi_num].irq);
897             irqmap[6] = cpu_to_be32(0x8);
898         }
899     }
900     /* Write interrupt map */
901     _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
902                      sizeof(interrupt_map)));
903 
904     object_child_foreach(OBJECT(phb), spapr_phb_children_dt,
905                          &((sPAPRTCEDT){ .fdt = fdt, .node_off = bus_off }));
906 
907     return 0;
908 }
909 
910 void spapr_pci_rtas_init(void)
911 {
912     spapr_rtas_register("read-pci-config", rtas_read_pci_config);
913     spapr_rtas_register("write-pci-config", rtas_write_pci_config);
914     spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
915     spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
916     if (msi_supported) {
917         spapr_rtas_register("ibm,query-interrupt-source-number",
918                             rtas_ibm_query_interrupt_source_number);
919         spapr_rtas_register("ibm,change-msi", rtas_ibm_change_msi);
920     }
921 }
922 
923 static void spapr_pci_register_types(void)
924 {
925     type_register_static(&spapr_phb_info);
926 }
927 
928 type_init(spapr_pci_register_types)
929