xref: /qemu/hw/ppc/pnv.c (revision 7271a819)
1 /*
2  * QEMU PowerPC PowerNV machine model
3  *
4  * Copyright (c) 2016, IBM Corporation.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "sysemu/sysemu.h"
23 #include "sysemu/numa.h"
24 #include "sysemu/cpus.h"
25 #include "hw/hw.h"
26 #include "target/ppc/cpu.h"
27 #include "qemu/log.h"
28 #include "hw/ppc/fdt.h"
29 #include "hw/ppc/ppc.h"
30 #include "hw/ppc/pnv.h"
31 #include "hw/ppc/pnv_core.h"
32 #include "hw/loader.h"
33 #include "exec/address-spaces.h"
34 #include "qemu/cutils.h"
35 #include "qapi/visitor.h"
36 #include "monitor/monitor.h"
37 #include "hw/intc/intc.h"
38 #include "hw/ipmi/ipmi.h"
39 
40 #include "hw/ppc/xics.h"
41 #include "hw/ppc/pnv_xscom.h"
42 
43 #include "hw/isa/isa.h"
44 #include "hw/char/serial.h"
45 #include "hw/timer/mc146818rtc.h"
46 
47 #include <libfdt.h>
48 
/* Size of the buffer in which the flattened device tree is built */
#define FDT_MAX_SIZE            0x100000

/* Default firmware image: file name, load address and maximum size */
#define FW_FILE_NAME            "skiboot.lid"
#define FW_LOAD_ADDR            0x0
#define FW_MAX_SIZE             0x400000

/* Physical addresses at which the kernel and the initrd are loaded */
#define KERNEL_LOAD_ADDR        0x20000000
#define INITRD_LOAD_ADDR        0x40000000
57 
/*
 * The largest POWER8 configuration considered is a Power Systems E880,
 * whose cpu (thread) count is :
 *     4 * 4 sockets * 12 cores * 8 threads = 1536
 * The limit is rounded up to the next power of two, 2^11.
 */
#define MAX_CPUS                (1 << 11)
64 
65 /*
66  * Memory nodes are created by hostboot, one for each range of memory
67  * that has a different "affinity". In practice, it means one range
68  * per chip.
69  */
70 static void powernv_populate_memory_node(void *fdt, int chip_id, hwaddr start,
71                                          hwaddr size)
72 {
73     char *mem_name;
74     uint64_t mem_reg_property[2];
75     int off;
76 
77     mem_reg_property[0] = cpu_to_be64(start);
78     mem_reg_property[1] = cpu_to_be64(size);
79 
80     mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start);
81     off = fdt_add_subnode(fdt, 0, mem_name);
82     g_free(mem_name);
83 
84     _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
85     _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
86                        sizeof(mem_reg_property))));
87     _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
88 }
89 
/*
 * Return the offset of the "/cpus" node, creating it when it does not
 * exist yet.
 *
 * A freshly created node gets "#address-cells" = 1 and "#size-cells" = 0.
 * Any libfdt failure is handed to _FDT().
 */
static int get_cpus_node(void *fdt)
{
    int cpus_offset = fdt_path_offset(fdt, "/cpus");

    if (cpus_offset < 0) {
        cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
                                      "cpus");
        /*
         * libfdt reports errors as negative values, so a plain truth
         * test on the offset cannot tell success from failure. Check
         * the result before using it to set properties.
         */
        _FDT(cpus_offset);
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
        _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
    }
    return cpus_offset;
}
105 
106 /*
107  * The PowerNV cores (and threads) need to use real HW ids and not an
108  * incremental index like it has been done on other platforms. This HW
109  * id is stored in the CPU PIR, it is used to create cpu nodes in the
110  * device tree, used in XSCOM to address cores and in interrupt
111  * servers.
112  */
113 static void powernv_create_core_node(PnvChip *chip, PnvCore *pc, void *fdt)
114 {
115     CPUState *cs = CPU(DEVICE(pc->threads));
116     DeviceClass *dc = DEVICE_GET_CLASS(cs);
117     PowerPCCPU *cpu = POWERPC_CPU(cs);
118     int smt_threads = CPU_CORE(pc)->nr_threads;
119     CPUPPCState *env = &cpu->env;
120     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
121     uint32_t servers_prop[smt_threads];
122     int i;
123     uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
124                        0xffffffff, 0xffffffff};
125     uint32_t tbfreq = PNV_TIMEBASE_FREQ;
126     uint32_t cpufreq = 1000000000;
127     uint32_t page_sizes_prop[64];
128     size_t page_sizes_prop_size;
129     const uint8_t pa_features[] = { 24, 0,
130                                     0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
131                                     0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
132                                     0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
133                                     0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
134     int offset;
135     char *nodename;
136     int cpus_offset = get_cpus_node(fdt);
137 
138     nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
139     offset = fdt_add_subnode(fdt, cpus_offset, nodename);
140     _FDT(offset);
141     g_free(nodename);
142 
143     _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
144 
145     _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
146     _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
147     _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
148 
149     _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
150     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
151                             env->dcache_line_size)));
152     _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
153                             env->dcache_line_size)));
154     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
155                             env->icache_line_size)));
156     _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
157                             env->icache_line_size)));
158 
159     if (pcc->l1_dcache_size) {
160         _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
161                                pcc->l1_dcache_size)));
162     } else {
163         warn_report("Unknown L1 dcache size for cpu");
164     }
165     if (pcc->l1_icache_size) {
166         _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
167                                pcc->l1_icache_size)));
168     } else {
169         warn_report("Unknown L1 icache size for cpu");
170     }
171 
172     _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
173     _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
174     _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr)));
175     _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
176     _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
177 
178     if (env->spr_cb[SPR_PURR].oea_read) {
179         _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
180     }
181 
182     if (env->mmu_model & POWERPC_MMU_1TSEG) {
183         _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
184                            segs, sizeof(segs))));
185     }
186 
187     /* Advertise VMX/VSX (vector extensions) if available
188      *   0 / no property == no vector extensions
189      *   1               == VMX / Altivec available
190      *   2               == VSX available */
191     if (env->insns_flags & PPC_ALTIVEC) {
192         uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
193 
194         _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
195     }
196 
197     /* Advertise DFP (Decimal Floating Point) if available
198      *   0 / no property == no DFP
199      *   1               == DFP available */
200     if (env->insns_flags2 & PPC2_DFP) {
201         _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
202     }
203 
204     page_sizes_prop_size = ppc_create_page_sizes_prop(env, page_sizes_prop,
205                                                   sizeof(page_sizes_prop));
206     if (page_sizes_prop_size) {
207         _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
208                            page_sizes_prop, page_sizes_prop_size)));
209     }
210 
211     _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
212                        pa_features, sizeof(pa_features))));
213 
214     /* Build interrupt servers properties */
215     for (i = 0; i < smt_threads; i++) {
216         servers_prop[i] = cpu_to_be32(pc->pir + i);
217     }
218     _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
219                        servers_prop, sizeof(servers_prop))));
220 }
221 
222 static void powernv_populate_icp(PnvChip *chip, void *fdt, uint32_t pir,
223                                  uint32_t nr_threads)
224 {
225     uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12);
226     char *name;
227     const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
228     uint32_t irange[2], i, rsize;
229     uint64_t *reg;
230     int offset;
231 
232     irange[0] = cpu_to_be32(pir);
233     irange[1] = cpu_to_be32(nr_threads);
234 
235     rsize = sizeof(uint64_t) * 2 * nr_threads;
236     reg = g_malloc(rsize);
237     for (i = 0; i < nr_threads; i++) {
238         reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000));
239         reg[i * 2 + 1] = cpu_to_be64(0x1000);
240     }
241 
242     name = g_strdup_printf("interrupt-controller@%"PRIX64, addr);
243     offset = fdt_add_subnode(fdt, 0, name);
244     _FDT(offset);
245     g_free(name);
246 
247     _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat))));
248     _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize)));
249     _FDT((fdt_setprop_string(fdt, offset, "device_type",
250                               "PowerPC-External-Interrupt-Presentation")));
251     _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0)));
252     _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges",
253                        irange, sizeof(irange))));
254     _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1)));
255     _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0)));
256     g_free(reg);
257 }
258 
259 static int pnv_chip_lpc_offset(PnvChip *chip, void *fdt)
260 {
261     char *name;
262     int offset;
263 
264     name = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x",
265                            (uint64_t) PNV_XSCOM_BASE(chip), PNV_XSCOM_LPC_BASE);
266     offset = fdt_path_offset(fdt, name);
267     g_free(name);
268     return offset;
269 }
270 
271 static void powernv_populate_chip(PnvChip *chip, void *fdt)
272 {
273     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
274     char *typename = pnv_core_typename(pcc->cpu_model);
275     size_t typesize = object_type_get_instance_size(typename);
276     int i;
277 
278     pnv_xscom_populate(chip, fdt, 0);
279 
280     /* The default LPC bus of a multichip system is on chip 0. It's
281      * recognized by the firmware (skiboot) using a "primary"
282      * property.
283      */
284     if (chip->chip_id == 0x0) {
285         int lpc_offset = pnv_chip_lpc_offset(chip, fdt);
286 
287         _FDT((fdt_setprop(fdt, lpc_offset, "primary", NULL, 0)));
288     }
289 
290     for (i = 0; i < chip->nr_cores; i++) {
291         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
292 
293         powernv_create_core_node(chip, pnv_core, fdt);
294 
295         /* Interrupt Control Presenters (ICP). One per core. */
296         powernv_populate_icp(chip, fdt, pnv_core->pir,
297                              CPU_CORE(pnv_core)->nr_threads);
298     }
299 
300     if (chip->ram_size) {
301         powernv_populate_memory_node(fdt, chip->chip_id, chip->ram_start,
302                                      chip->ram_size);
303     }
304     g_free(typename);
305 }
306 
307 static void powernv_populate_rtc(ISADevice *d, void *fdt, int lpc_off)
308 {
309     uint32_t io_base = d->ioport_id;
310     uint32_t io_regs[] = {
311         cpu_to_be32(1),
312         cpu_to_be32(io_base),
313         cpu_to_be32(2)
314     };
315     char *name;
316     int node;
317 
318     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
319     node = fdt_add_subnode(fdt, lpc_off, name);
320     _FDT(node);
321     g_free(name);
322 
323     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
324     _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00")));
325 }
326 
327 static void powernv_populate_serial(ISADevice *d, void *fdt, int lpc_off)
328 {
329     const char compatible[] = "ns16550\0pnpPNP,501";
330     uint32_t io_base = d->ioport_id;
331     uint32_t io_regs[] = {
332         cpu_to_be32(1),
333         cpu_to_be32(io_base),
334         cpu_to_be32(8)
335     };
336     char *name;
337     int node;
338 
339     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
340     node = fdt_add_subnode(fdt, lpc_off, name);
341     _FDT(node);
342     g_free(name);
343 
344     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
345     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
346                       sizeof(compatible))));
347 
348     _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200)));
349     _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200)));
350     _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0])));
351     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
352                            fdt_get_phandle(fdt, lpc_off))));
353 
354     /* This is needed by Linux */
355     _FDT((fdt_setprop_string(fdt, node, "device_type", "serial")));
356 }
357 
358 static void powernv_populate_ipmi_bt(ISADevice *d, void *fdt, int lpc_off)
359 {
360     const char compatible[] = "bt\0ipmi-bt";
361     uint32_t io_base;
362     uint32_t io_regs[] = {
363         cpu_to_be32(1),
364         0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */
365         cpu_to_be32(3)
366     };
367     uint32_t irq;
368     char *name;
369     int node;
370 
371     io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal);
372     io_regs[1] = cpu_to_be32(io_base);
373 
374     irq = object_property_get_int(OBJECT(d), "irq", &error_fatal);
375 
376     name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base);
377     node = fdt_add_subnode(fdt, lpc_off, name);
378     _FDT(node);
379     g_free(name);
380 
381     _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs))));
382     _FDT((fdt_setprop(fdt, node, "compatible", compatible,
383                       sizeof(compatible))));
384 
385     /* Mark it as reserved to avoid Linux trying to claim it */
386     _FDT((fdt_setprop_string(fdt, node, "status", "reserved")));
387     _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq)));
388     _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent",
389                            fdt_get_phandle(fdt, lpc_off))));
390 }
391 
/*
 * Context passed as the opaque argument of the ISA bus walk to
 * powernv_populate_isa_device().
 */
typedef struct ForeachPopulateArgs {
    void *fdt;      /* device tree blob being built */
    int offset;     /* offset of the node the ISA devices are added under */
} ForeachPopulateArgs;
396 
397 static int powernv_populate_isa_device(DeviceState *dev, void *opaque)
398 {
399     ForeachPopulateArgs *args = opaque;
400     ISADevice *d = ISA_DEVICE(dev);
401 
402     if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
403         powernv_populate_rtc(d, args->fdt, args->offset);
404     } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) {
405         powernv_populate_serial(d, args->fdt, args->offset);
406     } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) {
407         powernv_populate_ipmi_bt(d, args->fdt, args->offset);
408     } else {
409         error_report("unknown isa device %s@i%x", qdev_fw_name(dev),
410                      d->ioport_id);
411     }
412 
413     return 0;
414 }
415 
416 static void powernv_populate_isa(ISABus *bus, void *fdt, int lpc_offset)
417 {
418     ForeachPopulateArgs args = {
419         .fdt = fdt,
420         .offset = lpc_offset,
421     };
422 
423     /* ISA devices are not necessarily parented to the ISA bus so we
424      * can not use object_child_foreach() */
425     qbus_walk_children(BUS(bus), powernv_populate_isa_device,
426                        NULL, NULL, NULL, &args);
427 }
428 
429 static void *powernv_create_fdt(MachineState *machine)
430 {
431     const char plat_compat[] = "qemu,powernv\0ibm,powernv";
432     PnvMachineState *pnv = POWERNV_MACHINE(machine);
433     void *fdt;
434     char *buf;
435     int off;
436     int i;
437     int lpc_offset;
438 
439     fdt = g_malloc0(FDT_MAX_SIZE);
440     _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
441 
442     /* Root node */
443     _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2)));
444     _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
445     _FDT((fdt_setprop_string(fdt, 0, "model",
446                              "IBM PowerNV (emulated by qemu)")));
447     _FDT((fdt_setprop(fdt, 0, "compatible", plat_compat,
448                       sizeof(plat_compat))));
449 
450     buf =  qemu_uuid_unparse_strdup(&qemu_uuid);
451     _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
452     if (qemu_uuid_set) {
453         _FDT((fdt_property_string(fdt, "system-id", buf)));
454     }
455     g_free(buf);
456 
457     off = fdt_add_subnode(fdt, 0, "chosen");
458     if (machine->kernel_cmdline) {
459         _FDT((fdt_setprop_string(fdt, off, "bootargs",
460                                  machine->kernel_cmdline)));
461     }
462 
463     if (pnv->initrd_size) {
464         uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
465         uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
466 
467         _FDT((fdt_setprop(fdt, off, "linux,initrd-start",
468                                &start_prop, sizeof(start_prop))));
469         _FDT((fdt_setprop(fdt, off, "linux,initrd-end",
470                                &end_prop, sizeof(end_prop))));
471     }
472 
473     /* Populate device tree for each chip */
474     for (i = 0; i < pnv->num_chips; i++) {
475         powernv_populate_chip(pnv->chips[i], fdt);
476     }
477 
478     /* Populate ISA devices on chip 0 */
479     lpc_offset = pnv_chip_lpc_offset(pnv->chips[0], fdt);
480     powernv_populate_isa(pnv->isa_bus, fdt, lpc_offset);
481 
482     if (pnv->bmc) {
483         pnv_bmc_populate_sensors(pnv->bmc, fdt);
484     }
485 
486     return fdt;
487 }
488 
489 static void pnv_powerdown_notify(Notifier *n, void *opaque)
490 {
491     PnvMachineState *pnv = POWERNV_MACHINE(qdev_get_machine());
492 
493     if (pnv->bmc) {
494         pnv_bmc_powerdown(pnv->bmc);
495     }
496 }
497 
498 static void ppc_powernv_reset(void)
499 {
500     MachineState *machine = MACHINE(qdev_get_machine());
501     PnvMachineState *pnv = POWERNV_MACHINE(machine);
502     void *fdt;
503     Object *obj;
504 
505     qemu_devices_reset();
506 
507     /* OpenPOWER systems have a BMC, which can be defined on the
508      * command line with:
509      *
510      *   -device ipmi-bmc-sim,id=bmc0
511      *
512      * This is the internal simulator but it could also be an external
513      * BMC.
514      */
515     obj = object_resolve_path_type("", "ipmi-bmc-sim", NULL);
516     if (obj) {
517         pnv->bmc = IPMI_BMC(obj);
518     }
519 
520     fdt = powernv_create_fdt(machine);
521 
522     /* Pack resulting tree */
523     _FDT((fdt_pack(fdt)));
524 
525     cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));
526 }
527 
528 static ISABus *pnv_isa_create(PnvChip *chip)
529 {
530     PnvLpcController *lpc = &chip->lpc;
531     ISABus *isa_bus;
532     qemu_irq *irqs;
533     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
534 
535     /* let isa_bus_new() create its own bridge on SysBus otherwise
536      * devices speficied on the command line won't find the bus and
537      * will fail to create.
538      */
539     isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io,
540                           &error_fatal);
541 
542     irqs = pnv_lpc_isa_irq_create(lpc, pcc->chip_type, ISA_NUM_IRQS);
543 
544     isa_bus_irqs(isa_bus, irqs);
545     return isa_bus;
546 }
547 
548 static void ppc_powernv_init(MachineState *machine)
549 {
550     PnvMachineState *pnv = POWERNV_MACHINE(machine);
551     MemoryRegion *ram;
552     char *fw_filename;
553     long fw_size;
554     int i;
555     char *chip_typename;
556 
557     /* allocate RAM */
558     if (machine->ram_size < (1 * G_BYTE)) {
559         warn_report("skiboot may not work with < 1GB of RAM");
560     }
561 
562     ram = g_new(MemoryRegion, 1);
563     memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram",
564                                          machine->ram_size);
565     memory_region_add_subregion(get_system_memory(), 0, ram);
566 
567     /* load skiboot firmware  */
568     if (bios_name == NULL) {
569         bios_name = FW_FILE_NAME;
570     }
571 
572     fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
573     if (!fw_filename) {
574         error_report("Could not find OPAL firmware '%s'", bios_name);
575         exit(1);
576     }
577 
578     fw_size = load_image_targphys(fw_filename, FW_LOAD_ADDR, FW_MAX_SIZE);
579     if (fw_size < 0) {
580         error_report("Could not load OPAL firmware '%s'", fw_filename);
581         exit(1);
582     }
583     g_free(fw_filename);
584 
585     /* load kernel */
586     if (machine->kernel_filename) {
587         long kernel_size;
588 
589         kernel_size = load_image_targphys(machine->kernel_filename,
590                                           KERNEL_LOAD_ADDR, 0x2000000);
591         if (kernel_size < 0) {
592             error_report("Could not load kernel '%s'",
593                          machine->kernel_filename);
594             exit(1);
595         }
596     }
597 
598     /* load initrd */
599     if (machine->initrd_filename) {
600         pnv->initrd_base = INITRD_LOAD_ADDR;
601         pnv->initrd_size = load_image_targphys(machine->initrd_filename,
602                                   pnv->initrd_base, 0x10000000); /* 128MB max */
603         if (pnv->initrd_size < 0) {
604             error_report("Could not load initial ram disk '%s'",
605                          machine->initrd_filename);
606             exit(1);
607         }
608     }
609 
610     /* We need some cpu model to instantiate the PnvChip class */
611     if (machine->cpu_model == NULL) {
612         machine->cpu_model = "POWER8";
613     }
614 
615     /* Create the processor chips */
616     chip_typename = g_strdup_printf(TYPE_PNV_CHIP "-%s", machine->cpu_model);
617     if (!object_class_by_name(chip_typename)) {
618         error_report("invalid CPU model '%s' for %s machine",
619                      machine->cpu_model, MACHINE_GET_CLASS(machine)->name);
620         exit(1);
621     }
622 
623     pnv->chips = g_new0(PnvChip *, pnv->num_chips);
624     for (i = 0; i < pnv->num_chips; i++) {
625         char chip_name[32];
626         Object *chip = object_new(chip_typename);
627 
628         pnv->chips[i] = PNV_CHIP(chip);
629 
630         /* TODO: put all the memory in one node on chip 0 until we find a
631          * way to specify different ranges for each chip
632          */
633         if (i == 0) {
634             object_property_set_int(chip, machine->ram_size, "ram-size",
635                                     &error_fatal);
636         }
637 
638         snprintf(chip_name, sizeof(chip_name), "chip[%d]", PNV_CHIP_HWID(i));
639         object_property_add_child(OBJECT(pnv), chip_name, chip, &error_fatal);
640         object_property_set_int(chip, PNV_CHIP_HWID(i), "chip-id",
641                                 &error_fatal);
642         object_property_set_int(chip, smp_cores, "nr-cores", &error_fatal);
643         object_property_set_bool(chip, true, "realized", &error_fatal);
644     }
645     g_free(chip_typename);
646 
647     /* Instantiate ISA bus on chip 0 */
648     pnv->isa_bus = pnv_isa_create(pnv->chips[0]);
649 
650     /* Create serial port */
651     serial_hds_isa_init(pnv->isa_bus, 0, MAX_SERIAL_PORTS);
652 
653     /* Create an RTC ISA device too */
654     rtc_init(pnv->isa_bus, 2000, NULL);
655 
656     /* OpenPOWER systems use a IPMI SEL Event message to notify the
657      * host to powerdown */
658     pnv->powerdown_notifier.notify = pnv_powerdown_notify;
659     qemu_register_powerdown_notifier(&pnv->powerdown_notifier);
660 }
661 
662 /*
663  *    0:21  Reserved - Read as zeros
664  *   22:24  Chip ID
665  *   25:28  Core number
666  *   29:31  Thread ID
667  */
668 static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
669 {
670     return (chip->chip_id << 7) | (core_id << 3);
671 }
672 
673 /*
674  *    0:48  Reserved - Read as zeroes
675  *   49:52  Node ID
676  *   53:55  Chip ID
677  *   56     Reserved - Read as zero
678  *   57:61  Core number
679  *   62:63  Thread ID
680  *
681  * We only care about the lower bits. uint32_t is fine for the moment.
682  */
683 static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
684 {
685     return (chip->chip_id << 8) | (core_id << 2);
686 }
687 
688 /* Allowed core identifiers on a POWER8 Processor Chip :
689  *
690  * <EX0 reserved>
691  *  EX1  - Venice only
692  *  EX2  - Venice only
693  *  EX3  - Venice only
694  *  EX4
695  *  EX5
696  *  EX6
697  * <EX7,8 reserved> <reserved>
698  *  EX9  - Venice only
699  *  EX10 - Venice only
700  *  EX11 - Venice only
701  *  EX12
702  *  EX13
703  *  EX14
704  * <EX15 reserved>
705  */
/* EX4-6 and EX12-14 */
#define POWER8E_CORE_MASK  (0x7070ull)
/* Same cores plus the Venice only ones, EX1-3 and EX9-11 */
#define POWER8_CORE_MASK   (POWER8E_CORE_MASK | 0x0e0eull)
708 
/*
 * POWER9 has 24 cores, ids starting at 0x20 : 24 bits set from bit 32
 * upwards.
 */
#define POWER9_CORE_MASK   (0xffffffull << 32)
713 
714 static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
715 {
716     DeviceClass *dc = DEVICE_CLASS(klass);
717     PnvChipClass *k = PNV_CHIP_CLASS(klass);
718 
719     k->cpu_model = "POWER8E";
720     k->chip_type = PNV_CHIP_POWER8E;
721     k->chip_cfam_id = 0x221ef04980000000ull;  /* P8 Murano DD2.1 */
722     k->cores_mask = POWER8E_CORE_MASK;
723     k->core_pir = pnv_chip_core_pir_p8;
724     k->xscom_base = 0x003fc0000000000ull;
725     k->xscom_core_base = 0x10000000ull;
726     dc->desc = "PowerNV Chip POWER8E";
727 }
728 
729 static const TypeInfo pnv_chip_power8e_info = {
730     .name          = TYPE_PNV_CHIP_POWER8E,
731     .parent        = TYPE_PNV_CHIP,
732     .instance_size = sizeof(PnvChip),
733     .class_init    = pnv_chip_power8e_class_init,
734 };
735 
736 static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
737 {
738     DeviceClass *dc = DEVICE_CLASS(klass);
739     PnvChipClass *k = PNV_CHIP_CLASS(klass);
740 
741     k->cpu_model = "POWER8";
742     k->chip_type = PNV_CHIP_POWER8;
743     k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
744     k->cores_mask = POWER8_CORE_MASK;
745     k->core_pir = pnv_chip_core_pir_p8;
746     k->xscom_base = 0x003fc0000000000ull;
747     k->xscom_core_base = 0x10000000ull;
748     dc->desc = "PowerNV Chip POWER8";
749 }
750 
751 static const TypeInfo pnv_chip_power8_info = {
752     .name          = TYPE_PNV_CHIP_POWER8,
753     .parent        = TYPE_PNV_CHIP,
754     .instance_size = sizeof(PnvChip),
755     .class_init    = pnv_chip_power8_class_init,
756 };
757 
758 static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
759 {
760     DeviceClass *dc = DEVICE_CLASS(klass);
761     PnvChipClass *k = PNV_CHIP_CLASS(klass);
762 
763     k->cpu_model = "POWER8NVL";
764     k->chip_type = PNV_CHIP_POWER8NVL;
765     k->chip_cfam_id = 0x120d304980000000ull;  /* P8 Naples DD1.0 */
766     k->cores_mask = POWER8_CORE_MASK;
767     k->core_pir = pnv_chip_core_pir_p8;
768     k->xscom_base = 0x003fc0000000000ull;
769     k->xscom_core_base = 0x10000000ull;
770     dc->desc = "PowerNV Chip POWER8NVL";
771 }
772 
773 static const TypeInfo pnv_chip_power8nvl_info = {
774     .name          = TYPE_PNV_CHIP_POWER8NVL,
775     .parent        = TYPE_PNV_CHIP,
776     .instance_size = sizeof(PnvChip),
777     .class_init    = pnv_chip_power8nvl_class_init,
778 };
779 
780 static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
781 {
782     DeviceClass *dc = DEVICE_CLASS(klass);
783     PnvChipClass *k = PNV_CHIP_CLASS(klass);
784 
785     k->cpu_model = "POWER9";
786     k->chip_type = PNV_CHIP_POWER9;
787     k->chip_cfam_id = 0x100d104980000000ull; /* P9 Nimbus DD1.0 */
788     k->cores_mask = POWER9_CORE_MASK;
789     k->core_pir = pnv_chip_core_pir_p9;
790     k->xscom_base = 0x00603fc00000000ull;
791     k->xscom_core_base = 0x0ull;
792     dc->desc = "PowerNV Chip POWER9";
793 }
794 
795 static const TypeInfo pnv_chip_power9_info = {
796     .name          = TYPE_PNV_CHIP_POWER9,
797     .parent        = TYPE_PNV_CHIP,
798     .instance_size = sizeof(PnvChip),
799     .class_init    = pnv_chip_power9_class_init,
800 };
801 
802 static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
803 {
804     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
805     int cores_max;
806 
807     /*
808      * No custom mask for this chip, let's use the default one from *
809      * the chip class
810      */
811     if (!chip->cores_mask) {
812         chip->cores_mask = pcc->cores_mask;
813     }
814 
815     /* filter alien core ids ! some are reserved */
816     if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
817         error_setg(errp, "warning: invalid core mask for chip Ox%"PRIx64" !",
818                    chip->cores_mask);
819         return;
820     }
821     chip->cores_mask &= pcc->cores_mask;
822 
823     /* now that we have a sane layout, let check the number of cores */
824     cores_max = ctpop64(chip->cores_mask);
825     if (chip->nr_cores > cores_max) {
826         error_setg(errp, "warning: too many cores for chip ! Limit is %d",
827                    cores_max);
828         return;
829     }
830 }
831 
832 static void pnv_chip_init(Object *obj)
833 {
834     PnvChip *chip = PNV_CHIP(obj);
835     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
836 
837     chip->xscom_base = pcc->xscom_base;
838 
839     object_initialize(&chip->lpc, sizeof(chip->lpc), TYPE_PNV_LPC);
840     object_property_add_child(obj, "lpc", OBJECT(&chip->lpc), NULL);
841 
842     object_initialize(&chip->psi, sizeof(chip->psi), TYPE_PNV_PSI);
843     object_property_add_child(obj, "psi", OBJECT(&chip->psi), NULL);
844     object_property_add_const_link(OBJECT(&chip->psi), "xics",
845                                    OBJECT(qdev_get_machine()), &error_abort);
846 
847     object_initialize(&chip->occ, sizeof(chip->occ), TYPE_PNV_OCC);
848     object_property_add_child(obj, "occ", OBJECT(&chip->occ), NULL);
849     object_property_add_const_link(OBJECT(&chip->occ), "psi",
850                                    OBJECT(&chip->psi), &error_abort);
851 
852     /* The LPC controller needs PSI to generate interrupts */
853     object_property_add_const_link(OBJECT(&chip->lpc), "psi",
854                                    OBJECT(&chip->psi), &error_abort);
855 }
856 
857 static void pnv_chip_icp_realize(PnvChip *chip, Error **errp)
858 {
859     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
860     char *typename = pnv_core_typename(pcc->cpu_model);
861     size_t typesize = object_type_get_instance_size(typename);
862     int i, j;
863     char *name;
864     XICSFabric *xi = XICS_FABRIC(qdev_get_machine());
865 
866     name = g_strdup_printf("icp-%x", chip->chip_id);
867     memory_region_init(&chip->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE);
868     sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->icp_mmio);
869     g_free(name);
870 
871     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip));
872 
873     /* Map the ICP registers for each thread */
874     for (i = 0; i < chip->nr_cores; i++) {
875         PnvCore *pnv_core = PNV_CORE(chip->cores + i * typesize);
876         int core_hwid = CPU_CORE(pnv_core)->core_id;
877 
878         for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) {
879             uint32_t pir = pcc->core_pir(chip, core_hwid) + j;
880             PnvICPState *icp = PNV_ICP(xics_icp_get(xi, pir));
881 
882             memory_region_add_subregion(&chip->icp_mmio, pir << 12, &icp->mmio);
883         }
884     }
885 
886     g_free(typename);
887 }
888 
889 static void pnv_chip_realize(DeviceState *dev, Error **errp)
890 {
891     PnvChip *chip = PNV_CHIP(dev);
892     Error *error = NULL;
893     PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
894     char *typename = pnv_core_typename(pcc->cpu_model);
895     size_t typesize = object_type_get_instance_size(typename);
896     int i, core_hwid;
897 
898     if (!object_class_by_name(typename)) {
899         error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
900         return;
901     }
902 
903     /* XSCOM bridge */
904     pnv_xscom_realize(chip, &error);
905     if (error) {
906         error_propagate(errp, error);
907         return;
908     }
909     sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip));
910 
911     /* Cores */
912     pnv_chip_core_sanitize(chip, &error);
913     if (error) {
914         error_propagate(errp, error);
915         return;
916     }
917 
918     chip->cores = g_malloc0(typesize * chip->nr_cores);
919 
920     for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
921              && (i < chip->nr_cores); core_hwid++) {
922         char core_name[32];
923         void *pnv_core = chip->cores + i * typesize;
924 
925         if (!(chip->cores_mask & (1ull << core_hwid))) {
926             continue;
927         }
928 
929         object_initialize(pnv_core, typesize, typename);
930         snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
931         object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core),
932                                   &error_fatal);
933         object_property_set_int(OBJECT(pnv_core), smp_threads, "nr-threads",
934                                 &error_fatal);
935         object_property_set_int(OBJECT(pnv_core), core_hwid,
936                                 CPU_CORE_PROP_CORE_ID, &error_fatal);
937         object_property_set_int(OBJECT(pnv_core),
938                                 pcc->core_pir(chip, core_hwid),
939                                 "pir", &error_fatal);
940         object_property_add_const_link(OBJECT(pnv_core), "xics",
941                                        qdev_get_machine(), &error_fatal);
942         object_property_set_bool(OBJECT(pnv_core), true, "realized",
943                                  &error_fatal);
944         object_unref(OBJECT(pnv_core));
945 
946         /* Each core has an XSCOM MMIO region */
947         pnv_xscom_add_subregion(chip,
948                                 PNV_XSCOM_EX_CORE_BASE(pcc->xscom_core_base,
949                                                        core_hwid),
950                                 &PNV_CORE(pnv_core)->xscom_regs);
951         i++;
952     }
953     g_free(typename);
954 
955     /* Create LPC controller */
956     object_property_set_bool(OBJECT(&chip->lpc), true, "realized",
957                              &error_fatal);
958     pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip->lpc.xscom_regs);
959 
960     /* Interrupt Management Area. This is the memory region holding
961      * all the Interrupt Control Presenter (ICP) registers */
962     pnv_chip_icp_realize(chip, &error);
963     if (error) {
964         error_propagate(errp, error);
965         return;
966     }
967 
968     /* Processor Service Interface (PSI) Host Bridge */
969     object_property_set_int(OBJECT(&chip->psi), PNV_PSIHB_BASE(chip),
970                             "bar", &error_fatal);
971     object_property_set_bool(OBJECT(&chip->psi), true, "realized", &error);
972     if (error) {
973         error_propagate(errp, error);
974         return;
975     }
976     pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, &chip->psi.xscom_regs);
977 
978     /* Create the simplified OCC model */
979     object_property_set_bool(OBJECT(&chip->occ), true, "realized", &error);
980     if (error) {
981         error_propagate(errp, error);
982         return;
983     }
984     pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip->occ.xscom_regs);
985 }
986 
987 static Property pnv_chip_properties[] = {
988     DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
989     DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0),
990     DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0),
991     DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
992     DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
993     DEFINE_PROP_END_OF_LIST(),
994 };
995 
996 static void pnv_chip_class_init(ObjectClass *klass, void *data)
997 {
998     DeviceClass *dc = DEVICE_CLASS(klass);
999 
1000     set_bit(DEVICE_CATEGORY_CPU, dc->categories);
1001     dc->realize = pnv_chip_realize;
1002     dc->props = pnv_chip_properties;
1003     dc->desc = "PowerNV Chip";
1004 }
1005 
1006 static const TypeInfo pnv_chip_info = {
1007     .name          = TYPE_PNV_CHIP,
1008     .parent        = TYPE_SYS_BUS_DEVICE,
1009     .class_init    = pnv_chip_class_init,
1010     .instance_init = pnv_chip_init,
1011     .class_size    = sizeof(PnvChipClass),
1012     .abstract      = true,
1013 };
1014 
1015 static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
1016 {
1017     PnvMachineState *pnv = POWERNV_MACHINE(xi);
1018     int i;
1019 
1020     for (i = 0; i < pnv->num_chips; i++) {
1021         if (ics_valid_irq(&pnv->chips[i]->psi.ics, irq)) {
1022             return &pnv->chips[i]->psi.ics;
1023         }
1024     }
1025     return NULL;
1026 }
1027 
1028 static void pnv_ics_resend(XICSFabric *xi)
1029 {
1030     PnvMachineState *pnv = POWERNV_MACHINE(xi);
1031     int i;
1032 
1033     for (i = 0; i < pnv->num_chips; i++) {
1034         ics_resend(&pnv->chips[i]->psi.ics);
1035     }
1036 }
1037 
1038 static PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
1039 {
1040     CPUState *cs;
1041 
1042     CPU_FOREACH(cs) {
1043         PowerPCCPU *cpu = POWERPC_CPU(cs);
1044         CPUPPCState *env = &cpu->env;
1045 
1046         if (env->spr_cb[SPR_PIR].default_value == pir) {
1047             return cpu;
1048         }
1049     }
1050 
1051     return NULL;
1052 }
1053 
1054 static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
1055 {
1056     PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
1057 
1058     return cpu ? ICP(cpu->intc) : NULL;
1059 }
1060 
1061 static void pnv_pic_print_info(InterruptStatsProvider *obj,
1062                                Monitor *mon)
1063 {
1064     PnvMachineState *pnv = POWERNV_MACHINE(obj);
1065     int i;
1066     CPUState *cs;
1067 
1068     CPU_FOREACH(cs) {
1069         PowerPCCPU *cpu = POWERPC_CPU(cs);
1070 
1071         icp_pic_print_info(ICP(cpu->intc), mon);
1072     }
1073 
1074     for (i = 0; i < pnv->num_chips; i++) {
1075         ics_pic_print_info(&pnv->chips[i]->psi.ics, mon);
1076     }
1077 }
1078 
1079 static void pnv_get_num_chips(Object *obj, Visitor *v, const char *name,
1080                               void *opaque, Error **errp)
1081 {
1082     visit_type_uint32(v, name, &POWERNV_MACHINE(obj)->num_chips, errp);
1083 }
1084 
1085 static void pnv_set_num_chips(Object *obj, Visitor *v, const char *name,
1086                               void *opaque, Error **errp)
1087 {
1088     PnvMachineState *pnv = POWERNV_MACHINE(obj);
1089     uint32_t num_chips;
1090     Error *local_err = NULL;
1091 
1092     visit_type_uint32(v, name, &num_chips, &local_err);
1093     if (local_err) {
1094         error_propagate(errp, local_err);
1095         return;
1096     }
1097 
1098     /*
1099      * TODO: should we decide on how many chips we can create based
1100      * on #cores and Venice vs. Murano vs. Naples chip type etc...,
1101      */
1102     if (!is_power_of_2(num_chips) || num_chips > 4) {
1103         error_setg(errp, "invalid number of chips: '%d'", num_chips);
1104         return;
1105     }
1106 
1107     pnv->num_chips = num_chips;
1108 }
1109 
1110 static void powernv_machine_initfn(Object *obj)
1111 {
1112     PnvMachineState *pnv = POWERNV_MACHINE(obj);
1113     pnv->num_chips = 1;
1114 }
1115 
1116 static void powernv_machine_class_props_init(ObjectClass *oc)
1117 {
1118     object_class_property_add(oc, "num-chips", "uint32",
1119                               pnv_get_num_chips, pnv_set_num_chips,
1120                               NULL, NULL, NULL);
1121     object_class_property_set_description(oc, "num-chips",
1122                               "Specifies the number of processor chips",
1123                               NULL);
1124 }
1125 
1126 static void powernv_machine_class_init(ObjectClass *oc, void *data)
1127 {
1128     MachineClass *mc = MACHINE_CLASS(oc);
1129     XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
1130     InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
1131 
1132     mc->desc = "IBM PowerNV (Non-Virtualized)";
1133     mc->init = ppc_powernv_init;
1134     mc->reset = ppc_powernv_reset;
1135     mc->max_cpus = MAX_CPUS;
1136     mc->block_default_type = IF_IDE; /* Pnv provides a AHCI device for
1137                                       * storage */
1138     mc->no_parallel = 1;
1139     mc->default_boot_order = NULL;
1140     mc->default_ram_size = 1 * G_BYTE;
1141     xic->icp_get = pnv_icp_get;
1142     xic->ics_get = pnv_ics_get;
1143     xic->ics_resend = pnv_ics_resend;
1144     ispc->print_info = pnv_pic_print_info;
1145 
1146     powernv_machine_class_props_init(oc);
1147 }
1148 
1149 static const TypeInfo powernv_machine_info = {
1150     .name          = TYPE_POWERNV_MACHINE,
1151     .parent        = TYPE_MACHINE,
1152     .instance_size = sizeof(PnvMachineState),
1153     .instance_init = powernv_machine_initfn,
1154     .class_init    = powernv_machine_class_init,
1155     .interfaces = (InterfaceInfo[]) {
1156         { TYPE_XICS_FABRIC },
1157         { TYPE_INTERRUPT_STATS_PROVIDER },
1158         { },
1159     },
1160 };
1161 
1162 static void powernv_machine_register_types(void)
1163 {
1164     type_register_static(&powernv_machine_info);
1165     type_register_static(&pnv_chip_info);
1166     type_register_static(&pnv_chip_power8e_info);
1167     type_register_static(&pnv_chip_power8_info);
1168     type_register_static(&pnv_chip_power8nvl_info);
1169     type_register_static(&pnv_chip_power9_info);
1170 }
1171 
1172 type_init(powernv_machine_register_types)
1173