1 /* Copyright 2013-2016 IBM Corp.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 * implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <skiboot.h>
18 #include <psi.h>
19 #include <chiptod.h>
20 #include <nx.h>
21 #include <cpu.h>
22 #include <processor.h>
23 #include <xscom.h>
24 #include <opal.h>
25 #include <opal-msg.h>
26 #include <elf.h>
27 #include <io.h>
28 #include <cec.h>
29 #include <device.h>
30 #include <pci.h>
31 #include <lpc.h>
32 #include <i2c.h>
33 #include <chip.h>
34 #include <interrupts.h>
35 #include <mem_region.h>
36 #include <trace.h>
37 #include <console.h>
38 #include <fsi-master.h>
39 #include <centaur.h>
40 #include <libfdt/libfdt.h>
41 #include <timer.h>
42 #include <ipmi.h>
43 #include <sensor.h>
44 #include <xive.h>
45 #include <nvram.h>
46 #include <vas.h>
47 #include <libstb/secureboot.h>
48 #include <libstb/trustedboot.h>
49 #include <phys-map.h>
50 #include <imc.h>
51 #include <dts.h>
52 #include <dio-p9.h>
53 #include <sbe-p9.h>
54 #include <debug_descriptor.h>
55 #include <occ.h>
56
/* Processor generation we are running on */
enum proc_gen proc_gen;
/* PCIe link speed cap (0 = none); set from NVRAM in pci_nvram_init() */
unsigned int pcie_max_link_speed;
/* Verbose EEH reporting; set from "pci-eeh-verbose" in pci_nvram_init() */
bool verbose_eeh;

/* Boot target, filled in by the try_load_elf* helpers in load_kernel() */
static uint64_t kernel_entry;
static size_t kernel_size;
static bool kernel_32bit;

/* We backup the previous vectors here before copying our own */
static uint8_t old_vectors[EXCEPTION_VECTORS_END];

#ifdef SKIBOOT_GCOV
void skiboot_gcov_done(void);
#endif

/*
 * Debug descriptor: eye-catcher "OPALdbug" plus the memory console
 * location, trace mask and console log levels.
 */
struct debug_descriptor debug_descriptor = {
	.eye_catcher	= "OPALdbug",
	.version	= DEBUG_DESC_VERSION,
	.state_flags	= 0,
	.memcons_phys	= (uint64_t)&memcons,
	.trace_mask	= 0, /* All traces disabled by default */
	/* console log level:
	 * high 4 bits in memory, low 4 bits driver (e.g. uart). */
#ifdef DEBUG
	.console_log_levels = (PR_TRACE << 4) | PR_DEBUG,
#else
	.console_log_levels = (PR_DEBUG << 4) | PR_NOTICE,
#endif
};
86
try_load_elf64_le(struct elf_hdr * header)87 static bool try_load_elf64_le(struct elf_hdr *header)
88 {
89 struct elf64_hdr *kh = (struct elf64_hdr *)header;
90 uint64_t load_base = (uint64_t)kh;
91 struct elf64_phdr *ph;
92 unsigned int i;
93
94 printf("INIT: 64-bit LE kernel discovered\n");
95
96 /* Look for a loadable program header that has our entry in it
97 *
98 * Note that we execute the kernel in-place, we don't actually
99 * obey the load informations in the headers. This is expected
100 * to work for the Linux Kernel because it's a fairly dumb ELF
101 * but it will not work for any ELF binary.
102 */
103 ph = (struct elf64_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
104 for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
105 if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
106 continue;
107 if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
108 (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
109 le64_to_cpu(kh->e_entry))
110 continue;
111
112 /* Get our entry */
113 kernel_entry = le64_to_cpu(kh->e_entry) -
114 le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
115 break;
116 }
117
118 if (!kernel_entry) {
119 prerror("INIT: Failed to find kernel entry !\n");
120 return false;
121 }
122 kernel_entry += load_base;
123 kernel_32bit = false;
124
125 kernel_size = le64_to_cpu(kh->e_shoff) +
126 ((uint32_t)le16_to_cpu(kh->e_shentsize) *
127 (uint32_t)le16_to_cpu(kh->e_shnum));
128
129 prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
130 kernel_entry, kernel_size);
131
132 return true;
133 }
134
/*
 * Parse a 64-bit ELF kernel image located at @header.
 *
 * Little-endian images are handed off to try_load_elf64_le(); big
 * endian images are validated as ppc64 ELFs, the entry point is
 * located inside a PT_LOAD segment and, for the v1 (function
 * descriptor) ABI, resolved through the descriptor. The results go
 * into kernel_entry/kernel_size/kernel_32bit.
 *
 * Returns true if a usable entry point was found.
 */
static bool try_load_elf64(struct elf_hdr *header)
{
	struct elf64_hdr *kh = (struct elf64_hdr *)header;
	uint64_t load_base = (uint64_t)kh;
	struct elf64_phdr *ph;
	struct elf64_shdr *sh;
	unsigned int i;

	/* Check it's a ppc64 LE ELF: e_machine is read raw, so compare
	 * against a byte-swapped constant */
	if (kh->ei_ident == ELF_IDENT &&
	    kh->ei_data == ELF_DATA_LSB &&
	    kh->e_machine == le16_to_cpu(ELF_MACH_PPC64)) {
		return try_load_elf64_le(header);
	}

	/* Check it's a ppc64 ELF */
	if (kh->ei_ident != ELF_IDENT ||
	    kh->ei_data != ELF_DATA_MSB ||
	    kh->e_machine != ELF_MACH_PPC64) {
		prerror("INIT: Kernel doesn't look like an ppc64 ELF\n");
		return false;
	}

	/* Look for a loadable program header that has our entry in it
	 *
	 * Note that we execute the kernel in-place, we don't actually
	 * obey the load informations in the headers. This is expected
	 * to work for the Linux Kernel because it's a fairly dumb ELF
	 * but it will not work for any ELF binary.
	 */
	ph = (struct elf64_phdr *)(load_base + kh->e_phoff);
	for (i = 0; i < kh->e_phnum; i++, ph++) {
		if (ph->p_type != ELF_PTYPE_LOAD)
			continue;
		if (ph->p_vaddr > kh->e_entry ||
		    (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
			continue;

		/* Get our entry */
		kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
		break;
	}

	if (!kernel_entry) {
		prerror("INIT: Failed to find kernel entry !\n");
		return false;
	}

	/* For the normal big-endian ELF ABI, the kernel entry points
	 * to a function descriptor in the data section. Linux instead
	 * has it point directly to code. Test whether it is pointing
	 * into an executable section or not to figure this out. Default
	 * to assuming it obeys the ABI.
	 */
	sh = (struct elf64_shdr *)(load_base + kh->e_shoff);
	for (i = 0; i < kh->e_shnum; i++, sh++) {
		if (sh->sh_addr <= kh->e_entry &&
		    (sh->sh_addr + sh->sh_size) > kh->e_entry)
			break;
	}

	/* Entry is not in an executable section: treat it as a function
	 * descriptor and load the real code address from its first word.
	 * Note: ph still points at the PT_LOAD segment found above. */
	if (i == kh->e_shnum || !(sh->sh_flags & ELF_SFLAGS_X)) {
		kernel_entry = *(uint64_t *)(kernel_entry + load_base);
		kernel_entry = kernel_entry - ph->p_vaddr + ph->p_offset;
	}

	kernel_entry += load_base;
	kernel_32bit = false;

	/* Size estimate: up to the end of the section header table */
	kernel_size = kh->e_shoff +
		((uint32_t)kh->e_shentsize * (uint32_t)kh->e_shnum);

	printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n",
	       kernel_entry, kernel_size);

	return true;
}
212
try_load_elf32_le(struct elf_hdr * header)213 static bool try_load_elf32_le(struct elf_hdr *header)
214 {
215 struct elf32_hdr *kh = (struct elf32_hdr *)header;
216 uint64_t load_base = (uint64_t)kh;
217 struct elf32_phdr *ph;
218 unsigned int i;
219
220 printf("INIT: 32-bit LE kernel discovered\n");
221
222 /* Look for a loadable program header that has our entry in it
223 *
224 * Note that we execute the kernel in-place, we don't actually
225 * obey the load informations in the headers. This is expected
226 * to work for the Linux Kernel because it's a fairly dumb ELF
227 * but it will not work for any ELF binary.
228 */
229 ph = (struct elf32_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
230 for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
231 if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
232 continue;
233 if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
234 (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
235 le32_to_cpu(kh->e_entry))
236 continue;
237
238 /* Get our entry */
239 kernel_entry = le32_to_cpu(kh->e_entry) -
240 le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
241 break;
242 }
243
244 if (!kernel_entry) {
245 prerror("INIT: Failed to find kernel entry !\n");
246 return false;
247 }
248
249 kernel_entry += load_base;
250 kernel_32bit = true;
251
252 printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
253
254 return true;
255 }
256
try_load_elf32(struct elf_hdr * header)257 static bool try_load_elf32(struct elf_hdr *header)
258 {
259 struct elf32_hdr *kh = (struct elf32_hdr *)header;
260 uint64_t load_base = (uint64_t)kh;
261 struct elf32_phdr *ph;
262 unsigned int i;
263
264 /* Check it's a ppc32 LE ELF */
265 if (header->ei_ident == ELF_IDENT &&
266 header->ei_data == ELF_DATA_LSB &&
267 header->e_machine == le16_to_cpu(ELF_MACH_PPC32)) {
268 return try_load_elf32_le(header);
269 }
270
271 /* Check it's a ppc32 ELF */
272 if (header->ei_ident != ELF_IDENT ||
273 header->ei_data != ELF_DATA_MSB ||
274 header->e_machine != ELF_MACH_PPC32) {
275 prerror("INIT: Kernel doesn't look like an ppc32 ELF\n");
276 return false;
277 }
278
279 /* Look for a loadable program header that has our entry in it
280 *
281 * Note that we execute the kernel in-place, we don't actually
282 * obey the load informations in the headers. This is expected
283 * to work for the Linux Kernel because it's a fairly dumb ELF
284 * but it will not work for any ELF binary.
285 */
286 ph = (struct elf32_phdr *)(load_base + kh->e_phoff);
287 for (i = 0; i < kh->e_phnum; i++, ph++) {
288 if (ph->p_type != ELF_PTYPE_LOAD)
289 continue;
290 if (ph->p_vaddr > kh->e_entry ||
291 (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
292 continue;
293
294 /* Get our entry */
295 kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
296 break;
297 }
298
299 if (!kernel_entry) {
300 prerror("INIT: Failed to find kernel entry !\n");
301 return false;
302 }
303
304 kernel_entry += load_base;
305 kernel_32bit = true;
306
307 printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
308
309 return true;
310 }
311
312 extern char __builtin_kernel_start[];
313 extern char __builtin_kernel_end[];
314 extern uint64_t boot_offset;
315
316 static size_t initramfs_size;
317
start_preload_kernel(void)318 bool start_preload_kernel(void)
319 {
320 int loaded;
321
322 /* Try to load an external kernel payload through the platform hooks */
323 kernel_size = KERNEL_LOAD_SIZE;
324 loaded = start_preload_resource(RESOURCE_ID_KERNEL,
325 RESOURCE_SUBID_NONE,
326 KERNEL_LOAD_BASE,
327 &kernel_size);
328 if (loaded != OPAL_SUCCESS) {
329 printf("INIT: platform start load kernel failed\n");
330 kernel_size = 0;
331 return false;
332 }
333
334 initramfs_size = INITRAMFS_LOAD_SIZE;
335 loaded = start_preload_resource(RESOURCE_ID_INITRAMFS,
336 RESOURCE_SUBID_NONE,
337 INITRAMFS_LOAD_BASE, &initramfs_size);
338 if (loaded != OPAL_SUCCESS) {
339 printf("INIT: platform start load initramfs failed\n");
340 initramfs_size = 0;
341 return false;
342 }
343
344 return true;
345 }
346
/*
 * Locate the kernel image and parse its ELF header.
 *
 * The image may come from the platform resource preload started in
 * start_preload_kernel(), from the payload built into skiboot, or
 * from a fixed address given by the "kernel-base-address" property in
 * /chosen. A secure-boot (STB) container wrapper is detected and
 * skipped. Entry information ends up in kernel_entry/kernel_size/
 * kernel_32bit via the try_load_elf* helpers.
 *
 * Returns false only when an ELF image was found but could not be
 * parsed; a non-ELF image is assumed to be a raw binary and accepted.
 */
static bool load_kernel(void)
{
	void *stb_container = NULL;
	struct elf_hdr *kh;
	int loaded;

	prlog(PR_NOTICE, "INIT: Waiting for kernel...\n");

	loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,
					  RESOURCE_SUBID_NONE);

	if (loaded != OPAL_SUCCESS) {
		printf("INIT: platform wait for kernel load failed\n");
		kernel_size = 0;
	}

	/* Try embedded kernel payload */
	if (!kernel_size) {
		kernel_size = __builtin_kernel_end - __builtin_kernel_start;
		if (kernel_size) {
			/* Move the built-in kernel up */
			uint64_t builtin_base =
				((uint64_t)__builtin_kernel_start) -
				SKIBOOT_BASE + boot_offset;
			printf("Using built-in kernel\n");
			memmove(KERNEL_LOAD_BASE, (void*)builtin_base,
				kernel_size);
		}
	}

	if (dt_has_node_property(dt_chosen, "kernel-base-address", NULL)) {
		kernel_entry = dt_prop_get_u64(dt_chosen,
					       "kernel-base-address");
		prlog(PR_DEBUG, "INIT: Kernel image at 0x%llx\n", kernel_entry);
		kh = (struct elf_hdr *)kernel_entry;
		/*
		 * If the kernel is at 0, restore it as it was overwritten
		 * by our vectors.
		 */
		if (kernel_entry < EXCEPTION_VECTORS_END) {
			cpu_set_sreset_enable(false);
			memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);
			sync_icache();
		} else {
			/* Hack for STB in Mambo, assume at least 4kb in mem */
			if (!kernel_size)
				kernel_size = SECURE_BOOT_HEADERS_SIZE;
			if (stb_is_container((void*)kernel_entry, kernel_size)) {
				stb_container = (void*)kernel_entry;
				kh = (struct elf_hdr *) (kernel_entry + SECURE_BOOT_HEADERS_SIZE);
			} else
				kh = (struct elf_hdr *) (kernel_entry);
		}
	} else {
		if (!kernel_size) {
			printf("INIT: Assuming kernel at %p\n",
			       KERNEL_LOAD_BASE);
			/* Hack for STB in Mambo, assume at least 4kb in mem */
			kernel_size = SECURE_BOOT_HEADERS_SIZE;
			kernel_entry = (uint64_t)KERNEL_LOAD_BASE;
		}
		/* Skip over a secure-boot container header if present */
		if (stb_is_container(KERNEL_LOAD_BASE, kernel_size)) {
			stb_container = KERNEL_LOAD_BASE;
			kh = (struct elf_hdr *) (KERNEL_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE);
		} else
			kh = (struct elf_hdr *) (KERNEL_LOAD_BASE);

	}

	prlog(PR_DEBUG,
	      "INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
	      kernel_size);

	if (kh->ei_ident != ELF_IDENT) {
		prerror("INIT: ELF header not found. Assuming raw binary.\n");
		return true;
	}

	if (kh->ei_class == ELF_CLASS_64) {
		if (!try_load_elf64(kh))
			return false;
	} else if (kh->ei_class == ELF_CLASS_32) {
		if (!try_load_elf32(kh))
			return false;
	} else {
		prerror("INIT: Neither ELF32 not ELF64 ?\n");
		return false;
	}

	/*
	 * NOTE(review): verify/measure happens only under the Mambo
	 * callouts quirk — presumably the normal resource-load path
	 * covers real platforms; confirm against the resource loader.
	 */
	if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
		secureboot_verify(RESOURCE_ID_KERNEL,
				  stb_container,
				  SECURE_BOOT_HEADERS_SIZE + kernel_size);
		trustedboot_measure(RESOURCE_ID_KERNEL,
				    stb_container,
				    SECURE_BOOT_HEADERS_SIZE + kernel_size);
	}

	return true;
}
447
load_initramfs(void)448 static void load_initramfs(void)
449 {
450 uint64_t *initramfs_start;
451 void *stb_container = NULL;
452 int loaded;
453
454 loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,
455 RESOURCE_SUBID_NONE);
456
457 if (loaded != OPAL_SUCCESS || !initramfs_size)
458 return;
459
460 if (stb_is_container(INITRAMFS_LOAD_BASE, initramfs_size)) {
461 stb_container = INITRAMFS_LOAD_BASE;
462 initramfs_start = INITRAMFS_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE;
463 } else {
464 initramfs_start = INITRAMFS_LOAD_BASE;
465 }
466
467 dt_check_del_prop(dt_chosen, "linux,initrd-start");
468 dt_check_del_prop(dt_chosen, "linux,initrd-end");
469
470 printf("INIT: Initramfs loaded, size: %zu bytes\n", initramfs_size);
471
472 dt_add_property_u64(dt_chosen, "linux,initrd-start",
473 (uint64_t)initramfs_start);
474 dt_add_property_u64(dt_chosen, "linux,initrd-end",
475 (uint64_t)initramfs_start + initramfs_size);
476
477 if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
478 secureboot_verify(RESOURCE_ID_INITRAMFS,
479 stb_container,
480 SECURE_BOOT_HEADERS_SIZE + initramfs_size);
481 trustedboot_measure(RESOURCE_ID_INITRAMFS,
482 stb_container,
483 SECURE_BOOT_HEADERS_SIZE + initramfs_size);
484 }
485 }
486
/*
 * Clear MSR[ME] and MSR[RI] on the calling thread in preparation for
 * handing the CPU to the OS. Order matters: machine checks are
 * disabled first, then mtmsrd with L=1 clears the RI (and EE) bits.
 */
static void cpu_disable_ME_RI_one(void *param __unused)
{
	disable_machine_check();
	mtmsrd(0, 1);
}
492
cpu_disable_ME_RI_all(void)493 static int64_t cpu_disable_ME_RI_all(void)
494 {
495 struct cpu_thread *cpu;
496 struct cpu_job **jobs;
497
498 jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
499 assert(jobs);
500
501 for_each_available_cpu(cpu) {
502 if (cpu == this_cpu())
503 continue;
504 jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI",
505 cpu_disable_ME_RI_one, NULL);
506 }
507
508 /* this cpu */
509 cpu_disable_ME_RI_one(NULL);
510
511 for_each_available_cpu(cpu) {
512 if (jobs[cpu->pir])
513 cpu_wait_job(jobs[cpu->pir], true);
514 }
515
516 free(jobs);
517
518 return OPAL_SUCCESS;
519 }
520
521 void *fdt;
522
/*
 * Final boot path: load and measure the kernel and initramfs, finalise
 * the device tree, build the FDT blob and jump into the OS.
 *
 * @is_reboot: true on the (fast) reboot path, where nvram is
 *             re-initialised and the previous FDT is freed and rebuilt.
 *
 * Does not return: ends in start_kernel()/start_kernel32() or abort().
 */
void __noreturn load_and_boot_kernel(bool is_reboot)
{
	const struct dt_property *memprop;
	const char *cmdline, *stdoutp;
	uint64_t mem_top;

	/* Top of memory from the private "maxmem" property (2 cells) */
	memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
	if (memprop)
		mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
			| dt_property_get_cell(memprop, 1);
	else /* XXX HB hack, might want to calc it */
		mem_top = 0x40000000;

	op_display(OP_LOG, OP_MOD_INIT, 0x000A);

	/* Load kernel LID */
	if (!load_kernel()) {
		op_display(OP_FATAL, OP_MOD_INIT, 1);
		abort();
	}

	load_initramfs();

	trustedboot_exit_boot_services();

	ipmi_set_fw_progress_sensor(IPMI_FW_OS_BOOT);


	if (!is_reboot) {
		/* We wait for the nvram read to complete here so we can
		 * grab stuff from there such as the kernel arguments
		 */
		nvram_wait_for_load();

		/* Fall back to DTS sensors when OCC ones are unavailable */
		if (!occ_sensors_init())
			dts_sensor_create_nodes(sensor_node);

	} else {
		/* fdt will be rebuilt */
		free(fdt);
		fdt = NULL;

		nvram_reinit();
		occ_pstates_init();
	}

	/* Use nvram bootargs over device tree */
	cmdline = nvram_query_safe("bootargs");
	if (cmdline) {
		dt_check_del_prop(dt_chosen, "bootargs");
		dt_add_property_string(dt_chosen, "bootargs", cmdline);
		prlog(PR_DEBUG, "INIT: Command line from NVRAM: %s\n",
		      cmdline);
	}

	op_display(OP_LOG, OP_MOD_INIT, 0x000B);

	add_fast_reboot_dt_entries();

	/* Give the platform a last chance to touch the device tree */
	if (platform.finalise_dt)
		platform.finalise_dt(is_reboot);

	/* Create the device tree blob to boot OS. */
	fdt = create_dtb(dt_root, false);
	if (!fdt) {
		op_display(OP_FATAL, OP_MOD_INIT, 2);
		abort();
	}

	op_display(OP_LOG, OP_MOD_INIT, 0x000C);

	mem_dump_free();

	/* Dump the selected console */
	stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
	prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");

	fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);

	/* Check there is something there before we branch to it: the
	 * first instruction word must not be zero */
	if (*(uint32_t *)kernel_entry == 0) {
		prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n");
		assert(0);
	}

	if (platform.exit)
		platform.exit();

	/* Take processors out of nap */
	cpu_set_sreset_enable(false);
	cpu_set_ipi_enable(false);

	printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n",
	       kernel_entry, fdt, fdt_totalsize(fdt));

	/* Disable machine checks on all */
	cpu_disable_ME_RI_all();

	debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;

	cpu_give_self_os();

	if (kernel_32bit)
		start_kernel32(kernel_entry, fdt, mem_top);
	start_kernel(kernel_entry, fdt, mem_top);
}
629
/*
 * Fix up properties the firmware-supplied device tree may be missing:
 * the LPC buses' address/size cells, status and "primary" marker, and
 * the xscom nodes' "scom-controller" flag.
 */
static void dt_fixups(void)
{
	struct dt_node *n;
	struct dt_node *primary_lpc = NULL;

	/* lpc node missing #address/size cells. Also pick one as
	 * primary for now (TBD: How to convey that from HB)
	 */
	dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
		if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
			primary_lpc = n;
		/*
		 * NOTE(review): this 'break' aborts the whole loop as
		 * soon as one node already has #address-cells, leaving
		 * later nodes untouched — presumably the generating FW
		 * populates all LPC nodes or none; confirm 'continue'
		 * was not intended here.
		 */
		if (dt_has_node_property(n, "#address-cells", NULL))
			break;
		dt_add_property_cells(n, "#address-cells", 2);
		dt_add_property_cells(n, "#size-cells", 1);
		dt_add_property_strings(n, "status", "ok");
	}

	/* Missing "primary" property in LPC bus */
	if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
		dt_add_property(primary_lpc, "primary", NULL, 0);

	/* Missing "scom-controller" */
	dt_for_each_compatible(dt_root, n, "ibm,xscom") {
		if (!dt_has_node_property(n, "scom-controller", NULL))
			dt_add_property(n, "scom-controller", NULL, 0);
	}
}
658
add_arch_vector(void)659 static void add_arch_vector(void)
660 {
661 /**
662 * vec5 = a PVR-list : Number-of-option-vectors :
663 * option-vectors[Number-of-option-vectors + 1]
664 */
665 uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};
666
667 if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
668 return;
669
670 dt_add_property(dt_chosen, "ibm,architecture-vec-5",
671 vec5, sizeof(vec5));
672 }
673
dt_init_misc(void)674 static void dt_init_misc(void)
675 {
676 /* Check if there's a /chosen node, if not, add one */
677 dt_chosen = dt_find_by_path(dt_root, "/chosen");
678 if (!dt_chosen)
679 dt_chosen = dt_new(dt_root, "chosen");
680 assert(dt_chosen);
681
682 /* Add IBM architecture vectors if needed */
683 add_arch_vector();
684
685 /* Add the "OPAL virtual ICS*/
686 add_ics_node();
687
688 /* Additional fixups. TODO: Move into platform */
689 dt_fixups();
690 }
691
console_get_level(const char * s)692 static u8 console_get_level(const char *s)
693 {
694 if (strcmp(s, "emerg") == 0)
695 return PR_EMERG;
696 if (strcmp(s, "alert") == 0)
697 return PR_ALERT;
698 if (strcmp(s, "crit") == 0)
699 return PR_CRIT;
700 if (strcmp(s, "err") == 0)
701 return PR_ERR;
702 if (strcmp(s, "warning") == 0)
703 return PR_WARNING;
704 if (strcmp(s, "notice") == 0)
705 return PR_NOTICE;
706 if (strcmp(s, "printf") == 0)
707 return PR_PRINTF;
708 if (strcmp(s, "info") == 0)
709 return PR_INFO;
710 if (strcmp(s, "debug") == 0)
711 return PR_DEBUG;
712 if (strcmp(s, "trace") == 0)
713 return PR_TRACE;
714 if (strcmp(s, "insane") == 0)
715 return PR_INSANE;
716 /* Assume it's a number instead */
717 return atoi(s);
718 }
719
console_log_level(void)720 static void console_log_level(void)
721 {
722 const char *s;
723 u8 level;
724
725 /* console log level:
726 * high 4 bits in memory, low 4 bits driver (e.g. uart). */
727 s = nvram_query_safe("log-level-driver");
728 if (s) {
729 level = console_get_level(s);
730 debug_descriptor.console_log_levels =
731 (debug_descriptor.console_log_levels & 0xf0 ) |
732 (level & 0x0f);
733 prlog(PR_NOTICE, "console: Setting driver log level to %i\n",
734 level & 0x0f);
735 }
736 s = nvram_query_safe("log-level-memory");
737 if (s) {
738 level = console_get_level(s);
739 debug_descriptor.console_log_levels =
740 (debug_descriptor.console_log_levels & 0x0f ) |
741 ((level & 0x0f) << 4);
742 prlog(PR_NOTICE, "console: Setting memory log level to %i\n",
743 level & 0x0f);
744 }
745 }
746
747 typedef void (*ctorcall_t)(void);
748
do_ctors(void)749 static void __nomcount do_ctors(void)
750 {
751 extern ctorcall_t __ctors_start[], __ctors_end[];
752 ctorcall_t *call;
753
754 for (call = __ctors_start; call < __ctors_end; call++)
755 (*call)();
756 }
757
#ifndef PPC64_ELF_ABI_v2
/* Target of any accidental call through a NULL function pointer */
static void branch_null(void)
{
	assert_fail("Branch to NULL !");
}


/*
 * Install a catcher at address 0 so that calls through a NULL function
 * pointer land in branch_null() instead of whatever lives there.
 */
static void setup_branch_null_catcher(void)
{
	void (*bn)(void) = branch_null;

	/*
	 * FIXME: This copies the function descriptor (16 bytes) for
	 * ABI v1 (ie. big endian). This will be broken if we ever
	 * move to ABI v2 (ie little endian)
	 */
	memcpy_null(0, bn, 16);
}
#else
/* ABI v2 has no function descriptors, so there is nothing to copy */
static void setup_branch_null_catcher(void)
{
}
#endif
781
copy_sreset_vector(void)782 void copy_sreset_vector(void)
783 {
784 uint32_t *src, *dst;
785
786 /* Copy the reset code over the entry point. */
787 src = &reset_patch_start;
788 dst = (uint32_t *)0x100;
789 while(src < &reset_patch_end)
790 *(dst++) = *(src++);
791 sync_icache();
792 }
793
copy_sreset_vector_fast_reboot(void)794 void copy_sreset_vector_fast_reboot(void)
795 {
796 uint32_t *src, *dst;
797
798 /* Copy the reset code over the entry point. */
799 src = &reset_fast_reboot_patch_start;
800 dst = (uint32_t *)0x100;
801 while(src < &reset_fast_reboot_patch_end)
802 *(dst++) = *(src++);
803 sync_icache();
804 }
805
/*
 * Install skiboot's exception vectors at address 0, saving whatever
 * was there (possibly a kernel image) into old_vectors first.
 */
void copy_exception_vectors(void)
{
	/* Backup previous vectors as this could contain a kernel
	 * image.
	 */
	memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);

	/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as
	 * this is the boot flag used by CPUs still potentially entering
	 * skiboot.
	 */
	BUILD_ASSERT((&reset_patch_end - &reset_patch_start) <
		     EXCEPTION_VECTORS_END - 0x100);
	memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100),
	       EXCEPTION_VECTORS_END - 0x100);
	sync_icache();
}
823
/* Basic invariants every thread must satisfy before proceeding with
 * boot; the fwts-label blocks below feed the firmware test suite. */
static void per_thread_sanity_checks(void)
{
	struct cpu_thread *cpu = this_cpu();

	/**
	 * @fwts-label NonZeroHRMOR
	 * @fwts-advice The contents of the hypervisor real mode offset register
	 * (HRMOR) is bitwise orded with the address of any hypervisor real mode
	 * (i.e Skiboot) memory accesses. Skiboot does not support operating
	 * with a non-zero HRMOR and setting it will break some things (e.g
	 * XSCOMs) in hard-to-debug ways.
	 */
	assert(mfspr(SPR_HRMOR) == 0);

	/**
	 * @fwts-label UnknownSecondary
	 * @fwts-advice The boot CPU attampted to call in a secondary thread
	 * without initialising the corresponding cpu_thread structure. This may
	 * happen if the HDAT or devicetree reports too few threads or cores for
	 * this processor.
	 */
	assert(cpu->state != cpu_state_no_cpu);
}
847
pci_nvram_init(void)848 static void pci_nvram_init(void)
849 {
850 const char *nvram_speed;
851
852 verbose_eeh = nvram_query_eq_safe("pci-eeh-verbose", "true");
853 if (verbose_eeh)
854 prlog(PR_INFO, "PHB: Verbose EEH enabled\n");
855
856 pcie_max_link_speed = 0;
857
858 nvram_speed = nvram_query_dangerous("pcie-max-link-speed");
859 if (nvram_speed) {
860 pcie_max_link_speed = atoi(nvram_speed);
861 prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n",
862 pcie_max_link_speed);
863 }
864 }
865
/*
 * Fletcher-style checksum over [_p, _e): sums 32-bit words into two
 * running accumulators and returns their XOR. The region is read one
 * whole word per 4 bytes of length.
 */
static uint32_t mem_csum(void *_p, void *_e)
{
	size_t nbytes = (char *)_e - (char *)_p;
	uint32_t *word = _p;
	uint32_t sum1 = 0, sum2 = 0;
	size_t off;

	for (off = 0; off < nbytes; off += 4) {
		sum1 += *word++;
		sum2 += sum1;
	}

	return sum1 ^ sum2;
}
884
885 static uint32_t romem_csum;
886
checksum_romem(void)887 static void checksum_romem(void)
888 {
889 uint32_t csum;
890
891 romem_csum = 0;
892 if (chip_quirk(QUIRK_SLOW_SIM))
893 return;
894
895 csum = mem_csum(_start, _romem_end);
896 romem_csum ^= csum;
897 csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);
898 romem_csum ^= csum;
899 }
900
verify_romem(void)901 bool verify_romem(void)
902 {
903 uint32_t old = romem_csum;
904 checksum_romem();
905 if (old != romem_csum) {
906 romem_csum = old;
907 prlog(PR_NOTICE, "OPAL checksums did not match\n");
908 return false;
909 }
910 return true;
911 }
912
913 /* Called from head.S, thus no prototype. */
914 void main_cpu_entry(const void *fdt);
915
main_cpu_entry(const void * fdt)916 void __noreturn __nomcount main_cpu_entry(const void *fdt)
917 {
918 /*
919 * WARNING: At this point. the timebases have
920 * *not* been synchronized yet. Do not use any timebase
921 * related functions for timeouts etc... unless you can cope
922 * with the speed being some random core clock divider and
923 * the value jumping backward when the synchronization actually
924 * happens (in chiptod_init() below).
925 *
926 * Also the current cpu_thread() struct is not initialized
927 * either so we need to clear it out first thing first (without
928 * putting any other useful info in there jus yet) otherwise
929 * printf an locks are going to play funny games with "con_suspend"
930 */
931 pre_init_boot_cpu();
932
933 /*
934 * Before first printk, ensure console buffer is clear or
935 * reading tools might think it has wrapped
936 */
937 clear_console();
938
939 /*
940 * Some boot firmwares enter OPAL with MSR[ME]=1, as they presumably
941 * handle machine checks until we take over. As we overwrite the
942 * previous exception vectors with our own handlers, disable MSR[ME].
943 * This could be done atomically by patching in a branch then patching
944 * it out last, but that's a lot of effort.
945 */
946 disable_machine_check();
947
948 /* Copy all vectors down to 0 */
949 copy_exception_vectors();
950
951 /*
952 * Enable MSR[ME] bit so we can take MCEs. We don't currently
953 * recover, but we print some useful information.
954 */
955 enable_machine_check();
956 mtmsrd(MSR_RI, 1);
957
958 /* Setup a NULL catcher to catch accidental NULL ptr calls */
959 setup_branch_null_catcher();
960
961 /* Call library constructors */
962 do_ctors();
963
964 prlog(PR_NOTICE, "OPAL %s%s starting...\n", version,
965 #ifdef DEBUG
966 "-debug"
967 #else
968 ""
969 #endif
970 );
971 prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n",
972 (debug_descriptor.console_log_levels >> 4),
973 (debug_descriptor.console_log_levels & 0x0f));
974 prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n");
975
976 #ifdef SKIBOOT_GCOV
977 skiboot_gcov_done();
978 #endif
979
980 /* Initialize boot cpu's cpu_thread struct */
981 init_boot_cpu();
982
983 /* Now locks can be used */
984 init_locks();
985
986 /* Create the OPAL call table early on, entries can be overridden
987 * later on (FSP console code for example)
988 */
989 opal_table_init();
990
991 /* Init the physical map table so we can start mapping things */
992 phys_map_init();
993
994 /*
995 * If we are coming in with a flat device-tree, we expand it
996 * now. Else look for HDAT and create a device-tree from them
997 *
998 * Hack alert: When entering via the OPAL entry point, fdt
999 * is set to -1, we record that and pass it to parse_hdat
1000 */
1001
1002 dt_root = dt_new_root("");
1003
1004 if (fdt == (void *)-1ul) {
1005 if (parse_hdat(true) < 0)
1006 abort();
1007 } else if (fdt == NULL) {
1008 if (parse_hdat(false) < 0)
1009 abort();
1010 } else {
1011 dt_expand(fdt);
1012 }
1013 dt_add_cpufeatures(dt_root);
1014
1015 /* Now that we have a full devicetree, verify that we aren't on fire. */
1016 per_thread_sanity_checks();
1017
1018 /*
1019 * From there, we follow a fairly strict initialization order.
1020 *
1021 * First we need to build up our chip data structures and initialize
1022 * XSCOM which will be needed for a number of susbequent things.
1023 *
1024 * We want XSCOM available as early as the platform probe in case the
1025 * probe requires some HW accesses.
1026 *
1027 * We also initialize the FSI master at that point in case we need
1028 * to access chips via that path early on.
1029 */
1030 init_chips();
1031
1032 xscom_init();
1033 mfsi_init();
1034
1035 /*
1036 * Direct controls facilities provides some controls over CPUs
1037 * using scoms.
1038 */
1039 direct_controls_init();
1040
1041 /*
1042 * Put various bits & pieces in device-tree that might not
1043 * already be there such as the /chosen node if not there yet,
1044 * the ICS node, etc... This can potentially use XSCOM
1045 */
1046 dt_init_misc();
1047
1048 /*
1049 * Initialize LPC (P8 and beyond) so we can get to UART, BMC and
1050 * other system controller. This is done before probe_platform
1051 * so that the platform probing code can access an external
1052 * BMC if needed.
1053 */
1054 lpc_init();
1055
1056 /*
1057 * This should be done before mem_region_init, so the stack
1058 * region length can be set according to the maximum PIR.
1059 */
1060 init_cpu_max_pir();
1061
1062 /*
1063 * Now, we init our memory map from the device-tree, and immediately
1064 * reserve areas which we know might contain data coming from
1065 * HostBoot. We need to do these things before we start doing
1066 * allocations outside of our heap, such as chip local allocs,
1067 * otherwise we might clobber those data.
1068 */
1069 mem_region_init();
1070
1071 /* Reserve HOMER and OCC area */
1072 homer_init();
1073
1074 /* Initialize the rest of the cpu thread structs */
1075 init_all_cpus();
1076 if (proc_gen == proc_gen_p9)
1077 cpu_set_ipi_enable(true);
1078
1079 /* Add the /opal node to the device-tree */
1080 add_opal_node();
1081
1082 /*
1083 * We probe the platform now. This means the platform probe gets
1084 * the opportunity to reserve additional areas of memory if needed.
1085 *
1086 * Note: Timebases still not synchronized.
1087 */
1088 probe_platform();
1089
1090 /* Allocate our split trace buffers now. Depends add_opal_node() */
1091 init_trace_buffers();
1092
1093 /* On P8, get the ICPs and make sure they are in a sane state */
1094 init_interrupts();
1095 if (proc_gen == proc_gen_p8)
1096 cpu_set_ipi_enable(true);
1097
1098 /* On P9, initialize XIVE */
1099 init_xive();
1100
1101 /* Grab centaurs from device-tree if present (only on FSP-less) */
1102 centaur_init();
1103
1104 /* Initialize PSI (depends on probe_platform being called) */
1105 psi_init();
1106
1107 /* Initialize/enable LPC interrupts. This must be done after the
1108 * PSI interface has been initialized since it serves as an interrupt
1109 * source for LPC interrupts.
1110 */
1111 lpc_init_interrupts();
1112
1113 /* Call in secondary CPUs */
1114 cpu_bringup();
1115
1116 /* We can now overwrite the 0x100 vector as we are no longer being
1117 * entered there.
1118 */
1119 copy_sreset_vector();
1120
1121 /* We can now do NAP mode */
1122 cpu_set_sreset_enable(true);
1123
1124 /*
1125 * Synchronize time bases. Prior to chiptod_init() the timebase
1126 * is free-running at a frequency based on the core clock rather
1127 * than being synchronised to the ChipTOD network. This means
1128 * that the timestamps in early boot might be a little off compared
1129 * to wall clock time.
1130 */
1131 chiptod_init();
1132
1133 /* Initialize P9 DIO */
1134 p9_dio_init();
1135
1136 /*
1137 * SBE uses TB value for scheduling timer. Hence init after
1138 * chiptod init
1139 */
1140 p9_sbe_init();
1141
1142 /* Initialize i2c */
1143 p8_i2c_init();
1144
1145 /* Register routine to dispatch and read sensors */
1146 sensor_init();
1147
1148 /*
1149 * Initialize the opal messaging before platform.init as we are
1150 * getting request to queue occ load opal message when host services
1151 * got load occ request from FSP
1152 */
1153 opal_init_msg();
1154
1155 /*
1156 * We have initialized the basic HW, we can now call into the
1157 * platform to perform subsequent inits, such as establishing
1158 * communication with the FSP or starting IPMI.
1159 */
1160 if (platform.init)
1161 platform.init();
1162
1163 /* Read in NVRAM and set it up */
1164 nvram_init();
1165
1166 /* Set the console level */
1167 console_log_level();
1168
1169 /* Secure/Trusted Boot init. We look for /ibm,secureboot in DT */
1170 secureboot_init();
1171 trustedboot_init();
1172
1173 /*
1174 * BMC platforms load version information from flash after
1175 * secure/trustedboot init.
1176 */
1177 if (platform.bmc)
1178 flash_fw_version_preload();
1179
1180 /* preload the IMC catalog dtb */
1181 imc_catalog_preload();
1182
1183 /* Install the OPAL Console handlers */
1184 init_opal_console();
1185
1186 /*
1187 * Some platforms set a flag to wait for SBE validation to be
1188 * performed by the BMC. If this occurs it leaves the SBE in a
1189 * bad state and the system will reboot at this point.
1190 */
1191 if (platform.seeprom_update)
1192 platform.seeprom_update();
1193
1194 /* Init SLW related stuff, including fastsleep */
1195 slw_init();
1196
1197 op_display(OP_LOG, OP_MOD_INIT, 0x0002);
1198
1199 /*
1200 * On some POWER9 BMC systems, we need to initialise the OCC
1201 * before the NPU to facilitate NVLink/OpenCAPI presence
1202 * detection, so we set it up as early as possible. On FSP
1203 * systems, Hostboot starts booting the OCC later, so we delay
1204 * OCC initialisation as late as possible to give it the
1205 * maximum time to boot up.
1206 */
1207 if (platform.bmc)
1208 occ_pstates_init();
1209
1210 pci_nvram_init();
1211
1212 preload_capp_ucode();
1213 start_preload_kernel();
1214
1215 /* Catalog decompression routine */
1216 imc_decompress_catalog();
1217
1218 /* Virtual Accelerator Switchboard */
1219 vas_init();
1220
1221 /* NX init */
1222 nx_init();
1223
1224 /* Init In-Memory Collection related stuff (load the IMC dtb into memory) */
1225 imc_init();
1226
1227 /* Probe PHB3 on P8 */
1228 probe_phb3();
1229
1230 /* Probe PHB4 on P9 */
1231 probe_phb4();
1232
1233 /* Probe NPUs */
1234 probe_npu();
1235 probe_npu2();
1236
1237 /* Initialize PCI */
1238 pci_init_slots();
1239
1240 /* Add OPAL timer related properties */
1241 late_init_timers();
1242
1243 /* Setup ibm,firmware-versions if able */
1244 if (platform.bmc) {
1245 flash_dt_add_fw_version();
1246 ipmi_dt_add_bmc_info();
1247 }
1248
1249 ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT);
1250
1251 /*
1252 * These last few things must be done as late as possible
1253 * because they rely on various other things having been setup,
1254 * for example, add_opal_interrupts() will add all the interrupt
1255 * sources that are going to the firmware. We can't add a new one
1256 * after that call. Similarly, the mem_region calls will construct
1257 * the reserve maps in the DT so we shouldn't affect the memory
1258 * regions after that
1259 */
1260
1261 /* Create the LPC bus interrupt-map on P9 */
1262 lpc_finalize_interrupts();
1263
1264 /* Add the list of interrupts going to OPAL */
1265 add_opal_interrupts();
1266
1267 /* Now release parts of memory nodes we haven't used ourselves... */
1268 mem_region_release_unused();
1269
1270 /* ... and add remaining reservations to the DT */
1271 mem_region_add_dt_reserved();
1272
1273 /*
1274 * Update /ibm,secureboot/ibm,cvc/memory-region to point to
1275 * /reserved-memory/secure-crypt-algo-code instead of
1276 * /ibm,hostboot/reserved-memory/secure-crypt-algo-code.
1277 */
1278 cvc_update_reserved_memory_phandle();
1279
1280 prd_register_reserved_memory();
1281
1282 checksum_romem();
1283
1284 load_and_boot_kernel(false);
1285 }
1286
__secondary_cpu_entry(void)1287 void __noreturn __secondary_cpu_entry(void)
1288 {
1289 struct cpu_thread *cpu = this_cpu();
1290
1291 /* Secondary CPU called in */
1292 cpu_callin(cpu);
1293
1294 enable_machine_check();
1295 mtmsrd(MSR_RI, 1);
1296
1297 /* Some XIVE setup */
1298 xive_cpu_callin(cpu);
1299
1300 /* Wait for work to do */
1301 while(true) {
1302 if (cpu_check_jobs(cpu))
1303 cpu_process_jobs();
1304 else
1305 cpu_idle_job();
1306 }
1307 }
1308
1309 /* Called from head.S, thus no prototype. */
1310 void secondary_cpu_entry(void);
1311
secondary_cpu_entry(void)1312 void __noreturn __nomcount secondary_cpu_entry(void)
1313 {
1314 struct cpu_thread *cpu = this_cpu();
1315
1316 per_thread_sanity_checks();
1317
1318 prlog(PR_DEBUG, "INIT: CPU PIR 0x%04x called in\n", cpu->pir);
1319
1320 __secondary_cpu_entry();
1321 }
1322