/* Copyright 2013-2017 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * TODO: Index array by PIR to be able to catch them easily
 * from assembly such as machine checks etc...
 */
#include <skiboot.h>
#include <cpu.h>
#include <device.h>
#include <mem_region.h>
#include <opal.h>
#include <stack.h>
#include <trace.h>
#include <affinity.h>
#include <chip.h>
#include <timebase.h>
#include <interrupts.h>
#include <ccan/str/str.h>
#include <ccan/container_of/container_of.h>
#include <xscom.h>

/* The cpu_threads array is static and indexed by PIR in
 * order to speed up lookup from asm entry points
 */
struct cpu_stack {
	union {
		uint8_t	stack[STACK_SIZE];
		struct cpu_thread cpu;
	};
} __align(STACK_SIZE);

static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
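/*
 * Illustrative sketch (not part of the build): because the array is
 * contiguous and STACK_SIZE-aligned, locating a thread's state from
 * assembly is pure shift-and-add arithmetic, with no table walk:
 *
 *	stack slot for PIR n    = CPU_STACKS_BASE + n * STACK_SIZE
 *	thread struct for PIR n = &cpu_stacks[n].cpu
 *
 * find_cpu_by_pir() below is the C version of this lookup, and
 * init_cpu_thread() asserts that PIR and array index agree.
 */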
unsigned int cpu_thread_count;
unsigned int cpu_max_pir;
struct cpu_thread *boot_cpu;
static struct lock reinit_lock = LOCK_UNLOCKED;
static bool hile_supported;
static bool radix_supported;
static unsigned long hid0_hile;
static unsigned long hid0_attn;
static bool sreset_enabled;
static bool ipi_enabled;
static bool pm_enabled;
static bool current_hile_mode;
static bool current_radix_mode;
static bool tm_suspend_enabled;

unsigned long cpu_secondary_start __force_data = 0;

struct cpu_job {
	struct list_node	link;
	void			(*func)(void *data);
	void			*data;
	const char		*name;
	bool			complete;
	bool			no_return;
};
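
/*
 * Job lifecycle, as implemented below: __cpu_queue_job() allocates the
 * job and links it on the target CPU's job_queue; cpu_process_jobs()
 * pops and runs it on that CPU and sets "complete"; the caller then
 * frees it via cpu_wait_job(job, true). no_return jobs are the
 * exception: the executing CPU frees them itself right before calling
 * func, since nobody will ever wait on them.
 */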

/* attribute const as cpu_stacks is constant. */
unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
{
	return ((unsigned long)&cpu_stacks[pir]) +
		sizeof(struct cpu_thread) + STACK_SAFETY_GAP;
}

unsigned long __attrconst cpu_stack_top(unsigned int pir)
{
	/* This is the top of the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE - STACK_TOP_GAP;
}

unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir)
{
	/* This is the top of the emergency stack, above the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP;
}
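
/*
 * Resulting layout of one STACK_SIZE slot, sketched from the three
 * accessors above (gap and size constants live in stack.h; this
 * assumes STACK_SIZE = NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE as
 * the accessors imply):
 *
 *	+--------------------------+ <- &cpu_stacks[pir] + STACK_SIZE
 *	|  (top gap)               |
 *	+--------------------------+ <- cpu_emergency_stack_top(pir)
 *	|  emergency stack         |
 *	+--------------------------+
 *	|  (top gap)               |
 *	+--------------------------+ <- cpu_stack_top(pir)
 *	|  normal stack            |
 *	+--------------------------+ <- cpu_stack_bottom(pir)
 *	|  STACK_SAFETY_GAP        |
 *	+--------------------------+
 *	|  struct cpu_thread       |
 *	+--------------------------+ <- &cpu_stacks[pir]
 */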

void __nomcount cpu_relax(void)
{
	/* Relax a bit to give sibling threads some breathing space */
	smt_lowest();
	asm volatile("nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n");
	smt_medium();
	barrier();
}

static void cpu_wake(struct cpu_thread *cpu)
{
	/* Is it idle ? If not, no need to wake */
	sync();
	if (!cpu->in_idle)
		return;

	if (proc_gen == proc_gen_p8) {
		/* Poke IPI */
		icp_kick_cpu(cpu);
	} else if (proc_gen == proc_gen_p9) {
		p9_dbell_send(cpu->pir);
	}
}

/*
 * If chip_id is >= 0, schedule the job on that node.
 * Otherwise schedule the job anywhere.
 */
static struct cpu_thread *cpu_find_job_target(int32_t chip_id)
{
	struct cpu_thread *cpu, *best, *me = this_cpu();
	uint32_t best_count;

	/* We try to find a target to run a job. We need to avoid
	 * a CPU that has a "no return" job on its queue as it might
	 * never be able to process anything.
	 *
	 * Additionally we don't check the list but the job count
	 * on the target CPUs, since that is decremented *after*
	 * a job has been completed.
	 */

	/* First we scan all available primary threads
	 */
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || !cpu_is_thread0(cpu) || cpu->job_has_no_return)
			continue;
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* Now try again with secondary threads included and keep
	 * track of the one with the fewest jobs queued up. This is
	 * done in a racy way, but it's just an optimization in case
	 * we are overcommitted on jobs. We could also just pick
	 * a random one...
	 */
	best = NULL;
	best_count = -1u;
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || cpu->job_has_no_return)
			continue;
		if (!best || cpu->job_count < best_count) {
			best = cpu;
			best_count = cpu->job_count;
		}
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* We haven't found anybody, do we have a bestie ? */
	if (best) {
		lock(&best->job_lock);
		return best;
	}

	/* Go away */
	return NULL;
}

/* job_lock is held, returns with it released */
static void queue_job_on_cpu(struct cpu_thread *cpu, struct cpu_job *job)
{
	/* That's bad, the job will never run */
	if (cpu->job_has_no_return) {
		prlog(PR_WARNING, "WARNING ! Job %s scheduled on CPU 0x%x"
		      " which has a no-return job on its queue !\n",
		      job->name, cpu->pir);
		backtrace();
	}
	list_add_tail(&cpu->job_queue, &job->link);
	if (job->no_return)
		cpu->job_has_no_return = true;
	else
		cpu->job_count++;
	if (pm_enabled)
		cpu_wake(cpu);
	unlock(&cpu->job_lock);
}

struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
				const char *name,
				void (*func)(void *data), void *data,
				bool no_return)
{
	struct cpu_job *job;

#ifdef DEBUG_SERIALIZE_CPU_JOBS
	if (cpu == NULL)
		cpu = this_cpu();
#endif

	if (cpu && !cpu_is_available(cpu)) {
		prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
			cpu->pir);
		return NULL;
	}

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = no_return;

	/* Pick a candidate. Returns with target queue locked */
	if (cpu == NULL)
		cpu = cpu_find_job_target(-1);
	else if (cpu != this_cpu())
		lock(&cpu->job_lock);
	else
		cpu = NULL;

	/* Can't be scheduled, run it now */
	if (cpu == NULL) {
		if (!this_cpu()->job_has_no_return)
			this_cpu()->job_has_no_return = no_return;
		func(data);
		job->complete = true;
		return job;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
				      const char *name,
				      void (*func)(void *data), void *data)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = false;

	/* Pick a candidate. Returns with target queue locked */
	cpu = cpu_find_job_target(chip_id);

	/* Can't be scheduled... */
	if (cpu == NULL) {
		cpu = this_cpu();
		if (cpu->chip_id == chip_id) {
			/* Run it now if we're on the right node. */
			func(data);
			job->complete = true;
			return job;
		}
		/* Otherwise fail. */
		free(job);
		return NULL;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

bool cpu_poll_job(struct cpu_job *job)
{
	lwsync();
	return job->complete;
}

void cpu_wait_job(struct cpu_job *job, bool free_it)
{
	unsigned long time_waited = 0;

	if (!job)
		return;

	while (!job->complete) {
		/* This will call OPAL pollers for us */
		time_wait_ms(10);
		time_waited += 10;
		lwsync();
		if ((time_waited % 30000) == 0) {
			prlog(PR_INFO, "cpu_wait_job(%s) for %lums\n",
			      job->name, time_waited);
			backtrace();
		}
	}
	lwsync();

	if (time_waited > 1000)
		prlog(PR_DEBUG, "cpu_wait_job(%s) for %lums\n",
		      job->name, time_waited);

	if (free_it)
		free(job);
}
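
/*
 * Typical use of the job API (an illustrative sketch; my_count_job is
 * made up, but cpu_change_all_hid0() below does the same thing for
 * real):
 *
 *	static void my_count_job(void *data)
 *	{
 *		(*(unsigned long *)data)++;
 *	}
 *
 *	unsigned long count = 0;
 *	struct cpu_job *job;
 *
 *	job = cpu_queue_job(cpu, "my_count_job", my_count_job, &count);
 *	if (job)
 *		cpu_wait_job(job, true);
 *
 * cpu_wait_job() polls until "complete" is set, then frees the job.
 * Passing no_return=true to __cpu_queue_job() instead marks the target
 * CPU as gone for good (see job_has_no_return above), and the job is
 * freed by the target itself.
 */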

bool cpu_check_jobs(struct cpu_thread *cpu)
{
	return !list_empty_nocheck(&cpu->job_queue);
}

void cpu_process_jobs(void)
{
	struct cpu_thread *cpu = this_cpu();
	struct cpu_job *job = NULL;
	void (*func)(void *);
	void *data;

	sync();
	if (!cpu_check_jobs(cpu))
		return;

	lock(&cpu->job_lock);
	while (true) {
		bool no_return;

		job = list_pop(&cpu->job_queue, struct cpu_job, link);
		if (!job)
			break;

		func = job->func;
		data = job->data;
		no_return = job->no_return;
		unlock(&cpu->job_lock);
		prlog(PR_TRACE, "running job %s on %x\n", job->name, cpu->pir);
		if (no_return)
			free(job);
		func(data);
		if (!list_empty(&cpu->locks_held)) {
			if (no_return)
				prlog(PR_ERR, "OPAL no-return job returned with"
				      " locks held!\n");
			else
				prlog(PR_ERR, "OPAL job %s returning with locks held\n",
				      job->name);
			drop_my_locks(true);
		}
		lock(&cpu->job_lock);
		if (!no_return) {
			cpu->job_count--;
			lwsync();
			job->complete = true;
		}
	}
	unlock(&cpu->job_lock);
}

enum cpu_wake_cause {
	cpu_wake_on_job,
	cpu_wake_on_dec,
};

static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE;
	struct cpu_thread *cpu = this_cpu();
	unsigned int vec = 0;

	if (!pm_enabled) {
		prlog_once(PR_DEBUG, "cpu_idle_p8 called with PM disabled\n");
		return vec;
	}

	/* Clean up ICP, be ready for IPIs */
	icp_prep_for_pm();

	/* Synchronize with wakers */
	if (wake_on == cpu_wake_on_job) {
		/* Mark ourselves in idle so other CPUs know to send an IPI */
		cpu->in_idle = true;
		sync();

		/* Check for jobs again */
		if (cpu_check_jobs(cpu) || !pm_enabled)
			goto skip_sleep;

		/* Set up wakeup cause in LPCR: EE (for IPI) */
		lpcr |= SPR_LPCR_P8_PECE2;
		mtspr(SPR_LPCR, lpcr);

	} else {
		/* Mark ourselves sleeping so cpu_set_pm_enable knows to
		 * send an IPI
		 */
		cpu->in_sleep = true;
		sync();

		/* Check if PM got disabled */
		if (!pm_enabled)
			goto skip_sleep;

		/* EE and DEC */
		lpcr |= SPR_LPCR_P8_PECE2 | SPR_LPCR_P8_PECE3;
		mtspr(SPR_LPCR, lpcr);
	}
	isync();

	/* Enter nap */
	vec = enter_p8_pm_state(false);

skip_sleep:
	/* Restore */
	sync();
	cpu->in_idle = false;
	cpu->in_sleep = false;
	reset_cpu_icp();

	return vec;
}

static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE;
	uint64_t psscr;
	struct cpu_thread *cpu = this_cpu();
	unsigned int vec = 0;

	if (!pm_enabled) {
		prlog_once(PR_DEBUG, "cpu_idle_p9 called with PM disabled\n");
		return vec;
	}

	/* Synchronize with wakers */
	if (wake_on == cpu_wake_on_job) {
		/* Mark ourselves in idle so other CPUs know to send an IPI */
		cpu->in_idle = true;
		sync();

		/* Check for jobs again */
		if (cpu_check_jobs(cpu) || !pm_enabled)
			goto skip_sleep;

		/* HV DBELL for IPI */
		lpcr |= SPR_LPCR_P9_PECEL1;
	} else {
		/* Mark ourselves sleeping so cpu_set_pm_enable knows to
		 * send an IPI
		 */
		cpu->in_sleep = true;
		sync();

		/* Check if PM got disabled */
		if (!pm_enabled)
			goto skip_sleep;

		/* HV DBELL and DEC */
		lpcr |= SPR_LPCR_P9_PECEL1 | SPR_LPCR_P9_PECEL3;
	}

	mtspr(SPR_LPCR, lpcr);
	isync();

	if (sreset_enabled) {
		/* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */
		/* PSSCR SD=0 ESL=1 EC=1 PSLL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BIT(42) | PPC_BIT(43) |
			PPC_BITMASK(54, 55) | PPC_BIT(63);
		vec = enter_p9_pm_state(psscr);
	} else {
		/* stop with EC=0 (resumes) which does not require sreset. */
		/* PSSCR SD=0 ESL=0 EC=0 PSLL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
		enter_p9_pm_lite_state(psscr);
	}

	/* Clear doorbell */
	p9_dbell_receive();

skip_sleep:
	/* Restore */
	sync();
	cpu->in_idle = false;
	cpu->in_sleep = false;

	return vec;
}

static void cpu_idle_pm(enum cpu_wake_cause wake_on)
{
	unsigned int vec;

	switch(proc_gen) {
	case proc_gen_p8:
		vec = cpu_idle_p8(wake_on);
		break;
	case proc_gen_p9:
		vec = cpu_idle_p9(wake_on);
		break;
	default:
		vec = 0;
		prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n");
		break;
	}

	if (vec == 0x100) {
		unsigned long srr1 = mfspr(SPR_SRR1);

		switch (srr1 & SPR_SRR1_PM_WAKE_MASK) {
		case SPR_SRR1_PM_WAKE_SRESET:
			exception_entry_pm_sreset();
			break;
		default:
			break;
		}
		mtmsrd(MSR_RI, 1);

	} else if (vec == 0x200) {
		exception_entry_pm_mce();
		enable_machine_check();
		mtmsrd(MSR_RI, 1);
	}
}

void cpu_idle_job(void)
{
	if (pm_enabled) {
		cpu_idle_pm(cpu_wake_on_job);
	} else {
		struct cpu_thread *cpu = this_cpu();

		smt_lowest();
		/* Check for jobs again */
		while (!cpu_check_jobs(cpu)) {
			if (pm_enabled)
				break;
			cpu_relax();
			barrier();
		}
		smt_medium();
	}
}

void cpu_idle_delay(unsigned long delay)
{
	unsigned long now = mftb();
	unsigned long end = now + delay;
	unsigned long min_pm = usecs_to_tb(10);

	if (pm_enabled && delay > min_pm) {
pm:
		for (;;) {
			if (delay >= 0x7fffffff)
				delay = 0x7fffffff;
			mtspr(SPR_DEC, delay);

			cpu_idle_pm(cpu_wake_on_dec);

			now = mftb();
			if (tb_compare(now, end) == TB_AAFTERB)
				break;
			delay = end - now;
			if (!(pm_enabled && delay > min_pm))
				goto no_pm;
		}
	} else {
no_pm:
		smt_lowest();
		for (;;) {
			now = mftb();
			if (tb_compare(now, end) == TB_AAFTERB)
				break;
			delay = end - now;
			if (pm_enabled && delay > min_pm) {
				smt_medium();
				goto pm;
			}
		}
		smt_medium();
	}
}
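
/*
 * Example (illustrative): callers express the delay in timebase ticks,
 * e.g. cpu_idle_delay(usecs_to_tb(500)) idles for roughly 500us. The
 * decrementer-based PM path is used when power management is enabled
 * and the delay is long enough to be worth it; otherwise we spin at
 * lowest SMT priority.
 */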

static void cpu_pm_disable(void)
{
	struct cpu_thread *cpu;

	pm_enabled = false;
	sync();

	if (proc_gen == proc_gen_p8) {
		for_each_available_cpu(cpu) {
			while (cpu->in_sleep || cpu->in_idle) {
				icp_kick_cpu(cpu);
				cpu_relax();
			}
		}
	} else if (proc_gen == proc_gen_p9) {
		for_each_available_cpu(cpu) {
			if (cpu->in_sleep || cpu->in_idle)
				p9_dbell_send(cpu->pir);
		}

		smt_lowest();
		for_each_available_cpu(cpu) {
			while (cpu->in_sleep || cpu->in_idle)
				barrier();
		}
		smt_medium();
	}
}

void cpu_set_sreset_enable(bool enabled)
{
	if (sreset_enabled == enabled)
		return;

	if (proc_gen == proc_gen_p8) {
		/* Public P8 Mambo has broken NAP */
		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
			return;

		sreset_enabled = enabled;
		sync();

		if (!enabled) {
			cpu_pm_disable();
		} else {
			if (ipi_enabled)
				pm_enabled = true;
		}

	} else if (proc_gen == proc_gen_p9) {
		sreset_enabled = enabled;
		sync();
		/*
		 * Kick everybody out of PM so they can adjust the PM
		 * mode they are using (EC=0/1).
		 */
		cpu_pm_disable();
		if (ipi_enabled)
			pm_enabled = true;
	}
}

void cpu_set_ipi_enable(bool enabled)
{
	if (ipi_enabled == enabled)
		return;

	if (proc_gen == proc_gen_p8) {
		ipi_enabled = enabled;
		sync();
		if (!enabled) {
			cpu_pm_disable();
		} else {
			if (sreset_enabled)
				pm_enabled = true;
		}

	} else if (proc_gen == proc_gen_p9) {
		ipi_enabled = enabled;
		sync();
		if (!enabled)
			cpu_pm_disable();
		else
			pm_enabled = true;
	}
}

void cpu_process_local_jobs(void)
{
	struct cpu_thread *cpu = first_available_cpu();

	while (cpu) {
		if (cpu != this_cpu())
			return;

		cpu = next_available_cpu(cpu);
	}

	if (!cpu)
		cpu = first_available_cpu();

	/* No CPU to run on, just run synchro */
	if (cpu == this_cpu()) {
		prlog_once(PR_DEBUG, "Processing jobs synchronously\n");
		cpu_process_jobs();
		opal_run_pollers();
	}
}

struct dt_node *get_cpu_node(u32 pir)
{
	struct cpu_thread *t = find_cpu_by_pir(pir);

	return t ? t->node : NULL;
}

/* This only covers primary, active cpus */
struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->is_secondary)
			continue;
		if (t->chip_id == chip_id)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->node == cpu)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_pir(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread *find_cpu_by_server(u32 server_no)
{
	struct cpu_thread *t;

	for_each_cpu(t) {
		if (t->server_no == server_no)
			return t;
	}
	return NULL;
}

struct cpu_thread *next_cpu(struct cpu_thread *cpu)
{
	struct cpu_stack *s;
	unsigned int index = 0;

	if (cpu != NULL) {
		s = container_of(cpu, struct cpu_stack, cpu);
		index = s - cpu_stacks + 1;
	}
	for (; index <= cpu_max_pir; index++) {
		cpu = &cpu_stacks[index].cpu;
		if (cpu->state != cpu_state_no_cpu)
			return cpu;
	}
	return NULL;
}

struct cpu_thread *first_cpu(void)
{
	return next_cpu(NULL);
}

struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_available(cpu));

	return cpu;
}

struct cpu_thread *first_available_cpu(void)
{
	return next_available_cpu(NULL);
}

struct cpu_thread *next_present_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_present(cpu));

	return cpu;
}

struct cpu_thread *first_present_cpu(void)
{
	return next_present_cpu(NULL);
}

struct cpu_thread *next_ungarded_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && cpu->state == cpu_state_unavailable);

	return cpu;
}

struct cpu_thread *first_ungarded_cpu(void)
{
	return next_ungarded_cpu(NULL);
}

struct cpu_thread *next_ungarded_primary(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && (cpu->state == cpu_state_unavailable || cpu->primary != cpu));

	return cpu;
}

struct cpu_thread *first_ungarded_primary(void)
{
	return next_ungarded_primary(NULL);
}

u8 get_available_nr_cores_in_chip(u32 chip_id)
{
	struct cpu_thread *core;
	u8 nr_cores = 0;

	for_each_available_core_in_chip(core, chip_id)
		nr_cores++;

	return nr_cores;
}

struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
					       u32 chip_id)
{
	do {
		core = next_cpu(core);
	} while(core && (!cpu_is_available(core) ||
			 core->chip_id != chip_id ||
			 core->is_secondary));
	return core;
}

struct cpu_thread *first_available_core_in_chip(u32 chip_id)
{
	return next_available_core_in_chip(NULL, chip_id);
}

uint32_t cpu_get_core_index(struct cpu_thread *cpu)
{
	return pir_to_core_id(cpu->pir);
}

void cpu_remove_node(const struct cpu_thread *t)
{
	struct dt_node *i;

	/* Find this cpu node */
	dt_for_each_node(dt_root, i) {
		const struct dt_property *p;

		if (!dt_has_node_property(i, "device_type", "cpu"))
			continue;
		p = dt_find_property(i, "ibm,pir");
		if (!p)
			continue;
		if (dt_property_get_cell(p, 0) == t->pir) {
			dt_free(i);
			return;
		}
	}
	prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
	abort();
}

void cpu_disable_all_threads(struct cpu_thread *cpu)
{
	unsigned int i;
	struct dt_property *p;

	for (i = 0; i <= cpu_max_pir; i++) {
		struct cpu_thread *t = &cpu_stacks[i].cpu;

		if (t->primary == cpu->primary)
			t->state = cpu_state_disabled;
	}

	/* Mark this core as bad so that the Linux kernel doesn't use it */
	prlog(PR_DEBUG, "CPU: Mark CPU bad (PIR 0x%04x)...\n", cpu->pir);
	p = __dt_find_property(cpu->node, "status");
	if (p)
		dt_del_property(cpu->node, p);

	dt_add_property_string(cpu->node, "status", "bad");

	/* XXX Do something to actually stop the core */
}

static void init_cpu_thread(struct cpu_thread *t,
			    enum cpu_thread_state state,
			    unsigned int pir)
{
	/* offset within cpu_thread to prevent stack_guard clobber */
	const size_t guard_skip = container_off_var(t, stack_guard) +
		sizeof(t->stack_guard);

	memset(((void *)t) + guard_skip, 0, sizeof(struct cpu_thread) - guard_skip);
	init_lock(&t->dctl_lock);
	init_lock(&t->job_lock);
	list_head_init(&t->job_queue);
	list_head_init(&t->locks_held);
	t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
	t->state = state;
	t->pir = pir;
#ifdef STACK_CHECK_ENABLED
	t->stack_bot_mark = LONG_MAX;
#endif
	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}

static void enable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 |= hid0_attn;
	set_hid0(hid0);
}

static void disable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 &= ~hid0_attn;
	set_hid0(hid0);
}

extern void __trigger_attn(void);
void trigger_attn(void)
{
	enable_attn();
	__trigger_attn();
}

static void init_hid(void)
{
	/* attn is enabled even when HV=0, so make sure it's off */
	disable_attn();
}

void __nomcount pre_init_boot_cpu(void)
{
	struct cpu_thread *cpu = this_cpu();

	/* We skip the stack guard ! */
	memset(((void *)cpu) + 8, 0, sizeof(struct cpu_thread) - 8);
}

void init_boot_cpu(void)
{
	unsigned int pir, pvr;

	pir = mfspr(SPR_PIR);
	pvr = mfspr(SPR_PVR);

	/* Get CPU family and other flags based on PVR */
	switch(PVR_TYPE(pvr)) {
	case PVR_TYPE_P8E:
	case PVR_TYPE_P8:
		proc_gen = proc_gen_p8;
		hile_supported = PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P8NVL:
		proc_gen = proc_gen_p8;
		hile_supported = true;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P9:
	case PVR_TYPE_P9P:
		proc_gen = proc_gen_p9;
		hile_supported = true;
		radix_supported = true;
		hid0_hile = SPR_HID0_POWER9_HILE;
		hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN;
		break;
	default:
		proc_gen = proc_gen_unknown;
	}

	/* Get a CPU thread count based on family */
	switch(proc_gen) {
	case proc_gen_p8:
		cpu_thread_count = 8;
		prlog(PR_INFO, "CPU: P8 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	case proc_gen_p9:
		cpu_thread_count = 4;
		prlog(PR_INFO, "CPU: P9 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	default:
		prerror("CPU: Unknown PVR, assuming 1 thread\n");
		cpu_thread_count = 1;
		cpu_max_pir = mfspr(SPR_PIR);
	}

	if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
		prerror("CPU: POWER9N DD1 is not supported\n");
		abort();
	}

	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
	      pir, pvr);

	/*
	 * Adjust top of RAM to include the boot CPU stack. If we have less
	 * RAM than this, it's not possible to boot.
	 */
	cpu_max_pir = pir;
	top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;

	/* Setup boot CPU state */
	boot_cpu = &cpu_stacks[pir].cpu;
	init_cpu_thread(boot_cpu, cpu_state_active, pir);
	init_boot_tracebuf(boot_cpu);
	assert(this_cpu() == boot_cpu);
	init_hid();
}

static void enable_large_dec(bool on)
{
	u64 lpcr = mfspr(SPR_LPCR);

	if (on)
		lpcr |= SPR_LPCR_P9_LD;
	else
		lpcr &= ~SPR_LPCR_P9_LD;

	mtspr(SPR_LPCR, lpcr);
	isync();
}

#define HIGH_BIT (1ull << 63)

static int find_dec_bits(void)
{
	int bits = 65; /* we always decrement once */
	u64 mask = ~0ull;

	if (proc_gen < proc_gen_p9)
		return 32;

	/* The ISA doesn't specify the width of the decrementer register so we
	 * need to discover it. When in large mode (LPCR.LD = 1) reads from the
	 * DEC SPR are sign extended to 64 bits and writes are truncated to the
	 * physical register width. We can use this behaviour to detect the
	 * width by starting from an all 1s value and right shifting until we
	 * read a value from the DEC with its high bit cleared.
	 */

	enable_large_dec(true);

	do {
		bits--;
		mask = mask >> 1;
		mtspr(SPR_DEC, mask);
	} while (mfspr(SPR_DEC) & HIGH_BIT);

	enable_large_dec(false);

	prlog(PR_DEBUG, "CPU: decrementer bits %d\n", bits);
	return bits;
}
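
/*
 * Worked example (illustrative, for a 56-bit decrementer such as
 * POWER9's): every mask of 56 or more ones still has bit 55 set after
 * the write is truncated to the physical width, so the sign-extended
 * read comes back with bit 63 set and the loop keeps shifting. The
 * first mask with only 55 ones reads back positive, and by then "bits"
 * has been decremented from 65 down to 56, the physical width.
 */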

static void init_tm_suspend_mode_property(void)
{
	struct dt_node *node;

	/* If we don't find anything, assume TM suspend is enabled */
	tm_suspend_enabled = true;

	node = dt_find_by_path(dt_root, "/ibm,opal/fw-features/tm-suspend-mode");
	if (!node)
		return;

	if (dt_find_property(node, "disabled"))
		tm_suspend_enabled = false;
}

void init_cpu_max_pir(void)
{
	struct dt_node *cpus, *cpu;

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		if (cpu_max_pir < pir + cpu_thread_count - 1)
			cpu_max_pir = pir + cpu_thread_count - 1;
	}

	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
}

void init_all_cpus(void)
{
	struct dt_node *cpus, *cpu;
	unsigned int thread;
	int dec_bits = find_dec_bits();

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	init_tm_suspend_mode_property();

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no, chip_id;
		enum cpu_thread_state state;
		const struct dt_property *p;
		struct cpu_thread *t, *pt;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		/* We should always have an ibm,chip-id property */
		chip_id = dt_get_chip_id(cpu);

		/* Only use operational CPUs */
		if (!strcmp(dt_prop_get(cpu, "status"), "okay"))
			state = cpu_state_present;
		else
			state = cpu_state_unavailable;

		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
		      " State=%d\n", pir, server_no, state);

		/* Setup thread 0 */
		assert(pir <= cpu_max_pir);
		t = pt = &cpu_stacks[pir].cpu;
		if (t != boot_cpu) {
			init_cpu_thread(t, state, pir);
			/* Each cpu gets its own later in init_trace_buffers */
			t->trace = boot_cpu->trace;
		}
		t->server_no = server_no;
		t->primary = t;
		t->node = cpu;
		t->chip_id = chip_id;
		t->icp_regs = NULL; /* Will be set later */
#ifdef DEBUG_LOCKS
		t->requested_lock = NULL;
#endif
		t->core_hmi_state = 0;
		t->core_hmi_state_ptr = &t->core_hmi_state;

		/* Add associativity properties */
		add_core_associativity(t);

		/* Add the decrementer width property */
		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);

		/* Iterate threads */
		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
		if (!p)
			continue;
		for (thread = 1; thread < (p->len / 4); thread++) {
			prlog(PR_TRACE, "CPU: secondary thread %d found\n",
			      thread);
			t = &cpu_stacks[pir + thread].cpu;
			init_cpu_thread(t, state, pir + thread);
			t->trace = boot_cpu->trace;
			t->server_no = ((const u32 *)p->prop)[thread];
			t->is_secondary = true;
			t->primary = pt;
			t->node = cpu;
			t->chip_id = chip_id;
			t->core_hmi_state_ptr = &pt->core_hmi_state;
		}
		prlog(PR_INFO, "CPU: %d secondary threads\n", thread);
	}
}

void cpu_bringup(void)
{
	struct cpu_thread *t;
	uint32_t count = 0;

	prlog(PR_INFO, "CPU: Setting up secondary CPU state\n");

	op_display(OP_LOG, OP_MOD_CPU, 0x0000);

	/* Tell everybody to chime in ! */
	prlog(PR_INFO, "CPU: Calling in all processors...\n");
	cpu_secondary_start = 1;
	sync();

	op_display(OP_LOG, OP_MOD_CPU, 0x0002);

	for_each_cpu(t) {
		if (t->state != cpu_state_present &&
		    t->state != cpu_state_active)
			continue;

		/* Add a callin timeout ? If so, call cpu_remove_node(t). */
		while (t->state != cpu_state_active) {
			smt_lowest();
			sync();
		}
		smt_medium();
		count++;
	}

	prlog(PR_NOTICE, "CPU: All %d processors called in...\n", count);

	op_display(OP_LOG, OP_MOD_CPU, 0x0003);
}

void cpu_callin(struct cpu_thread *cpu)
{
	sync();
	cpu->state = cpu_state_active;
	sync();

	cpu->job_has_no_return = false;
	if (cpu_is_thread0(cpu))
		init_hid();
}

static void opal_start_thread_job(void *data)
{
	cpu_give_self_os();

	/* This is a no-return job: we never come back, and nobody
	 * waits on its completion
	 */
	start_kernel_secondary((uint64_t)data);
}

static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	if (!opal_addr_valid((void *)start_address))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
	      server_no, cpu->pir, start_address);

	lock(&reinit_lock);
	if (!cpu_is_available(cpu)) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU not active in OPAL !\n");
		return OPAL_WRONG_STATE;
	}
	if (cpu->in_reinit) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU being reinitialized !\n");
		return OPAL_WRONG_STATE;
	}
	job = __cpu_queue_job(cpu, "start_thread",
			      opal_start_thread_job, (void *)start_address,
			      true);
	unlock(&reinit_lock);
	if (!job) {
		prerror("OPAL: Failed to create CPU start job !\n");
		return OPAL_INTERNAL_ERROR;
	}
	return OPAL_SUCCESS;
}
opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);

static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
{
	struct cpu_thread *cpu;

	if (!opal_addr_valid(thread_status))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
		prerror("OPAL: CPU not active in OPAL nor OS !\n");
		return OPAL_PARAMETER;
	}
	switch(cpu->state) {
	case cpu_state_os:
		*thread_status = OPAL_THREAD_STARTED;
		break;
	case cpu_state_active:
		/* Active in skiboot -> inactive in OS */
		*thread_status = OPAL_THREAD_INACTIVE;
		break;
	default:
		*thread_status = OPAL_THREAD_UNAVAILABLE;
	}

	return OPAL_SUCCESS;
}
opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);

static int64_t opal_return_cpu(void)
{
	prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);

	this_cpu()->in_opal_call--;
	if (this_cpu()->in_opal_call != 0) {
		printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
	}

	__secondary_cpu_entry();

	return OPAL_HARDWARE; /* Should not happen */
}
opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);

struct hid0_change_req {
	uint64_t clr_bits;
	uint64_t set_bits;
};
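
/*
 * Example (illustrative): this is how opal_reinit_cpus() below uses
 * the request to flip all cores to a little-endian OS when HILE is
 * supported:
 *
 *	struct hid0_change_req req = { 0, 0 };
 *
 *	req.set_bits |= hid0_hile;	(or clr_bits for big-endian)
 *	cpu_change_all_hid0(&req);
 *
 * cpu_change_all_hid0() applies the update locally, then on every
 * other primary thread via the job queue.
 */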

static void cpu_change_hid0(void *__req)
{
	struct hid0_change_req *req = __req;
	unsigned long hid0, new_hid0;

	hid0 = new_hid0 = mfspr(SPR_HID0);
	new_hid0 &= ~req->clr_bits;
	new_hid0 |= req->set_bits;
	prlog(PR_DEBUG, "CPU: [%08x] HID0 change 0x%016lx -> 0x%016lx\n",
	      this_cpu()->pir, hid0, new_hid0);
	set_hid0(new_hid0);
}

static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (!cpu_is_thread0(cpu))
			continue;
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_change_hid0",
					       cpu_change_hid0, req);
	}

	/* this cpu */
	cpu_change_hid0(req);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

static void cpu_cleanup_one(void *param __unused)
{
	mtspr(SPR_AMR, 0);
	mtspr(SPR_IAMR, 0);
	mtspr(SPR_PCR, 0);
}

static int64_t cpu_cleanup_all(void)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup",
					       cpu_cleanup_one, NULL);
	}

	/* this cpu */
	cpu_cleanup_one(NULL);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

void cpu_fast_reboot_complete(void)
{
	/* Fast reboot will have cleared HID0:HILE */
	current_hile_mode = false;

	/* and set HID0:RADIX */
	current_radix_mode = true;
}

static int64_t opal_reinit_cpus(uint64_t flags)
{
	struct hid0_change_req req = { 0, 0 };
	struct cpu_thread *cpu;
	int64_t rc = OPAL_SUCCESS;
	int i;

	prlog(PR_DEBUG, "OPAL: CPU re-init with flags: 0x%llx\n", flags);

	if (flags & OPAL_REINIT_CPUS_HILE_LE)
		prlog(PR_INFO, "OPAL: Switch to little-endian OS\n");
	else if (flags & OPAL_REINIT_CPUS_HILE_BE)
		prlog(PR_INFO, "OPAL: Switch to big-endian OS\n");

again:
	lock(&reinit_lock);

	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
		if (cpu == this_cpu() || cpu->in_reinit)
			continue;
		if (cpu->state == cpu_state_os) {
			unlock(&reinit_lock);
			/*
			 * That might be a race with a CPU returning to
			 * OPAL during kexec; wait a bit and try again
			 */
			for (i = 0; (i < 1000) &&
				     (cpu->state == cpu_state_os); i++) {
				time_wait_ms(1);
			}
			if (cpu->state == cpu_state_os) {
				prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
				return OPAL_WRONG_STATE;
			}
			goto again;
		}
		cpu->in_reinit = true;
	}
	/*
	 * Now we need to mark ourselves "active" or we'll be skipped
	 * by the various "for_each_active_..." calls done by slw_reinit()
	 */
	this_cpu()->state = cpu_state_active;
	this_cpu()->in_reinit = true;
	unlock(&reinit_lock);

	/*
	 * This cleans up a few things left over by Linux
	 * that can cause problems in cases such as radix->hash
	 * transitions. Ideally Linux should do it but doing it
	 * here works around existing broken kernels.
	 */
	cpu_cleanup_all();

	/* If HILE change via HID0 is supported ... */
	if (hile_supported &&
	    (flags & (OPAL_REINIT_CPUS_HILE_BE |
		      OPAL_REINIT_CPUS_HILE_LE))) {
		bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE);

		flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE);
		if (hile != current_hile_mode) {
			if (hile)
				req.set_bits |= hid0_hile;
			else
				req.clr_bits |= hid0_hile;
			current_hile_mode = hile;
		}
	}

	/* If MMU mode change is supported */
	if (radix_supported &&
	    (flags & (OPAL_REINIT_CPUS_MMU_HASH |
		      OPAL_REINIT_CPUS_MMU_RADIX))) {
		bool radix = !!(flags & OPAL_REINIT_CPUS_MMU_RADIX);

		flags &= ~(OPAL_REINIT_CPUS_MMU_HASH |
			   OPAL_REINIT_CPUS_MMU_RADIX);
		if (radix != current_radix_mode) {
			if (radix)
				req.set_bits |= SPR_HID0_POWER9_RADIX;
			else
				req.clr_bits |= SPR_HID0_POWER9_RADIX;

			current_radix_mode = radix;
		}
	}

	/* Cleanup the TLB. We do that unconditionally, this works
	 * around issues where OSes fail to invalidate the PWC in Radix
	 * mode for example. This only works on P9 and later, but we
	 * also know we don't have a problem with Linux cleanups on
	 * P8 so this isn't a problem. If we wanted to cleanup the
	 * TLB on P8 as well, we'd have to use jobs to do it locally
	 * on each CPU.
	 */
	cleanup_global_tlb();

	/* Apply HID bits changes if any */
	if (req.set_bits || req.clr_bits)
		cpu_change_all_hid0(&req);

	if (flags & OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) {
		flags &= ~OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED;

		if (tm_suspend_enabled)
			rc = OPAL_UNSUPPORTED;
		else
			rc = OPAL_SUCCESS;
	}

	/* Handle P8 DD1 SLW reinit */
	if (flags != 0 && proc_gen == proc_gen_p8 && !hile_supported)
		rc = slw_reinit(flags);
	else if (flags != 0)
		rc = OPAL_UNSUPPORTED;

	/* And undo the above */
	lock(&reinit_lock);
	this_cpu()->state = cpu_state_os;
	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
		cpu->in_reinit = false;
	unlock(&reinit_lock);

	return rc;
}
opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);

#define NMMU_XLAT_CTL_PTCR 0xb
static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr)
{
	uint32_t nmmu_base_addr;

	nmmu_base_addr = dt_get_address(node, 0, NULL);
	return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
}

/*
 * Set up the Nest MMU PTCR register for all chips in the system or
 * for the specified chip id.
 *
 * The PTCR value may be overwritten so long as all users have been
 * quiesced. If it is set to an invalid memory address the system will
 * checkstop if anything attempts to use it.
 *
 * Returns OPAL_UNSUPPORTED if no nest mmu was found.
 */
static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
{
	struct dt_node *node;
	int64_t rc = OPAL_UNSUPPORTED;

	if (chip_id == -1ULL)
		dt_for_each_compatible(dt_root, node, "ibm,power9-nest-mmu") {
			chip_id = dt_get_chip_id(node);
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;
		}
	else
		dt_for_each_compatible_on_chip(dt_root, node, "ibm,power9-nest-mmu", chip_id)
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;

	return rc;
}
opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);