/* Copyright 2013-2017 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * TODO: Index array by PIR to be able to catch them easily
 * from assembly such as machine checks etc...
 */
#include <skiboot.h>
#include <cpu.h>
#include <device.h>
#include <mem_region.h>
#include <opal.h>
#include <stack.h>
#include <trace.h>
#include <affinity.h>
#include <chip.h>
#include <timebase.h>
#include <interrupts.h>
#include <ccan/str/str.h>
#include <ccan/container_of/container_of.h>
#include <xscom.h>

/* The cpu_threads array is static and indexed by PIR in
 * order to speed up lookup from asm entry points
 */
struct cpu_stack {
	union {
		uint8_t	stack[STACK_SIZE];
		struct cpu_thread cpu;
	};
} __align(STACK_SIZE);

static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
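
/*
 * Illustrative sketch (not part of the original source): because the
 * array is indexed directly by PIR, a thread's state and stack can be
 * located with plain arithmetic, which is what the asm entry points
 * rely on:
 *
 *	struct cpu_thread *t = &cpu_stacks[pir].cpu;
 *	unsigned long stack_top = cpu_stack_top(pir);
 */
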
unsigned int cpu_thread_count;
unsigned int cpu_max_pir;
struct cpu_thread *boot_cpu;
static struct lock reinit_lock = LOCK_UNLOCKED;
static bool hile_supported;
static bool radix_supported;
static unsigned long hid0_hile;
static unsigned long hid0_attn;
static bool sreset_enabled;
static bool ipi_enabled;
static bool pm_enabled;
static bool current_hile_mode;
static bool current_radix_mode;
static bool tm_suspend_enabled;

unsigned long cpu_secondary_start __force_data = 0;

struct cpu_job {
	struct list_node	link;
	void			(*func)(void *data);
	void			*data;
	const char		*name;
	bool			complete;
	bool			no_return;
};

/* attribute const as cpu_stacks is constant. */
unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
{
	return ((unsigned long)&cpu_stacks[pir]) +
		sizeof(struct cpu_thread) + STACK_SAFETY_GAP;
}

unsigned long __attrconst cpu_stack_top(unsigned int pir)
{
	/* This is the top of the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE - STACK_TOP_GAP;
}

unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir)
{
	/* This is the top of the emergency stack, above the normal stack. */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP;
}
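
/*
 * Rough per-PIR block layout implied by the helpers above (illustrative
 * only; the size and gap constants come from the headers included above):
 *
 *	cpu_stacks[pir] + NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE
 *	  ^-- top of emergency stack (minus STACK_TOP_GAP)
 *	cpu_stacks[pir] + NORMAL_STACK_SIZE
 *	  ^-- top of normal stack (minus STACK_TOP_GAP)
 *	cpu_stacks[pir] + sizeof(struct cpu_thread) + STACK_SAFETY_GAP
 *	  ^-- lowest usable stack address
 *	cpu_stacks[pir]
 *	  ^-- struct cpu_thread for this PIR
 */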

void __nomcount cpu_relax(void)
{
	/* Relax a bit to give sibling threads some breathing space */
	smt_lowest();
	asm volatile("nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n"
		     "nop; nop; nop; nop;\n");
	smt_medium();
	barrier();
}

static void cpu_wake(struct cpu_thread *cpu)
{
	/* Is it idle ? If not, no need to wake */
	sync();
	if (!cpu->in_idle)
		return;

	if (proc_gen == proc_gen_p8) {
		/* Poke IPI */
		icp_kick_cpu(cpu);
	} else if (proc_gen == proc_gen_p9) {
		p9_dbell_send(cpu->pir);
	}
}

/*
 * If chip_id is >= 0, schedule the job on that node.
 * Otherwise schedule the job anywhere.
 */
static struct cpu_thread *cpu_find_job_target(int32_t chip_id)
{
	struct cpu_thread *cpu, *best, *me = this_cpu();
	uint32_t best_count;

	/* We try to find a target to run a job. We need to avoid
	 * a CPU that has a "no return" job on its queue as it might
	 * never be able to process anything.
	 *
	 * Additionally we don't check the list but the job count
	 * on the target CPUs, since that is decremented *after*
	 * a job has been completed.
	 */


	/* First we scan all available primary threads
	 */
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || !cpu_is_thread0(cpu) || cpu->job_has_no_return)
			continue;
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* Now try again with secondary threads included and keep
	 * track of the one with the fewest jobs queued up. This is
	 * done in a racy way, but it's just an optimization in case
	 * we are overcommitted on jobs. We could also just pick
	 * a random one...
	 */
	best = NULL;
	best_count = -1u;
	for_each_available_cpu(cpu) {
		if (chip_id >= 0 && cpu->chip_id != chip_id)
			continue;
		if (cpu == me || cpu->job_has_no_return)
			continue;
		if (!best || cpu->job_count < best_count) {
			best = cpu;
			best_count = cpu->job_count;
		}
		if (cpu->job_count)
			continue;
		lock(&cpu->job_lock);
		if (!cpu->job_count)
			return cpu;
		unlock(&cpu->job_lock);
	}

	/* We haven't found anybody, do we have a bestie ? */
	if (best) {
		lock(&best->job_lock);
		return best;
	}

	/* Go away */
	return NULL;
}

/* job_lock is held, returns with it released */
static void queue_job_on_cpu(struct cpu_thread *cpu, struct cpu_job *job)
{
	/* That's bad, the job will never run */
	if (cpu->job_has_no_return) {
		prlog(PR_WARNING, "WARNING ! Job %s scheduled on CPU 0x%x"
		      " which has a no-return job on its queue !\n",
		      job->name, cpu->pir);
		backtrace();
	}
	list_add_tail(&cpu->job_queue, &job->link);
	if (job->no_return)
		cpu->job_has_no_return = true;
	else
		cpu->job_count++;
	if (pm_enabled)
		cpu_wake(cpu);
	unlock(&cpu->job_lock);
}

struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
				const char *name,
				void (*func)(void *data), void *data,
				bool no_return)
{
	struct cpu_job *job;

#ifdef DEBUG_SERIALIZE_CPU_JOBS
	if (cpu == NULL)
		cpu = this_cpu();
#endif

	if (cpu && !cpu_is_available(cpu)) {
		prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
			cpu->pir);
		return NULL;
	}

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = no_return;

	/* Pick a candidate. Returns with target queue locked */
	if (cpu == NULL)
		cpu = cpu_find_job_target(-1);
	else if (cpu != this_cpu())
		lock(&cpu->job_lock);
	else
		cpu = NULL;

	/* Can't be scheduled, run it now */
	if (cpu == NULL) {
		if (!this_cpu()->job_has_no_return)
			this_cpu()->job_has_no_return = no_return;
		func(data);
		job->complete = true;
		return job;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id,
				const char *name,
				void (*func)(void *data), void *data)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->name = name;
	job->complete = false;
	job->no_return = false;

	/* Pick a candidate. Returns with target queue locked */
	cpu = cpu_find_job_target(chip_id);

	/* Can't be scheduled... */
	if (cpu == NULL) {
		cpu = this_cpu();
		if (cpu->chip_id == chip_id) {
			/* Run it now if we're the right node. */
			func(data);
			job->complete = true;
			return job;
		}
		/* Otherwise fail. */
		free(job);
		return NULL;
	}

	queue_job_on_cpu(cpu, job);

	return job;
}

bool cpu_poll_job(struct cpu_job *job)
{
	lwsync();
	return job->complete;
}

void cpu_wait_job(struct cpu_job *job, bool free_it)
{
	unsigned long time_waited = 0;

	if (!job)
		return;

	while (!job->complete) {
		/* This will call OPAL pollers for us */
		time_wait_ms(10);
		time_waited += 10;
		lwsync();
		if ((time_waited % 30000) == 0) {
			prlog(PR_INFO, "cpu_wait_job(%s) for %lums\n",
			      job->name, time_waited);
			backtrace();
		}
	}
	lwsync();

	if (time_waited > 1000)
		prlog(PR_DEBUG, "cpu_wait_job(%s) for %lums\n",
		      job->name, time_waited);

	if (free_it)
		free(job);
}
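
/*
 * Illustrative sketch (not part of the original source) of how the job
 * API above is typically used, assuming a caller-provided do_work()
 * callback and argument block:
 *
 *	struct cpu_job *job;
 *
 *	job = cpu_queue_job(target, "do_work", do_work, &args);
 *	if (job)
 *		cpu_wait_job(job, true);
 *
 * cpu_wait_job() runs the OPAL pollers while it waits and, with free_it
 * set, frees the job once job->complete is observed.
 */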

bool cpu_check_jobs(struct cpu_thread *cpu)
{
	return !list_empty_nocheck(&cpu->job_queue);
}

void cpu_process_jobs(void)
{
	struct cpu_thread *cpu = this_cpu();
	struct cpu_job *job = NULL;
	void (*func)(void *);
	void *data;

	sync();
	if (!cpu_check_jobs(cpu))
		return;

	lock(&cpu->job_lock);
	while (true) {
		bool no_return;

		job = list_pop(&cpu->job_queue, struct cpu_job, link);
		if (!job)
			break;

		func = job->func;
		data = job->data;
		no_return = job->no_return;
		unlock(&cpu->job_lock);
		prlog(PR_TRACE, "running job %s on %x\n", job->name, cpu->pir);
		if (no_return)
			free(job);
		func(data);
		if (!list_empty(&cpu->locks_held)) {
			if (no_return)
				prlog(PR_ERR, "OPAL no-return job returned with"
				      " locks held!\n");
			else
				prlog(PR_ERR, "OPAL job %s returning with locks held\n",
				      job->name);
			drop_my_locks(true);
		}
		lock(&cpu->job_lock);
		if (!no_return) {
			cpu->job_count--;
			lwsync();
			job->complete = true;
		}
	}
	unlock(&cpu->job_lock);
}

enum cpu_wake_cause {
	cpu_wake_on_job,
	cpu_wake_on_dec,
};

static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE;
	struct cpu_thread *cpu = this_cpu();
	unsigned int vec = 0;

	if (!pm_enabled) {
		prlog_once(PR_DEBUG, "cpu_idle_p8 called pm disabled\n");
		return vec;
	}

	/* Clean up ICP, be ready for IPIs */
	icp_prep_for_pm();

	/* Synchronize with wakers */
	if (wake_on == cpu_wake_on_job) {
		/* Mark ourselves in idle so other CPUs know to send an IPI */
		cpu->in_idle = true;
		sync();

		/* Check for jobs again */
		if (cpu_check_jobs(cpu) || !pm_enabled)
			goto skip_sleep;

		/* Set up wakeup cause in LPCR: EE (for IPI) */
		lpcr |= SPR_LPCR_P8_PECE2;
		mtspr(SPR_LPCR, lpcr);

	} else {
		/* Mark ourselves sleeping so cpu_set_pm_enable knows to
		 * send an IPI
		 */
		cpu->in_sleep = true;
		sync();

		/* Check if PM got disabled */
		if (!pm_enabled)
			goto skip_sleep;

		/* EE and DEC */
		lpcr |= SPR_LPCR_P8_PECE2 | SPR_LPCR_P8_PECE3;
		mtspr(SPR_LPCR, lpcr);
	}
	isync();

	/* Enter nap */
	vec = enter_p8_pm_state(false);

skip_sleep:
	/* Restore */
	sync();
	cpu->in_idle = false;
	cpu->in_sleep = false;
	reset_cpu_icp();

	return vec;
}

static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)
{
	uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE;
	uint64_t psscr;
	struct cpu_thread *cpu = this_cpu();
	unsigned int vec = 0;

	if (!pm_enabled) {
		prlog_once(PR_DEBUG, "cpu_idle_p9 called pm disabled\n");
		return vec;
	}

	/* Synchronize with wakers */
	if (wake_on == cpu_wake_on_job) {
		/* Mark ourselves in idle so other CPUs know to send an IPI */
		cpu->in_idle = true;
		sync();

		/* Check for jobs again */
		if (cpu_check_jobs(cpu) || !pm_enabled)
			goto skip_sleep;

		/* HV DBELL for IPI */
		lpcr |= SPR_LPCR_P9_PECEL1;
	} else {
		/* Mark ourselves sleeping so cpu_set_pm_enable knows to
		 * send an IPI
		 */
		cpu->in_sleep = true;
		sync();

		/* Check if PM got disabled */
		if (!pm_enabled)
			goto skip_sleep;

		/* HV DBELL and DEC */
		lpcr |= SPR_LPCR_P9_PECEL1 | SPR_LPCR_P9_PECEL3;
	}

	mtspr(SPR_LPCR, lpcr);
	isync();

	if (sreset_enabled) {
		/* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */
		/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BIT(42) | PPC_BIT(43) |
			PPC_BITMASK(54, 55) | PPC_BIT(63);
		vec = enter_p9_pm_state(psscr);
	} else {
		/* stop with EC=0 (resumes), which does not require sreset. */
		/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */
		psscr = PPC_BITMASK(54, 55) | PPC_BIT(63);
		enter_p9_pm_lite_state(psscr);
	}

	/* Clear doorbell */
	p9_dbell_receive();

 skip_sleep:
	/* Restore */
	sync();
	cpu->in_idle = false;
	cpu->in_sleep = false;

	return vec;
}

static void cpu_idle_pm(enum cpu_wake_cause wake_on)
{
	unsigned int vec;

	switch(proc_gen) {
	case proc_gen_p8:
		vec = cpu_idle_p8(wake_on);
		break;
	case proc_gen_p9:
		vec = cpu_idle_p9(wake_on);
		break;
	default:
		vec = 0;
		prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n");
		break;
	}

	if (vec == 0x100) {
		unsigned long srr1 = mfspr(SPR_SRR1);

		switch (srr1 & SPR_SRR1_PM_WAKE_MASK) {
		case SPR_SRR1_PM_WAKE_SRESET:
			exception_entry_pm_sreset();
			break;
		default:
			break;
		}
		mtmsrd(MSR_RI, 1);

	} else if (vec == 0x200) {
		exception_entry_pm_mce();
		enable_machine_check();
		mtmsrd(MSR_RI, 1);
	}
}

void cpu_idle_job(void)
{
	if (pm_enabled) {
		cpu_idle_pm(cpu_wake_on_job);
	} else {
		struct cpu_thread *cpu = this_cpu();

		smt_lowest();
		/* Check for jobs again */
		while (!cpu_check_jobs(cpu)) {
			if (pm_enabled)
				break;
			cpu_relax();
			barrier();
		}
		smt_medium();
	}
}

void cpu_idle_delay(unsigned long delay)
{
	unsigned long now = mftb();
	unsigned long end = now + delay;
	unsigned long min_pm = usecs_to_tb(10);

	if (pm_enabled && delay > min_pm) {
pm:
		for (;;) {
			if (delay >= 0x7fffffff)
				delay = 0x7fffffff;
			mtspr(SPR_DEC, delay);

			cpu_idle_pm(cpu_wake_on_dec);

			now = mftb();
			if (tb_compare(now, end) == TB_AAFTERB)
				break;
			delay = end - now;
			if (!(pm_enabled && delay > min_pm))
				goto no_pm;
		}
	} else {
no_pm:
		smt_lowest();
		for (;;) {
			now = mftb();
			if (tb_compare(now, end) == TB_AAFTERB)
				break;
			delay = end - now;
			if (pm_enabled && delay > min_pm) {
				smt_medium();
				goto pm;
			}
		}
		smt_medium();
	}
}

static void cpu_pm_disable(void)
{
	struct cpu_thread *cpu;

	pm_enabled = false;
	sync();

	if (proc_gen == proc_gen_p8) {
		for_each_available_cpu(cpu) {
			while (cpu->in_sleep || cpu->in_idle) {
				icp_kick_cpu(cpu);
				cpu_relax();
			}
		}
	} else if (proc_gen == proc_gen_p9) {
		for_each_available_cpu(cpu) {
			if (cpu->in_sleep || cpu->in_idle)
				p9_dbell_send(cpu->pir);
		}

		smt_lowest();
		for_each_available_cpu(cpu) {
			while (cpu->in_sleep || cpu->in_idle)
				barrier();
		}
		smt_medium();
	}
}

void cpu_set_sreset_enable(bool enabled)
{
	if (sreset_enabled == enabled)
		return;

	if (proc_gen == proc_gen_p8) {
		/* Public P8 Mambo has broken NAP */
		if (chip_quirk(QUIRK_MAMBO_CALLOUTS))
			return;

		sreset_enabled = enabled;
		sync();

		if (!enabled) {
			cpu_pm_disable();
		} else {
			if (ipi_enabled)
				pm_enabled = true;
		}

	} else if (proc_gen == proc_gen_p9) {
		sreset_enabled = enabled;
		sync();
		/*
		 * Kick everybody out of PM so they can adjust the PM
		 * mode they are using (EC=0/1).
		 */
		cpu_pm_disable();
		if (ipi_enabled)
			pm_enabled = true;
	}
}

void cpu_set_ipi_enable(bool enabled)
{
	if (ipi_enabled == enabled)
		return;

	if (proc_gen == proc_gen_p8) {
		ipi_enabled = enabled;
		sync();
		if (!enabled) {
			cpu_pm_disable();
		} else {
			if (sreset_enabled)
				pm_enabled = true;
		}

	} else if (proc_gen == proc_gen_p9) {
		ipi_enabled = enabled;
		sync();
		if (!enabled)
			cpu_pm_disable();
		else
			pm_enabled = true;
	}
}

void cpu_process_local_jobs(void)
{
	struct cpu_thread *cpu = first_available_cpu();

	while (cpu) {
		if (cpu != this_cpu())
			return;

		cpu = next_available_cpu(cpu);
	}

	if (!cpu)
		cpu = first_available_cpu();

	/* No CPU to run on, just run synchro */
	if (cpu == this_cpu()) {
		prlog_once(PR_DEBUG, "Processing jobs synchronously\n");
		cpu_process_jobs();
		opal_run_pollers();
	}
}


struct dt_node *get_cpu_node(u32 pir)
{
	struct cpu_thread *t = find_cpu_by_pir(pir);

	return t ? t->node : NULL;
}

/* This only covers primary, active cpus */
struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->is_secondary)
			continue;
		if (t->chip_id == chip_id)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->node == cpu)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_pir(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct __nomcount cpu_thread *find_cpu_by_pir_nomcount(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread *find_cpu_by_server(u32 server_no)
{
	struct cpu_thread *t;

	for_each_cpu(t) {
		if (t->server_no == server_no)
			return t;
	}
	return NULL;
}

struct cpu_thread *next_cpu(struct cpu_thread *cpu)
{
	struct cpu_stack *s;
	unsigned int index = 0;

	if (cpu != NULL) {
		s = container_of(cpu, struct cpu_stack, cpu);
		index = s - cpu_stacks + 1;
	}
	for (; index <= cpu_max_pir; index++) {
		cpu = &cpu_stacks[index].cpu;
		if (cpu->state != cpu_state_no_cpu)
			return cpu;
	}
	return NULL;
}

struct cpu_thread *first_cpu(void)
{
	return next_cpu(NULL);
}

struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_available(cpu));

	return cpu;
}

struct cpu_thread *first_available_cpu(void)
{
	return next_available_cpu(NULL);
}

struct cpu_thread *next_present_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_present(cpu));

	return cpu;
}

struct cpu_thread *first_present_cpu(void)
{
	return next_present_cpu(NULL);
}

struct cpu_thread *next_ungarded_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && cpu->state == cpu_state_unavailable);

	return cpu;
}

struct cpu_thread *first_ungarded_cpu(void)
{
	return next_ungarded_cpu(NULL);
}

struct cpu_thread *next_ungarded_primary(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && (cpu->state == cpu_state_unavailable || cpu->primary != cpu));

	return cpu;
}

struct cpu_thread *first_ungarded_primary(void)
{
	return next_ungarded_primary(NULL);
}

u8 get_available_nr_cores_in_chip(u32 chip_id)
{
	struct cpu_thread *core;
	u8 nr_cores = 0;

	for_each_available_core_in_chip(core, chip_id)
		nr_cores++;

	return nr_cores;
}

struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
					       u32 chip_id)
{
	do {
		core = next_cpu(core);
	} while(core && (!cpu_is_available(core) ||
			 core->chip_id != chip_id ||
			 core->is_secondary));
	return core;
}

struct cpu_thread *first_available_core_in_chip(u32 chip_id)
{
	return next_available_core_in_chip(NULL, chip_id);
}

uint32_t cpu_get_core_index(struct cpu_thread *cpu)
{
	return pir_to_core_id(cpu->pir);
}

void cpu_remove_node(const struct cpu_thread *t)
{
	struct dt_node *i;

	/* Find this cpu node */
	dt_for_each_node(dt_root, i) {
		const struct dt_property *p;

		if (!dt_has_node_property(i, "device_type", "cpu"))
			continue;
		p = dt_find_property(i, "ibm,pir");
		if (!p)
			continue;
		if (dt_property_get_cell(p, 0) == t->pir) {
			dt_free(i);
			return;
		}
	}
	prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
	abort();
}

void cpu_disable_all_threads(struct cpu_thread *cpu)
{
	unsigned int i;
	struct dt_property *p;

	for (i = 0; i <= cpu_max_pir; i++) {
		struct cpu_thread *t = &cpu_stacks[i].cpu;

		if (t->primary == cpu->primary)
			t->state = cpu_state_disabled;

	}

	/* Mark this core as bad so that the Linux kernel doesn't use this CPU. */
	prlog(PR_DEBUG, "CPU: Mark CPU bad (PIR 0x%04x)...\n", cpu->pir);
	p = __dt_find_property(cpu->node, "status");
	if (p)
		dt_del_property(cpu->node, p);

	dt_add_property_string(cpu->node, "status", "bad");

	/* XXX Do something to actually stop the core */
}

static void init_cpu_thread(struct cpu_thread *t,
			    enum cpu_thread_state state,
			    unsigned int pir)
{
	/* offset within cpu_thread to prevent stack_guard clobber */
	const size_t guard_skip = container_off_var(t, stack_guard) +
		sizeof(t->stack_guard);

	memset(((void *)t) + guard_skip, 0, sizeof(struct cpu_thread) - guard_skip);
	init_lock(&t->dctl_lock);
	init_lock(&t->job_lock);
	list_head_init(&t->job_queue);
	list_head_init(&t->locks_held);
	t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir;
	t->state = state;
	t->pir = pir;
#ifdef STACK_CHECK_ENABLED
	t->stack_bot_mark = LONG_MAX;
#endif
	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}

static void enable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 |= hid0_attn;
	set_hid0(hid0);
}

static void disable_attn(void)
{
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	hid0 &= ~hid0_attn;
	set_hid0(hid0);
}

extern void __trigger_attn(void);
void trigger_attn(void)
{
	enable_attn();
	__trigger_attn();
}

static void init_hid(void)
{
	/* attn is enabled even when HV=0, so make sure it's off */
	disable_attn();
}

void __nomcount pre_init_boot_cpu(void)
{
	struct cpu_thread *cpu = this_cpu();

	/* We skip the stack guard ! */
	memset(((void *)cpu) + 8, 0, sizeof(struct cpu_thread) - 8);
}

void init_boot_cpu(void)
{
	unsigned int pir, pvr;

	pir = mfspr(SPR_PIR);
	pvr = mfspr(SPR_PVR);

	/* Get CPU family and other flags based on PVR */
	switch(PVR_TYPE(pvr)) {
	case PVR_TYPE_P8E:
	case PVR_TYPE_P8:
		proc_gen = proc_gen_p8;
		hile_supported = PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P8NVL:
		proc_gen = proc_gen_p8;
		hile_supported = true;
		hid0_hile = SPR_HID0_POWER8_HILE;
		hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN;
		break;
	case PVR_TYPE_P9:
	case PVR_TYPE_P9P:
		proc_gen = proc_gen_p9;
		hile_supported = true;
		radix_supported = true;
		hid0_hile = SPR_HID0_POWER9_HILE;
		hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN;
		break;
	default:
		proc_gen = proc_gen_unknown;
	}

	/* Get a CPU thread count based on family */
	switch(proc_gen) {
	case proc_gen_p8:
		cpu_thread_count = 8;
		prlog(PR_INFO, "CPU: P8 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	case proc_gen_p9:
		cpu_thread_count = 4;
		prlog(PR_INFO, "CPU: P9 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	default:
		prerror("CPU: Unknown PVR, assuming 1 thread\n");
		cpu_thread_count = 1;
		cpu_max_pir = mfspr(SPR_PIR);
	}

	if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) {
		prerror("CPU: POWER9N DD1 is not supported\n");
		abort();
	}

	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
	      pir, pvr);

	/*
	 * Adjust top of RAM to include the boot CPU stack. If we have less
	 * RAM than this, it's not possible to boot.
	 */
	cpu_max_pir = pir;
	top_of_ram += (cpu_max_pir + 1) * STACK_SIZE;

	/* Setup boot CPU state */
	boot_cpu = &cpu_stacks[pir].cpu;
	init_cpu_thread(boot_cpu, cpu_state_active, pir);
	init_boot_tracebuf(boot_cpu);
	assert(this_cpu() == boot_cpu);
	init_hid();
}

static void enable_large_dec(bool on)
{
	u64 lpcr = mfspr(SPR_LPCR);

	if (on)
		lpcr |= SPR_LPCR_P9_LD;
	else
		lpcr &= ~SPR_LPCR_P9_LD;

	mtspr(SPR_LPCR, lpcr);
	isync();
}

#define HIGH_BIT (1ull << 63)

static int find_dec_bits(void)
{
	int bits = 65; /* we always decrement once */
	u64 mask = ~0ull;

	if (proc_gen < proc_gen_p9)
		return 32;

	/* The ISA doesn't specify the width of the decrementer register, so
	 * we need to discover it. When in large mode (LPCR.LD = 1) reads from
	 * the DEC SPR are sign extended to 64 bits and writes are truncated to
	 * the physical register width. We can use this behaviour to detect the
	 * width by starting from an all 1s value and shifting it right until
	 * we read a value from the DEC with its high bit cleared.
	 */

	enable_large_dec(true);

	do {
		bits--;
		mask = mask >> 1;
		mtspr(SPR_DEC, mask);
	} while (mfspr(SPR_DEC) & HIGH_BIT);

	enable_large_dec(false);

	prlog(PR_DEBUG, "CPU: decrementer bits %d\n", bits);
	return bits;
}
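
/*
 * Worked example (illustrative, not from the original source): on an
 * implementation with a 56-bit decrementer, writes of ~0ull >> 1 through
 * ~0ull >> 8 still have bit 55 set after truncation, so they read back
 * sign-extended with the high bit set. The ninth shift (~0ull >> 9)
 * clears bit 55, the loop exits, and bits = 65 - 9 = 56.
 */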

static void init_tm_suspend_mode_property(void)
{
	struct dt_node *node;

	/* If we don't find anything, assume TM suspend is enabled */
	tm_suspend_enabled = true;

	node = dt_find_by_path(dt_root, "/ibm,opal/fw-features/tm-suspend-mode");
	if (!node)
		return;

	if (dt_find_property(node, "disabled"))
		tm_suspend_enabled = false;
}

void init_cpu_max_pir(void)
{
	struct dt_node *cpus, *cpu;

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		if (cpu_max_pir < pir + cpu_thread_count - 1)
			cpu_max_pir = pir + cpu_thread_count - 1;
	}

	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir);
}

void init_all_cpus(void)
{
	struct dt_node *cpus, *cpu;
	unsigned int thread;
	int dec_bits = find_dec_bits();

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	init_tm_suspend_mode_property();

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no, chip_id;
		enum cpu_thread_state state;
		const struct dt_property *p;
		struct cpu_thread *t, *pt;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		/* We should always have an ibm,chip-id property */
		chip_id = dt_get_chip_id(cpu);

		/* Only use operational CPUs */
		if (!strcmp(dt_prop_get(cpu, "status"), "okay"))
			state = cpu_state_present;
		else
			state = cpu_state_unavailable;

		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
		      " State=%d\n", pir, server_no, state);

		/* Setup thread 0 */
		assert(pir <= cpu_max_pir);
		t = pt = &cpu_stacks[pir].cpu;
		if (t != boot_cpu) {
			init_cpu_thread(t, state, pir);
			/* Each cpu gets its own later in init_trace_buffers */
			t->trace = boot_cpu->trace;
		}
		t->server_no = server_no;
		t->primary = t;
		t->node = cpu;
		t->chip_id = chip_id;
		t->icp_regs = NULL; /* Will be set later */
#ifdef DEBUG_LOCKS
		t->requested_lock = NULL;
#endif
		t->core_hmi_state = 0;
		t->core_hmi_state_ptr = &t->core_hmi_state;

		/* Add associativity properties */
		add_core_associativity(t);

		/* Add the decrementer width property */
		dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits);

		/* Iterate threads */
		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
		if (!p)
			continue;
		for (thread = 1; thread < (p->len / 4); thread++) {
			prlog(PR_TRACE, "CPU:   secondary thread %d found\n",
			      thread);
			t = &cpu_stacks[pir + thread].cpu;
			init_cpu_thread(t, state, pir + thread);
			t->trace = boot_cpu->trace;
			t->server_no = ((const u32 *)p->prop)[thread];
			t->is_secondary = true;
			t->primary = pt;
			t->node = cpu;
			t->chip_id = chip_id;
			t->core_hmi_state_ptr = &pt->core_hmi_state;
		}
		prlog(PR_INFO, "CPU:  %d secondary threads\n", thread);
	}
}

void cpu_bringup(void)
{
	struct cpu_thread *t;
	uint32_t count = 0;

	prlog(PR_INFO, "CPU: Setting up secondary CPU state\n");

	op_display(OP_LOG, OP_MOD_CPU, 0x0000);

	/* Tell everybody to chime in ! */
	prlog(PR_INFO, "CPU: Calling in all processors...\n");
	cpu_secondary_start = 1;
	sync();

	op_display(OP_LOG, OP_MOD_CPU, 0x0002);

	for_each_cpu(t) {
		if (t->state != cpu_state_present &&
		    t->state != cpu_state_active)
			continue;

		/* Add a callin timeout ?  If so, call cpu_remove_node(t). */
		while (t->state != cpu_state_active) {
			smt_lowest();
			sync();
		}
		smt_medium();
		count++;
	}

	prlog(PR_NOTICE, "CPU: All %d processors called in...\n", count);

	op_display(OP_LOG, OP_MOD_CPU, 0x0003);
}

void cpu_callin(struct cpu_thread *cpu)
{
	sync();
	cpu->state = cpu_state_active;
	sync();

	cpu->job_has_no_return = false;
	if (cpu_is_thread0(cpu))
		init_hid();
}

static void opal_start_thread_job(void *data)
{
	cpu_give_self_os();

	/* We do not return, so let's mark the job as
	 * complete
	 */
	start_kernel_secondary((uint64_t)data);
}

static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	if (!opal_addr_valid((void *)start_address))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
	       server_no, cpu->pir, start_address);

	lock(&reinit_lock);
	if (!cpu_is_available(cpu)) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU not active in OPAL !\n");
		return OPAL_WRONG_STATE;
	}
	if (cpu->in_reinit) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU being reinitialized !\n");
		return OPAL_WRONG_STATE;
	}
	job = __cpu_queue_job(cpu, "start_thread",
			      opal_start_thread_job, (void *)start_address,
			      true);
	unlock(&reinit_lock);
	if (!job) {
		prerror("OPAL: Failed to create CPU start job !\n");
		return OPAL_INTERNAL_ERROR;
	}
	return OPAL_SUCCESS;
}
opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);

static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
{
	struct cpu_thread *cpu;

	if (!opal_addr_valid(thread_status))
		return OPAL_PARAMETER;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
		prerror("OPAL: CPU not active in OPAL nor OS !\n");
		return OPAL_PARAMETER;
	}
	switch(cpu->state) {
	case cpu_state_os:
		*thread_status = OPAL_THREAD_STARTED;
		break;
	case cpu_state_active:
		/* Active in skiboot -> inactive in OS */
		*thread_status = OPAL_THREAD_INACTIVE;
		break;
	default:
		*thread_status = OPAL_THREAD_UNAVAILABLE;
	}

	return OPAL_SUCCESS;
}
opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);

static int64_t opal_return_cpu(void)
{
	prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);

	this_cpu()->in_opal_call--;
	if (this_cpu()->in_opal_call != 0) {
		printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call);
	}

	__secondary_cpu_entry();

	return OPAL_HARDWARE; /* Should not happen */
}
opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);

struct hid0_change_req {
	uint64_t clr_bits;
	uint64_t set_bits;
};

static void cpu_change_hid0(void *__req)
{
	struct hid0_change_req *req = __req;
	unsigned long hid0, new_hid0;

	hid0 = new_hid0 = mfspr(SPR_HID0);
	new_hid0 &= ~req->clr_bits;
	new_hid0 |= req->set_bits;
	prlog(PR_DEBUG, "CPU: [%08x] HID0 change 0x%016lx -> 0x%016lx\n",
		this_cpu()->pir, hid0, new_hid0);
	set_hid0(new_hid0);
}

static int64_t cpu_change_all_hid0(struct hid0_change_req *req)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (!cpu_is_thread0(cpu))
			continue;
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_change_hid0",
						cpu_change_hid0, req);
	}

	/* this cpu */
	cpu_change_hid0(req);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

static void cpu_cleanup_one(void *param __unused)
{
	mtspr(SPR_AMR, 0);
	mtspr(SPR_IAMR, 0);
	mtspr(SPR_PCR, 0);
}

static int64_t cpu_cleanup_all(void)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup",
						cpu_cleanup_one, NULL);
	}

	/* this cpu */
	cpu_cleanup_one(NULL);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);

	return OPAL_SUCCESS;
}

void cpu_fast_reboot_complete(void)
{
	/* Fast reboot will have cleared HID0:HILE */
	current_hile_mode = false;

	/* and set HID0:RADIX */
	current_radix_mode = true;
}

static int64_t opal_reinit_cpus(uint64_t flags)
{
	struct hid0_change_req req = { 0, 0 };
	struct cpu_thread *cpu;
	int64_t rc = OPAL_SUCCESS;
	int i;

	prlog(PR_DEBUG, "OPAL: CPU re-init with flags: 0x%llx\n", flags);

	if (flags & OPAL_REINIT_CPUS_HILE_LE)
		prlog(PR_INFO, "OPAL: Switch to little-endian OS\n");
	else if (flags & OPAL_REINIT_CPUS_HILE_BE)
		prlog(PR_INFO, "OPAL: Switch to big-endian OS\n");

 again:
	lock(&reinit_lock);

	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
		if (cpu == this_cpu() || cpu->in_reinit)
			continue;
		if (cpu->state == cpu_state_os) {
			unlock(&reinit_lock);
			/*
			 * This might be a race with a CPU returning to OPAL
			 * during kexec; wait a bit and try again
			 */
			for (i = 0; (i < 1000) &&
				     (cpu->state == cpu_state_os); i++) {
				time_wait_ms(1);
			}
			if (cpu->state == cpu_state_os) {
				prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
				return OPAL_WRONG_STATE;
			}
			goto again;
		}
		cpu->in_reinit = true;
	}
	/*
	 * Now we need to mark ourselves "active" or we'll be skipped
	 * by the various "for_each_active_..." calls done by slw_reinit()
	 */
	this_cpu()->state = cpu_state_active;
	this_cpu()->in_reinit = true;
	unlock(&reinit_lock);

	/*
	 * This cleans up a few things left over by Linux
	 * that can cause problems in cases such as radix->hash
	 * transitions. Ideally Linux should do it but doing it
	 * here works around existing broken kernels.
	 */
	cpu_cleanup_all();

	/* If HILE change via HID0 is supported ... */
	if (hile_supported &&
	    (flags & (OPAL_REINIT_CPUS_HILE_BE |
		      OPAL_REINIT_CPUS_HILE_LE))) {
		bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE);

		flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE);
		if (hile != current_hile_mode) {
			if (hile)
				req.set_bits |= hid0_hile;
			else
				req.clr_bits |= hid0_hile;
			current_hile_mode = hile;
		}
	}

	/* If MMU mode change is supported */
	if (radix_supported &&
	    (flags & (OPAL_REINIT_CPUS_MMU_HASH |
		      OPAL_REINIT_CPUS_MMU_RADIX))) {
		bool radix = !!(flags & OPAL_REINIT_CPUS_MMU_RADIX);

		flags &= ~(OPAL_REINIT_CPUS_MMU_HASH |
			   OPAL_REINIT_CPUS_MMU_RADIX);
		if (radix != current_radix_mode) {
			if (radix)
				req.set_bits |= SPR_HID0_POWER9_RADIX;
			else
				req.clr_bits |= SPR_HID0_POWER9_RADIX;

			current_radix_mode = radix;
		}
	}

	/* Clean up the TLB. We do that unconditionally, this works
	 * around issues where OSes fail to invalidate the PWC in Radix
	 * mode for example. This only works on P9 and later, but we
	 * also know we don't have a problem with Linux cleanups on
	 * P8 so this isn't a problem. If we wanted to clean up the
	 * TLB on P8 as well, we'd have to use jobs to do it locally
	 * on each CPU.
	 */
	cleanup_global_tlb();

	/* Apply HID bits changes if any */
	if (req.set_bits || req.clr_bits)
		cpu_change_all_hid0(&req);

	if (flags & OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) {
		flags &= ~OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED;

		if (tm_suspend_enabled)
			rc = OPAL_UNSUPPORTED;
		else
			rc = OPAL_SUCCESS;
	}

	/* Handle P8 DD1 SLW reinit */
	if (flags != 0 && proc_gen == proc_gen_p8 && !hile_supported)
		rc = slw_reinit(flags);
	else if (flags != 0)
		rc = OPAL_UNSUPPORTED;

	/* And undo the above */
	lock(&reinit_lock);
	this_cpu()->state = cpu_state_os;
	for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
		cpu->in_reinit = false;
	unlock(&reinit_lock);

	return rc;
}
opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);

#define NMMU_XLAT_CTL_PTCR 0xb
static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr)
{
	uint32_t nmmu_base_addr;

	nmmu_base_addr = dt_get_address(node, 0, NULL);
	return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr);
}

/*
 * Set up the Nest MMU PTCR register for all chips in the system or
 * the specified chip id.
 *
 * The PTCR value may be overwritten so long as all users have been
 * quiesced. If it is set to an invalid memory address the system will
 * checkstop if anything attempts to use it.
 *
 * Returns OPAL_UNSUPPORTED if no nest mmu was found.
 */
static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr)
{
	struct dt_node *node;
	int64_t rc = OPAL_UNSUPPORTED;

	if (chip_id == -1ULL)
		dt_for_each_compatible(dt_root, node, "ibm,power9-nest-mmu") {
			chip_id = dt_get_chip_id(node);
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;
		}
	else
		dt_for_each_compatible_on_chip(dt_root, node, "ibm,power9-nest-mmu", chip_id)
			if ((rc = nmmu_set_ptcr(chip_id, node, ptcr)))
				return rc;

	return rc;
}
opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2);