// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * TLB flush routines for radix kernels.
 *
 * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
#include <linux/mmu_context.h>
#include <linux/sched/mm.h>

#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/trace.h>
#include <asm/cputhreads.h>
#include <asm/plpar_wrappers.h>

#include "internal.h"

#define RIC_FLUSH_TLB 0
#define RIC_FLUSH_PWC 1
#define RIC_FLUSH_ALL 2

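/*
 * RIC (Radix Invalidation Control) selects what tlbie[l] invalidates:
 * TLB entries only, the Page Walk Cache only, or (per ISA v3.0) all
 * cached translations including table entries.
 */
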
/*
 * tlbiel instruction for radix, set invalidation
 * i.e., r=1 and is=01 or is=10 or is=11
 */
static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
					unsigned int pid,
					unsigned int ric, unsigned int prs)
{
	unsigned long rb;
	unsigned long rs;

	rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
	rs = ((unsigned long)pid << PPC_BITLSHIFT(31));

	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
		     : "memory");
}

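/*
 * Flush all local translations for the given scope ("is" value) by
 * stepping through every TLB set. Note: ISA v3.1 (CPU_FTR_ARCH_31)
 * CPUs flush all sets with a single RIC=ALL tlbiel, so the per-set
 * loop is skipped there.
 */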
static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
{
	unsigned int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and the entire Page Walk Cache
	 * and partition table entries. Then flush the remaining sets of the
	 * TLB.
	 */

	if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
		/* MSR[HV] should flush partition scope translations first. */
		tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);

		if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
			for (set = 1; set < num_sets; set++)
				tlbiel_radix_set_isa300(set, is, 0,
							RIC_FLUSH_TLB, 0);
		}
	}

	/* Flush process scoped entries. */
	tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);

	if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
		for (set = 1; set < num_sets; set++)
			tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
	}

	ppc_after_tlbiel_barrier();
}

void radix__tlbiel_all(unsigned int action)
{
	unsigned int is;

	switch (action) {
	case TLB_INVAL_SCOPE_GLOBAL:
		is = 3;
		break;
	case TLB_INVAL_SCOPE_LPID:
		is = 2;
		break;
	default:
		BUG();
	}

	if (early_cpu_has_feature(CPU_FTR_ARCH_300))
		tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
	else
		WARN(1, "%s called on pre-POWER9 CPU\n", __func__);

	asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
}

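/*
 * Low-level PID-scoped invalidations: IS=1 in RB selects "all entries
 * matching the PID in RS". The set number is only meaningful for the
 * local (tlbiel) variant, which walks the TLB one congruence class at
 * a time.
 */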
static __always_inline void __tlbiel_pid(unsigned long pid, int set,
				unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rb |= set << PPC_BITLSHIFT(51);
	rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(53); /* IS = 1 */
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = PPC_BIT(52); /* IS = 2 */
	rs = lpid;
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

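/*
 * VA-scoped invalidations (IS=0): RB carries the effective address and
 * the AP (actual page size) encoding, RS carries the PID or LPID.
 */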
static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
					unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 1, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
				       unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = pid << PPC_BITLSHIFT(31);
	prs = 1; /* process scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(0, 0, rb, rs, ric, prs, r);
}

static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
					    unsigned long ap, unsigned long ric)
{
	unsigned long rb,rs,prs,r;

	rb = va & ~(PPC_BITMASK(52, 63));
	rb |= ap << PPC_BITLSHIFT(58);
	rs = lpid;
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */

	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
	trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
}

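/*
 * POWER9 tlbie errata workarounds, run after the "real" invalidation:
 * with CPU_FTR_P9_TLBIE_ERAT_BUG an extra flush with PID/LPID 0 is
 * issued, and with CPU_FTR_P9_TLBIE_STQ_BUG the invalidation is simply
 * repeated.
 */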
static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
				  unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
					unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_pid(unsigned long pid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_pid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
				       unsigned long ap)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
	}
}

static inline void fixup_tlbie_lpid(unsigned long lpid)
{
	/*
	 * We can use any address for the invalidation, pick one which is
	 * probably unused as an optimisation.
	 */
	unsigned long va = ((1UL << 52) - 1);

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid(0, RIC_FLUSH_TLB);
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		asm volatile("ptesync": : :"memory");
		__tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
	}
}

/*
 * We use 128 sets in radix mode and 256 sets in HPT mode.
 */
static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
	int set;

	asm volatile("ptesync": : :"memory");

	/*
	 * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL,
	 * also flush the entire Page Walk Cache.
	 */
	__tlbiel_pid(pid, 0, ric);

	/* For PWC, only one flush is needed */
	if (ric == RIC_FLUSH_PWC) {
		ppc_after_tlbiel_barrier();
		return;
	}

	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
		/* For the remaining sets, just flush the TLB */
		for (set = 1; set < POWER9_TLB_SETS_RADIX; set++)
			__tlbiel_pid(pid, set, RIC_FLUSH_TLB);
	}

	ppc_after_tlbiel_barrier();
	asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
}

static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_pid(pid, RIC_FLUSH_TLB);
		fixup_tlbie_pid(pid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_pid(pid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_pid(pid, RIC_FLUSH_ALL);
		fixup_tlbie_pid(pid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

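/*
 * Broadcast-free alternative to global tlbie: IPI every CPU in
 * mm_cpumask and run the local tlbiel sequence there. Coprocessors
 * (nMMU) still need a real tlbie, which the *_multicast callers issue
 * in addition.
 */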
struct tlbiel_pid {
	unsigned long pid;
	unsigned long ric;
};

static void do_tlbiel_pid(void *info)
{
	struct tlbiel_pid *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_pid(t->pid, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_pid(t->pid, RIC_FLUSH_PWC);
	else
		_tlbiel_pid(t->pid, RIC_FLUSH_ALL);
}

static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
				unsigned long pid, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_pid t = { .pid = pid, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
	/*
	 * Always want the CPU translations to be invalidated with tlbiel in
	 * these paths, so while coprocessors must use tlbie, we can not
	 * optimise away the tlbiel component.
	 */
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_pid(pid, RIC_FLUSH_ALL);
}

static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
{
	asm volatile("ptesync": : :"memory");

	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid(lpid, RIC_FLUSH_TLB);
		fixup_tlbie_lpid(lpid);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid(lpid, RIC_FLUSH_ALL);
		fixup_tlbie_lpid(lpid);
	}
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
{
	/*
	 * Workaround the fact that the "ric" argument to __tlbie_pid
	 * must be a compile-time constant to match the "i" constraint
	 * in the asm statement.
	 */
	switch (ric) {
	case RIC_FLUSH_TLB:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
		break;
	case RIC_FLUSH_PWC:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
		break;
	case RIC_FLUSH_ALL:
	default:
		__tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
	}
	fixup_tlbie_lpid(lpid);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
				     unsigned long pid, unsigned long page_size,
				     unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
}

static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
				       unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbiel_va(va, pid, ap, ric);
	ppc_after_tlbiel_barrier();
}

static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
	__tlbiel_va_range(start, end, pid, page_size, psize);
	ppc_after_tlbiel_barrier();
}

static inline void __tlbie_va_range(unsigned long start, unsigned long end,
				    unsigned long pid, unsigned long page_size,
				    unsigned long psize)
{
	unsigned long addr;
	unsigned long ap = mmu_get_ap(psize);

	for (addr = start; addr < end; addr += page_size)
		__tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);

	fixup_tlbie_va_range(addr - page_size, pid, ap);
}

static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
				      unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_va(va, pid, ap, ric);
	fixup_tlbie_va(va, pid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

struct tlbiel_va {
	unsigned long pid;
	unsigned long va;
	unsigned long psize;
	unsigned long ric;
};

static void do_tlbiel_va(void *info)
{
	struct tlbiel_va *t = info;

	if (t->ric == RIC_FLUSH_TLB)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_TLB);
	else if (t->ric == RIC_FLUSH_PWC)
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_PWC);
	else
		_tlbiel_va(t->va, t->pid, t->psize, RIC_FLUSH_ALL);
}

static inline void _tlbiel_va_multicast(struct mm_struct *mm,
				unsigned long va, unsigned long pid,
				unsigned long psize, unsigned long ric)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };

	on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
}

struct tlbiel_va_range {
	unsigned long pid;
	unsigned long start;
	unsigned long end;
	unsigned long page_size;
	unsigned long psize;
	bool also_pwc;
};

static void do_tlbiel_va_range(void *info)
{
	struct tlbiel_va_range *t = info;

	_tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
			 t->psize, t->also_pwc);
}

static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
					   unsigned long psize, unsigned long ric)
{
	unsigned long ap = mmu_get_ap(psize);

	asm volatile("ptesync": : :"memory");
	__tlbie_lpid_va(va, lpid, ap, ric);
	fixup_tlbie_lpid_va(va, lpid, ap);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbie_va_range(unsigned long start, unsigned long end,
				   unsigned long pid, unsigned long page_size,
				   unsigned long psize, bool also_pwc)
{
	asm volatile("ptesync": : :"memory");
	if (also_pwc)
		__tlbie_pid(pid, RIC_FLUSH_PWC);
	__tlbie_va_range(start, end, pid, page_size, psize);
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				unsigned long pid, unsigned long page_size,
				unsigned long psize, bool also_pwc)
{
	struct cpumask *cpus = mm_cpumask(mm);
	struct tlbiel_va_range t = { .start = start, .end = end,
				     .pid = pid, .page_size = page_size,
				     .psize = psize, .also_pwc = also_pwc };

	on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
	if (atomic_read(&mm->context.copros) > 0)
		_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
}

/*
 * Base TLB flushing operations:
 *
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes kernel pages
 *
 *  - local_* variants of page and mm only apply to the current
 *    processor
 */
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);

#ifndef CONFIG_SMP
void radix__local_flush_all_mm(struct mm_struct *mm)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_all_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	radix__local_flush_all_mm(mm);
}
#endif /* CONFIG_SMP */

void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				       int psize)
{
	unsigned long pid;

	preempt_disable();
	pid = mm->context.id;
	if (pid != MMU_NO_CONTEXT)
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	preempt_enable();
}

void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	/* need the return fix for nohash.c */
	if (is_vm_hugetlb_page(vma))
		return radix__local_flush_hugetlb_page(vma, vmaddr);
#endif
	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);

static bool mm_needs_flush_escalation(struct mm_struct *mm)
{
	/*
	 * P9 nest MMU has issues with the page walk cache
	 * caching PTEs and not flushing them properly when
	 * RIC = 0 for a PID/LPID invalidate
	 */
	if (atomic_read(&mm->context.copros) > 0)
		return true;
	return false;
}

/*
 * If always_flush is true, then flush even if this CPU can't be removed
 * from mm_cpumask.
 */
void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
{
	unsigned long pid = mm->context.id;
	int cpu = smp_processor_id();

	/*
	 * A kthread could have done a mmget_not_zero() after the flushing CPU
	 * checked mm_cpumask, and be in the process of kthread_use_mm when
	 * interrupted here. In that case, current->mm will be set to mm,
	 * because kthread_use_mm() setting ->mm and switching to the mm is
	 * done with interrupts off.
	 */
	if (current->mm == mm)
		goto out;

	if (current->active_mm == mm) {
		WARN_ON_ONCE(current->mm != NULL);
		/* Is a kernel thread and is using mm as the lazy tlb */
		mmgrab(&init_mm);
		current->active_mm = &init_mm;
		switch_mm_irqs_off(mm, &init_mm, current);
		mmdrop(mm);
	}

	/*
	 * This IPI may be initiated from any source including those not
	 * running the mm, so there may be a racing IPI that comes after
	 * this one which finds the cpumask already clear. Check and avoid
	 * underflowing the active_cpus count in that case. The race should
	 * not otherwise be a problem, but the TLB must be flushed because
	 * that's what the caller expects.
	 */
	if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		atomic_dec(&mm->context.active_cpus);
		cpumask_clear_cpu(cpu, mm_cpumask(mm));
		always_flush = true;
	}

out:
	if (always_flush)
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
}

#ifdef CONFIG_SMP
static void do_exit_flush_lazy_tlb(void *arg)
{
	struct mm_struct *mm = arg;
	exit_lazy_flush_tlb(mm, true);
}

static void exit_flush_lazy_tlbs(struct mm_struct *mm)
{
	/*
	 * Would be nice if this was async so it could be run in
	 * parallel with our local flush, but generic code does not
	 * give a good API for it. Could extend the generic code or
	 * make a special powerpc IPI for flushing TLBs.
	 * For now it's not too performance critical.
	 */
	smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
			       (void *)mm, 1);
}

#else /* CONFIG_SMP */
static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
#endif /* CONFIG_SMP */

static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);

/*
 * Interval between flushes at which we send out IPIs to check whether the
 * mm_cpumask can be trimmed for the case where it's not a single-threaded
 * process flushing its own mm. The intent is to reduce the cost of later
 * flushes. Don't want this to be so low that it adds noticeable cost to TLB
 * flushing, or so high that it doesn't help reduce global TLBIEs.
 */
static unsigned long tlb_mm_cpumask_trim_timer = 1073;

static bool tick_and_test_trim_clock(void)
{
	if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
			tlb_mm_cpumask_trim_timer) {
		__this_cpu_write(mm_cpumask_trim_clock, 0);
		return true;
	}
	return false;
}

enum tlb_flush_type {
	FLUSH_TYPE_NONE,
	FLUSH_TYPE_LOCAL,
	FLUSH_TYPE_GLOBAL,
};

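/*
 * Decide how a flush must be performed: NONE if no CPU is using the
 * mm, LOCAL if only this CPU is, otherwise GLOBAL. As a side effect,
 * occasionally try to trim stale CPUs out of mm_cpumask so that later
 * flushes can stay local.
 */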
static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
{
	int active_cpus = atomic_read(&mm->context.active_cpus);
	int cpu = smp_processor_id();

	if (active_cpus == 0)
		return FLUSH_TYPE_NONE;
	if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
		if (current->mm != mm) {
			/*
			 * Asynchronous flush sources may trim down to nothing
			 * if the process is not running, so occasionally try
			 * to trim.
			 */
			if (tick_and_test_trim_clock()) {
				exit_lazy_flush_tlb(mm, true);
				return FLUSH_TYPE_NONE;
			}
		}
		return FLUSH_TYPE_LOCAL;
	}

	/* Coprocessors require TLBIE to invalidate nMMU. */
	if (atomic_read(&mm->context.copros) > 0)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
	 * because the mm is being taken down anyway, and a TLBIE tends to
	 * be faster than an IPI+TLBIEL.
	 */
	if (fullmm)
		return FLUSH_TYPE_GLOBAL;

	/*
	 * If we are running the only thread of a single-threaded process,
	 * then we should almost always be able to trim off the rest of the
	 * CPU mask (except in the case of use_mm() races), so always try
	 * trimming the mask.
	 */
	if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
		exit_flush_lazy_tlbs(mm);
		/*
		 * use_mm() race could prevent IPIs from being able to clear
		 * the cpumask here, however those users are established
		 * after our first check (and so after the PTEs are removed),
		 * and the TLB still gets flushed by the IPI, so this CPU
		 * will only require a local flush.
		 */
		return FLUSH_TYPE_LOCAL;
	}

	/*
	 * Occasionally try to trim down the cpumask. It's possible this can
	 * bring the mask to zero, which results in no flush.
	 */
	if (tick_and_test_trim_clock()) {
		exit_flush_lazy_tlbs(mm);
		if (current->mm == mm)
			return FLUSH_TYPE_LOCAL;
		if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
			exit_lazy_flush_tlb(mm, true);
		return FLUSH_TYPE_NONE;
	}

	return FLUSH_TYPE_GLOBAL;
}

#ifdef CONFIG_SMP
void radix__flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	/*
	 * Order loads of mm_cpumask (in flush_type_needed) vs previous
	 * stores to clear ptes before the invalidate. See barrier in
	 * switch_mm_irqs_off
	 */
	smp_mb();
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie()) {
			if (mm_needs_flush_escalation(mm))
				_tlbie_pid(pid, RIC_FLUSH_ALL);
			else
				_tlbie_pid(pid, RIC_FLUSH_TLB);
		} else {
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
		}
	}
	preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);

static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_pid(pid, RIC_FLUSH_ALL);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt = H_RPTI_TARGET_CMMU;
			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
					     H_RPTI_TYPE_PRT;

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type,
					       H_RPTI_PAGE_ALL, 0, -1UL);
		} else if (cputlb_use_tlbie())
			_tlbie_pid(pid, RIC_FLUSH_ALL);
		else
			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
	}
	preempt_enable();
}

void radix__flush_all_mm(struct mm_struct *mm)
{
	__flush_all_mm(mm, false);
}
EXPORT_SYMBOL(radix__flush_all_mm);

void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
				 int psize)
{
	unsigned long pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, pg_sizes, size;

			tgt = H_RPTI_TARGET_CMMU;
			pg_sizes = psize_to_rpti_pgsize(psize);
			size = 1UL << mmu_psize_to_shift(psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
					       pg_sizes, vmaddr,
					       vmaddr + size);
		} else if (cputlb_use_tlbie())
			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
		else
			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
	}
	preempt_enable();
}

void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_page(vma, vmaddr);
#endif
	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);

#endif /* CONFIG_SMP */

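/*
 * Kernel translations are cached under PID 0, so a full PID-0 flush on
 * each CPU (IPI + tlbiel), plus a tlbie for coherent accelerators,
 * invalidates them everywhere.
 */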
static void do_tlbiel_kernel(void *info)
{
	_tlbiel_pid(0, RIC_FLUSH_ALL);
}

static inline void _tlbiel_kernel_broadcast(void)
{
	on_each_cpu(do_tlbiel_kernel, NULL, 1);
	if (tlbie_capable) {
		/*
		 * Coherent accelerators don't refcount kernel memory mappings,
		 * so have to always issue a tlbie for them. This is quite a
		 * slow path anyway.
		 */
		_tlbie_pid(0, RIC_FLUSH_ALL);
	}
}

/*
 * If kernel TLBIs ever become local rather than global, then
 * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
 * assumes kernel TLBIs are global.
 */
void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	if (!mmu_has_feature(MMU_FTR_GTSE)) {
		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
				     H_RPTI_TYPE_PRT;

		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
				       start, end);
	} else if (cputlb_use_tlbie())
		_tlbie_pid(0, RIC_FLUSH_ALL);
	else
		_tlbiel_kernel_broadcast();
}
EXPORT_SYMBOL(radix__flush_tlb_kernel_range);

#define TLB_FLUSH_ALL -1UL

/*
 * Number of pages above which we invalidate the entire PID rather than
 * flush individual pages, for local and global flushes respectively.
 *
 * tlbie goes out to the interconnect and individual ops are more costly.
 * It also does not iterate over sets like the local tlbiel variant when
 * invalidating a full PID, so it has a far lower threshold to change from
 * individual page flushes to full-pid flushes.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
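/*
 * For example, with POWER9_TLB_SETS_RADIX = 128 the local ceiling works
 * out to 256 pages: a local flush of more than 256 individual pages is
 * done as one full-PID tlbiel sequence instead, while a global tlbie
 * flush escalates after only 33 pages.
 */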

static inline void __radix__flush_tlb_range(struct mm_struct *mm,
					    unsigned long start, unsigned long end)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
				       start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					_tlbie_pid(pid, RIC_FLUSH_ALL);
				else
					_tlbie_pid(pid, RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
			}
		}
	} else {
		bool hflush = false;
		unsigned long hstart, hend;

		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
			hend = end & PMD_MASK;
			if (hstart < hend)
				hflush = true;
		}

		if (type == FLUSH_TYPE_LOCAL) {
			asm volatile("ptesync": : :"memory");
			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbiel_va_range(hstart, hend, pid,
						  PMD_SIZE, MMU_PAGE_2M);
			ppc_after_tlbiel_barrier();
		} else if (cputlb_use_tlbie()) {
			asm volatile("ptesync": : :"memory");
			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
			if (hflush)
				__tlbie_va_range(hstart, hend, pid,
						 PMD_SIZE, MMU_PAGE_2M);
			asm volatile("eieio; tlbsync; ptesync": : :"memory");
		} else {
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, mmu_virtual_psize, false);
			if (hflush)
				_tlbiel_va_range_multicast(mm,
					hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
		}
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
			    unsigned long end)
{
#ifdef CONFIG_HUGETLB_PAGE
	if (is_vm_hugetlb_page(vma))
		return radix__flush_hugetlb_tlb_range(vma, start, end);
#endif

	__radix__flush_tlb_range(vma->vm_mm, start, end);
}
EXPORT_SYMBOL(radix__flush_tlb_range);

static int radix_get_mmu_psize(int page_size)
{
	int psize;

	if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
		psize = mmu_virtual_psize;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
		psize = MMU_PAGE_2M;
	else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
		psize = MMU_PAGE_1G;
	else
		return -1;
	return psize;
}

/*
 * Flush partition scoped LPID address translation for all CPUs.
 */
void radix__flush_tlb_lpid_page(unsigned int lpid,
				unsigned long addr,
				unsigned long page_size)
{
	int psize = radix_get_mmu_psize(page_size);

	_tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
}
EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);

/*
 * Flush partition scoped PWC from LPID for all CPUs.
 */
void radix__flush_pwc_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_PWC);
}
EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);

/*
 * Flush partition scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid(unsigned int lpid)
{
	_tlbie_lpid(lpid, RIC_FLUSH_ALL);
}
EXPORT_SYMBOL_GPL(radix__flush_all_lpid);

/*
 * Flush process scoped translations from LPID (=LPIDR)
 */
void radix__flush_all_lpid_guest(unsigned int lpid)
{
	_tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize);

void radix__tlb_flush(struct mmu_gather *tlb)
{
	int psize = 0;
	struct mm_struct *mm = tlb->mm;
	int page_size = tlb->page_size;
	unsigned long start = tlb->start;
	unsigned long end = tlb->end;

	/*
	 * if page size is not something we understand, do a full mm flush
	 *
	 * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
	 * that flushes the process table entry cache upon process teardown.
	 * See the comment for radix in arch_exit_mmap().
	 */
	if (tlb->fullmm || tlb->need_flush_all) {
		__flush_all_mm(mm, true);
	} else if ((psize = radix_get_mmu_psize(page_size)) == -1) {
		if (!tlb->freed_tables)
			radix__flush_tlb_mm(mm);
		else
			radix__flush_all_mm(mm);
	} else {
		if (!tlb->freed_tables)
			radix__flush_tlb_range_psize(mm, start, end, psize);
		else
			radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
	}
}

static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
				unsigned long start, unsigned long end,
				int psize, bool also_pwc)
{
	unsigned long pid;
	unsigned int page_shift = mmu_psize_defs[psize].shift;
	unsigned long page_size = 1UL << page_shift;
	unsigned long nr_pages = (end - start) >> page_shift;
	bool fullmm = (end == TLB_FLUSH_ALL);
	bool flush_pid;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, fullmm);
	if (type == FLUSH_TYPE_NONE)
		goto out;

	if (fullmm)
		flush_pid = true;
	else if (type == FLUSH_TYPE_GLOBAL)
		flush_pid = nr_pages > tlb_single_page_flush_ceiling;
	else
		flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;

	if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
		unsigned long tgt = H_RPTI_TARGET_CMMU;
		unsigned long type = H_RPTI_TYPE_TLB;
		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);

		if (also_pwc)
			type |= H_RPTI_TYPE_PWC;
		if (atomic_read(&mm->context.copros) > 0)
			tgt |= H_RPTI_TARGET_NMMU;
		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
	} else if (flush_pid) {
		if (type == FLUSH_TYPE_LOCAL) {
			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
		} else {
			if (cputlb_use_tlbie()) {
				if (mm_needs_flush_escalation(mm))
					also_pwc = true;

				_tlbie_pid(pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			} else {
				_tlbiel_pid_multicast(mm, pid,
					also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
			}

		}
	} else {
		if (type == FLUSH_TYPE_LOCAL)
			_tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
		else if (cputlb_use_tlbie())
			_tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
		else
			_tlbiel_va_range_multicast(mm,
					start, end, pid, page_size, psize, also_pwc);
	}
out:
	preempt_enable();
}

void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
				  unsigned long end, int psize)
{
	return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
}

static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
					     unsigned long end, int psize)
{
	__radix__flush_tlb_range_psize(mm, start, end, psize, true);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
{
	unsigned long pid, end;
	enum tlb_flush_type type;

	pid = mm->context.id;
	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	/* 4k page size, just blow the world */
	if (PAGE_SIZE == 0x1000) {
		radix__flush_all_mm(mm);
		return;
	}

	end = addr + HPAGE_PMD_SIZE;

	/* Otherwise first do the PWC, then iterate the pages. */
	preempt_disable();
	smp_mb(); /* see radix__flush_tlb_mm */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_LOCAL) {
		_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	} else if (type == FLUSH_TYPE_GLOBAL) {
		if (!mmu_has_feature(MMU_FTR_GTSE)) {
			unsigned long tgt, type, pg_sizes;

			tgt = H_RPTI_TARGET_CMMU;
			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
			       H_RPTI_TYPE_PRT;
			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);

			if (atomic_read(&mm->context.copros) > 0)
				tgt |= H_RPTI_TARGET_NMMU;
			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
					       addr, end);
		} else if (cputlb_use_tlbie())
			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
		else
			_tlbiel_va_range_multicast(mm,
					addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
	}

	preempt_enable();
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
}
EXPORT_SYMBOL(radix__flush_pmd_tlb_range);

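/*
 * Flush absolutely everything: first guest translations (PRS=1,
 * LPID != 0), then host translations (PRS=0, LPID=0), using tlbie
 * with IS=3.
 */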
void radix__flush_tlb_all(void)
{
	unsigned long rb,prs,r,rs;
	unsigned long ric = RIC_FLUSH_ALL;

	rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
	prs = 0; /* partition scoped */
	r = 1;   /* radix format */
	rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */

	asm volatile("ptesync": : :"memory");
	/*
	 * now flush guest entries by passing PRS = 1 and LPID != 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
	/*
	 * now flush host entries by passing PRS = 0 and LPID == 0
	 */
	asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
		     : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
	asm volatile("eieio; tlbsync; ptesync": : :"memory");
}

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
	unsigned long pid = mm->context.id;

	if (unlikely(pid == MMU_NO_CONTEXT))
		return;

	if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
		return;

	/*
	 * If this context hasn't run on that CPU before and KVM is
	 * around, there's a slim chance that the guest on another
	 * CPU just brought in obsolete translation into the TLB of
	 * this CPU due to a bad prefetch using the guest PID on
	 * the way into the hypervisor.
	 *
	 * We work around this here. If KVM is possible, we check if
	 * any sibling thread is in KVM. If it is, the window may exist
	 * and thus we flush that PID from the core.
	 *
	 * A potential future improvement would be to mark which PIDs
	 * have never been used on the system and avoid it if the PID
	 * is new and the process has no other cpumask bit set.
	 */
	if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
		int cpu = smp_processor_id();
		int sib = cpu_first_thread_sibling(cpu);
		bool flush = false;

		for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
			if (sib == cpu)
				continue;
			if (!cpu_possible(sib))
				continue;
			if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
				flush = true;
		}
		if (flush)
			_tlbiel_pid(pid, RIC_FLUSH_ALL);
	}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */