1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * IOMMU API for ARM architected SMMUv3 implementations.
4  *
5  * Copyright (C) 2015 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver is powered by bad coffee and bombay mix.
10  */
11 
12 #include <linux/acpi.h>
13 #include <linux/acpi_iort.h>
14 #include <linux/bitops.h>
15 #include <linux/crash_dump.h>
16 #include <linux/delay.h>
17 #include <linux/err.h>
18 #include <linux/interrupt.h>
19 #include <linux/io-pgtable.h>
20 #include <linux/iopoll.h>
21 #include <linux/module.h>
22 #include <linux/msi.h>
23 #include <linux/of.h>
24 #include <linux/of_address.h>
25 #include <linux/of_platform.h>
26 #include <linux/pci.h>
27 #include <linux/pci-ats.h>
28 #include <linux/platform_device.h>
29 
30 #include "arm-smmu-v3.h"
31 #include "../../dma-iommu.h"
32 #include "../../iommu-sva.h"
33 
34 static bool disable_bypass = true;
35 module_param(disable_bypass, bool, 0444);
36 MODULE_PARM_DESC(disable_bypass,
37 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
38 
39 static bool disable_msipolling;
40 module_param(disable_msipolling, bool, 0444);
41 MODULE_PARM_DESC(disable_msipolling,
42 	"Disable MSI-based polling for CMD_SYNC completion.");
43 
44 enum arm_smmu_msi_index {
45 	EVTQ_MSI_INDEX,
46 	GERROR_MSI_INDEX,
47 	PRIQ_MSI_INDEX,
48 	ARM_SMMU_MAX_MSIS,
49 };
50 
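/*
 * Register triplets used when steering the event, GERROR and PRI queue
 * interrupts to MSIs: CFG0 takes the doorbell address, CFG1 the payload and
 * CFG2 the memory attributes/shareability of the resulting write.
 */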
51 static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
52 	[EVTQ_MSI_INDEX] = {
53 		ARM_SMMU_EVTQ_IRQ_CFG0,
54 		ARM_SMMU_EVTQ_IRQ_CFG1,
55 		ARM_SMMU_EVTQ_IRQ_CFG2,
56 	},
57 	[GERROR_MSI_INDEX] = {
58 		ARM_SMMU_GERROR_IRQ_CFG0,
59 		ARM_SMMU_GERROR_IRQ_CFG1,
60 		ARM_SMMU_GERROR_IRQ_CFG2,
61 	},
62 	[PRIQ_MSI_INDEX] = {
63 		ARM_SMMU_PRIQ_IRQ_CFG0,
64 		ARM_SMMU_PRIQ_IRQ_CFG1,
65 		ARM_SMMU_PRIQ_IRQ_CFG2,
66 	},
67 };
68 
69 struct arm_smmu_option_prop {
70 	u32 opt;
71 	const char *prop;
72 };
73 
74 DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
75 DEFINE_MUTEX(arm_smmu_asid_lock);
76 
77 /*
78  * Special value used by SVA when a process dies, to quiesce a CD without
79  * disabling it.
80  */
81 struct arm_smmu_ctx_desc quiet_cd = { 0 };
82 
83 static struct arm_smmu_option_prop arm_smmu_options[] = {
84 	{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
85 	{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
86 	{ 0, NULL},
87 };
88 
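/* Translate firmware quirk properties into ARM_SMMU_OPT_* flags. */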
89 static void parse_driver_options(struct arm_smmu_device *smmu)
90 {
91 	int i = 0;
92 
93 	do {
94 		if (of_property_read_bool(smmu->dev->of_node,
95 						arm_smmu_options[i].prop)) {
96 			smmu->options |= arm_smmu_options[i].opt;
97 			dev_notice(smmu->dev, "option %s\n",
98 				arm_smmu_options[i].prop);
99 		}
100 	} while (arm_smmu_options[++i].opt);
101 }
102 
103 /* Low-level queue manipulation functions */
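/*
 * prod and cons each pack an index, a wrap bit and an overflow flag into one
 * 32-bit word (see the Q_IDX/Q_WRP/Q_OVF helpers). Comparing the wrap bits
 * distinguishes a full queue from an empty one even though the indices are
 * equal in both cases: with an 8-entry queue (max_n_shift == 3),
 * prod == 0b1000 and cons == 0b0000 is full, while prod == cons is empty.
 */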
104 static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
105 {
106 	u32 space, prod, cons;
107 
108 	prod = Q_IDX(q, q->prod);
109 	cons = Q_IDX(q, q->cons);
110 
111 	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
112 		space = (1 << q->max_n_shift) - (prod - cons);
113 	else
114 		space = cons - prod;
115 
116 	return space >= n;
117 }
118 
119 static bool queue_full(struct arm_smmu_ll_queue *q)
120 {
121 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
122 	       Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
123 }
124 
125 static bool queue_empty(struct arm_smmu_ll_queue *q)
126 {
127 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
128 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
129 }
130 
131 static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
132 {
133 	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
134 		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
135 	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
136 		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
137 }
138 
139 static void queue_sync_cons_out(struct arm_smmu_queue *q)
140 {
141 	/*
142 	 * Ensure that all CPU accesses (reads and writes) to the queue
143 	 * are complete before we update the cons pointer.
144 	 */
145 	__iomb();
146 	writel_relaxed(q->llq.cons, q->cons_reg);
147 }
148 
149 static void queue_inc_cons(struct arm_smmu_ll_queue *q)
150 {
151 	u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;

152 	q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
153 }
154 
155 static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
156 {
157 	struct arm_smmu_ll_queue *llq = &q->llq;
158 
159 	if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
160 		return;
161 
162 	llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
163 		      Q_IDX(llq, llq->cons);
164 	queue_sync_cons_out(q);
165 }
166 
167 static int queue_sync_prod_in(struct arm_smmu_queue *q)
168 {
169 	u32 prod;
170 	int ret = 0;
171 
172 	/*
173 	 * We can't use the _relaxed() variant here, as we must prevent
174 	 * speculative reads of the queue before we have determined that
175 	 * prod has indeed moved.
176 	 */
177 	prod = readl(q->prod_reg);
178 
179 	if (Q_OVF(prod) != Q_OVF(q->llq.prod))
180 		ret = -EOVERFLOW;
181 
182 	q->llq.prod = prod;
183 	return ret;
184 }
185 
186 static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
187 {
188 	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
189 	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
190 }
191 
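/*
 * Bounded polling: wait with wfe() when the SMMU can send SEV wake-ups,
 * otherwise spin for ARM_SMMU_POLL_SPIN_COUNT iterations before backing off
 * with an exponentially growing udelay(). queue_poll() returns -ETIMEDOUT
 * once ARM_SMMU_POLL_TIMEOUT_US has elapsed.
 */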
192 static void queue_poll_init(struct arm_smmu_device *smmu,
193 			    struct arm_smmu_queue_poll *qp)
194 {
195 	qp->delay = 1;
196 	qp->spin_cnt = 0;
197 	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
198 	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
199 }
200 
201 static int queue_poll(struct arm_smmu_queue_poll *qp)
202 {
203 	if (ktime_compare(ktime_get(), qp->timeout) > 0)
204 		return -ETIMEDOUT;
205 
206 	if (qp->wfe) {
207 		wfe();
208 	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
209 		cpu_relax();
210 	} else {
211 		udelay(qp->delay);
212 		qp->delay *= 2;
213 		qp->spin_cnt = 0;
214 	}
215 
216 	return 0;
217 }
218 
219 static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
220 {
221 	int i;
222 
223 	for (i = 0; i < n_dwords; ++i)
224 		*dst++ = cpu_to_le64(*src++);
225 }
226 
227 static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
228 {
229 	int i;
230 
231 	for (i = 0; i < n_dwords; ++i)
232 		*dst++ = le64_to_cpu(*src++);
233 }
234 
235 static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
236 {
237 	if (queue_empty(&q->llq))
238 		return -EAGAIN;
239 
240 	queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
241 	queue_inc_cons(&q->llq);
242 	queue_sync_cons_out(q);
243 	return 0;
244 }
245 
246 /* High-level queue accessors */
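/*
 * Encode an arm_smmu_cmdq_ent into the 128-bit command consumed by the
 * hardware. Returns -EINVAL for a malformed PRI response and -ENOENT for an
 * unknown opcode; callers warn and drop the command rather than queueing it.
 */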
247 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
248 {
249 	memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
250 	cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
251 
252 	switch (ent->opcode) {
253 	case CMDQ_OP_TLBI_EL2_ALL:
254 	case CMDQ_OP_TLBI_NSNH_ALL:
255 		break;
256 	case CMDQ_OP_PREFETCH_CFG:
257 		cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
258 		break;
259 	case CMDQ_OP_CFGI_CD:
260 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
261 		fallthrough;
262 	case CMDQ_OP_CFGI_STE:
263 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
264 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
265 		break;
266 	case CMDQ_OP_CFGI_CD_ALL:
267 		cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
268 		break;
269 	case CMDQ_OP_CFGI_ALL:
270 		/* Cover the entire SID range */
271 		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
272 		break;
273 	case CMDQ_OP_TLBI_NH_VA:
274 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
275 		fallthrough;
276 	case CMDQ_OP_TLBI_EL2_VA:
277 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
278 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
279 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
280 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
281 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
282 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
283 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
284 		break;
285 	case CMDQ_OP_TLBI_S2_IPA:
286 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
287 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
288 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
289 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
290 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
291 		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
292 		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
293 		break;
294 	case CMDQ_OP_TLBI_NH_ASID:
295 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
296 		fallthrough;
297 	case CMDQ_OP_TLBI_S12_VMALL:
298 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
299 		break;
300 	case CMDQ_OP_TLBI_EL2_ASID:
301 		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
302 		break;
303 	case CMDQ_OP_ATC_INV:
304 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
305 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
306 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
307 		cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
308 		cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
309 		cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
310 		break;
311 	case CMDQ_OP_PRI_RESP:
312 		cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
313 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
314 		cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
315 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
316 		switch (ent->pri.resp) {
317 		case PRI_RESP_DENY:
318 		case PRI_RESP_FAIL:
319 		case PRI_RESP_SUCC:
320 			break;
321 		default:
322 			return -EINVAL;
323 		}
324 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
325 		break;
326 	case CMDQ_OP_RESUME:
327 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
328 		cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
329 		cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
330 		break;
331 	case CMDQ_OP_CMD_SYNC:
332 		if (ent->sync.msiaddr) {
333 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
334 			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
335 		} else {
336 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
337 		}
338 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
339 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
340 		break;
341 	default:
342 		return -ENOENT;
343 	}
344 
345 	return 0;
346 }
347 
348 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
349 {
350 	return &smmu->cmdq;
351 }
352 
353 static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
354 					 struct arm_smmu_queue *q, u32 prod)
355 {
356 	struct arm_smmu_cmdq_ent ent = {
357 		.opcode = CMDQ_OP_CMD_SYNC,
358 	};
359 
360 	/*
361 	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
362 	 * payload, so the write will zero the entire command on that platform.
363 	 */
364 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
365 		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
366 				   q->ent_dwords * 8;
367 	}
368 
369 	arm_smmu_cmdq_build_cmd(cmd, &ent);
370 }
371 
372 static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
373 				     struct arm_smmu_queue *q)
374 {
375 	static const char * const cerror_str[] = {
376 		[CMDQ_ERR_CERROR_NONE_IDX]	= "No error",
377 		[CMDQ_ERR_CERROR_ILL_IDX]	= "Illegal command",
378 		[CMDQ_ERR_CERROR_ABT_IDX]	= "Abort on command fetch",
379 		[CMDQ_ERR_CERROR_ATC_INV_IDX]	= "ATC invalidate timeout",
380 	};
381 
382 	int i;
383 	u64 cmd[CMDQ_ENT_DWORDS];
384 	u32 cons = readl_relaxed(q->cons_reg);
385 	u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
386 	struct arm_smmu_cmdq_ent cmd_sync = {
387 		.opcode = CMDQ_OP_CMD_SYNC,
388 	};
389 
390 	dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
391 		idx < ARRAY_SIZE(cerror_str) ?  cerror_str[idx] : "Unknown");
392 
393 	switch (idx) {
394 	case CMDQ_ERR_CERROR_ABT_IDX:
395 		dev_err(smmu->dev, "retrying command fetch\n");
396 		return;
397 	case CMDQ_ERR_CERROR_NONE_IDX:
398 		return;
399 	case CMDQ_ERR_CERROR_ATC_INV_IDX:
400 		/*
401 		 * ATC Invalidation Completion timeout. CONS is still pointing
402 		 * at the CMD_SYNC. Attempt to complete other pending commands
403 		 * by repeating the CMD_SYNC, though we might well end up back
404 		 * here since the ATC invalidation may still be pending.
405 		 */
406 		return;
407 	case CMDQ_ERR_CERROR_ILL_IDX:
408 	default:
409 		break;
410 	}
411 
412 	/*
413 	 * We may have concurrent producers, so we need to be careful
414 	 * not to touch any of the shadow cmdq state.
415 	 */
416 	queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
417 	dev_err(smmu->dev, "skipping command in error state:\n");
418 	for (i = 0; i < ARRAY_SIZE(cmd); ++i)
419 		dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
420 
421 	/* Convert the erroneous command into a CMD_SYNC */
422 	arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
423 
424 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
425 }
426 
427 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
428 {
429 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
430 }
431 
432 /*
433  * Command queue locking.
434  * This is a form of bastardised rwlock with the following major changes:
435  *
436  * - The only LOCK routines are exclusive_trylock() and shared_lock().
437  *   Neither have barrier semantics, and instead provide only a control
438  *   dependency.
439  *
440  * - The UNLOCK routines are supplemented with shared_tryunlock(), which
441  *   fails if the caller appears to be the last lock holder (yes, this is
442  *   racy). All successful UNLOCK routines have RELEASE semantics.
443  */
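/*
 * In practice: every CMD_SYNC inserter takes the lock as shared before
 * marking its slot valid, so a failed shared_tryunlock() means the caller is
 * the last holder and can safely update the shadow cons pointer before
 * dropping the lock. The exclusive form is only used to refresh that shadow
 * cons from the hardware in arm_smmu_cmdq_poll_until_not_full().
 */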
444 static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
445 {
446 	int val;
447 
448 	/*
449 	 * We can try to avoid the cmpxchg() loop by simply incrementing the
450 	 * lock counter. When held in exclusive state, the lock counter is set
451 	 * to INT_MIN so these increments won't hurt as the value will remain
452 	 * negative.
453 	 */
454 	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
455 		return;
456 
457 	do {
458 		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
459 	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
460 }
461 
462 static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
463 {
464 	(void)atomic_dec_return_release(&cmdq->lock);
465 }
466 
467 static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
468 {
469 	if (atomic_read(&cmdq->lock) == 1)
470 		return false;
471 
472 	arm_smmu_cmdq_shared_unlock(cmdq);
473 	return true;
474 }
475 
476 #define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
477 ({									\
478 	bool __ret;							\
479 	local_irq_save(flags);						\
480 	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
481 	if (!__ret)							\
482 		local_irq_restore(flags);				\
483 	__ret;								\
484 })
485 
486 #define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
487 ({									\
488 	atomic_set_release(&cmdq->lock, 0);				\
489 	local_irq_restore(flags);					\
490 })
491 
492 
493 /*
494  * Command queue insertion.
495  * This is made fiddly by our attempts to achieve some sort of scalability
496  * since there is one queue shared amongst all of the CPUs in the system.  If
497  * you like mixed-size concurrency, dependency ordering and relaxed atomics,
498  * then you'll *love* this monstrosity.
499  *
500  * The basic idea is to split the queue up into ranges of commands that are
501  * owned by a given CPU; the owner may not have written all of the commands
502  * itself, but is responsible for advancing the hardware prod pointer when
503  * the time comes. The algorithm is roughly:
504  *
505  * 	1. Allocate some space in the queue. At this point we also discover
506  *	   whether the head of the queue is currently owned by another CPU,
507  *	   or whether we are the owner.
508  *
509  *	2. Write our commands into our allocated slots in the queue.
510  *
511  *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
512  *
513  *	4. If we are an owner:
514  *		a. Wait for the previous owner to finish.
515  *		b. Mark the queue head as unowned, which tells us the range
516  *		   that we are responsible for publishing.
517  *		c. Wait for all commands in our owned range to become valid.
518  *		d. Advance the hardware prod pointer.
519  *		e. Tell the next owner we've finished.
520  *
521  *	5. If we are inserting a CMD_SYNC (we may or may not have been an
522  *	   owner), then we need to stick around until it has completed:
523  *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
524  *		   to clear the first 4 bytes.
525  *		b. Otherwise, we spin waiting for the hardware cons pointer to
526  *		   advance past our command.
527  *
528  * The devil is in the details, particularly the use of locking for handling
529  * SYNC completion and freeing up space in the queue before we think that it is
530  * full.
531  */
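/*
 * A note on the valid bitmap handled below: there is one bit per queue entry
 * and its expected polarity flips on each trip around the queue
 * (valid == !wrap), so producers flip their bits with an atomic XOR and the
 * owner polls for its range to reach the polarity implied by the wrap bit of
 * the prod value being published.
 */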
532 static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
533 					       u32 sprod, u32 eprod, bool set)
534 {
535 	u32 swidx, sbidx, ewidx, ebidx;
536 	struct arm_smmu_ll_queue llq = {
537 		.max_n_shift	= cmdq->q.llq.max_n_shift,
538 		.prod		= sprod,
539 	};
540 
541 	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
542 	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
543 
544 	while (llq.prod != eprod) {
545 		unsigned long mask;
546 		atomic_long_t *ptr;
547 		u32 limit = BITS_PER_LONG;
548 
549 		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
550 		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
551 
552 		ptr = &cmdq->valid_map[swidx];
553 
554 		if ((swidx == ewidx) && (sbidx < ebidx))
555 			limit = ebidx;
556 
557 		mask = GENMASK(limit - 1, sbidx);
558 
559 		/*
560 		 * The valid bit is the inverse of the wrap bit. This means
561 		 * that a zero-initialised queue is invalid and, after marking
562 		 * all entries as valid, they become invalid again when we
563 		 * wrap.
564 		 */
565 		if (set) {
566 			atomic_long_xor(mask, ptr);
567 		} else { /* Poll */
568 			unsigned long valid;
569 
570 			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
571 			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
572 		}
573 
574 		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
575 	}
576 }
577 
578 /* Mark all entries in the range [sprod, eprod) as valid */
579 static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
580 					u32 sprod, u32 eprod)
581 {
582 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
583 }
584 
585 /* Wait for all entries in the range [sprod, eprod) to become valid */
586 static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
587 					 u32 sprod, u32 eprod)
588 {
589 	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
590 }
591 
592 /* Wait for the command queue to become non-full */
593 static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
594 					     struct arm_smmu_ll_queue *llq)
595 {
596 	unsigned long flags;
597 	struct arm_smmu_queue_poll qp;
598 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
599 	int ret = 0;
600 
601 	/*
602 	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
603 	 * that fails, spin until somebody else updates it for us.
604 	 */
605 	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
606 		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
607 		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
608 		llq->val = READ_ONCE(cmdq->q.llq.val);
609 		return 0;
610 	}
611 
612 	queue_poll_init(smmu, &qp);
613 	do {
614 		llq->val = READ_ONCE(cmdq->q.llq.val);
615 		if (!queue_full(llq))
616 			break;
617 
618 		ret = queue_poll(&qp);
619 	} while (!ret);
620 
621 	return ret;
622 }
623 
624 /*
625  * Wait until the SMMU signals a CMD_SYNC completion MSI.
626  * Must be called with the cmdq lock held in some capacity.
627  */
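/*
 * The completion MSI targets the CMD_SYNC entry itself (see
 * arm_smmu_cmdq_build_sync_cmd()) and carries a zero payload, so completion
 * is observed as the first 32 bits of the command becoming zero.
 */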
628 static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
629 					  struct arm_smmu_ll_queue *llq)
630 {
631 	int ret = 0;
632 	struct arm_smmu_queue_poll qp;
633 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
634 	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
635 
636 	queue_poll_init(smmu, &qp);
637 
638 	/*
639 	 * The MSI won't generate an event, since it's being written back
640 	 * into the command queue.
641 	 */
642 	qp.wfe = false;
643 	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
644 	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
645 	return ret;
646 }
647 
648 /*
649  * Wait until the SMMU cons index passes llq->prod.
650  * Must be called with the cmdq lock held in some capacity.
651  */
652 static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
653 					       struct arm_smmu_ll_queue *llq)
654 {
655 	struct arm_smmu_queue_poll qp;
656 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
657 	u32 prod = llq->prod;
658 	int ret = 0;
659 
660 	queue_poll_init(smmu, &qp);
661 	llq->val = READ_ONCE(cmdq->q.llq.val);
662 	do {
663 		if (queue_consumed(llq, prod))
664 			break;
665 
666 		ret = queue_poll(&qp);
667 
668 		/*
669 		 * This needs to be a readl() so that our subsequent call
670 		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
671 		 *
672 		 * Specifically, we need to ensure that we observe all
673 		 * shared_lock()s by other CMD_SYNCs that share our owner,
674 		 * so that a failing call to tryunlock() means that we're
675 		 * the last one out and therefore we can safely advance
676 		 * cmdq->q.llq.cons. Roughly speaking:
677 		 *
678 		 * CPU 0		CPU1			CPU2 (us)
679 		 *
680 		 * if (sync)
681 		 * 	shared_lock();
682 		 *
683 		 * dma_wmb();
684 		 * set_valid_map();
685 		 *
686 		 * 			if (owner) {
687 		 *				poll_valid_map();
688 		 *				<control dependency>
689 		 *				writel(prod_reg);
690 		 *
691 		 *						readl(cons_reg);
692 		 *						tryunlock();
693 		 *
694 		 * Requires us to see CPU 0's shared_lock() acquisition.
695 		 */
696 		llq->cons = readl(cmdq->q.cons_reg);
697 	} while (!ret);
698 
699 	return ret;
700 }
701 
702 static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
703 					 struct arm_smmu_ll_queue *llq)
704 {
705 	if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
706 		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
707 
708 	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
709 }
710 
711 static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
712 					u32 prod, int n)
713 {
714 	int i;
715 	struct arm_smmu_ll_queue llq = {
716 		.max_n_shift	= cmdq->q.llq.max_n_shift,
717 		.prod		= prod,
718 	};
719 
720 	for (i = 0; i < n; ++i) {
721 		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
722 
723 		prod = queue_inc_prod_n(&llq, i);
724 		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
725 	}
726 }
727 
728 /*
729  * This is the actual insertion function, and provides the following
730  * ordering guarantees to callers:
731  *
732  * - There is a dma_wmb() before publishing any commands to the queue.
733  *   This can be relied upon to order prior writes to data structures
734  *   in memory (such as a CD or an STE) before the command.
735  *
736  * - On completion of a CMD_SYNC, there is a control dependency.
737  *   This can be relied upon to order subsequent writes to memory (e.g.
738  *   freeing an IOVA) after completion of the CMD_SYNC.
739  *
740  * - Command insertion is totally ordered, so if two CPUs each race to
741  *   insert their own list of commands then all of the commands from one
742  *   CPU will appear before any of the commands from the other CPU.
743  */
744 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
745 				       u64 *cmds, int n, bool sync)
746 {
747 	u64 cmd_sync[CMDQ_ENT_DWORDS];
748 	u32 prod;
749 	unsigned long flags;
750 	bool owner;
751 	struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
752 	struct arm_smmu_ll_queue llq, head;
753 	int ret = 0;
754 
755 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
756 
757 	/* 1. Allocate some space in the queue */
758 	local_irq_save(flags);
759 	llq.val = READ_ONCE(cmdq->q.llq.val);
760 	do {
761 		u64 old;
762 
763 		while (!queue_has_space(&llq, n + sync)) {
764 			local_irq_restore(flags);
765 			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
766 				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
767 			local_irq_save(flags);
768 		}
769 
770 		head.cons = llq.cons;
771 		head.prod = queue_inc_prod_n(&llq, n + sync) |
772 					     CMDQ_PROD_OWNED_FLAG;
773 
774 		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
775 		if (old == llq.val)
776 			break;
777 
778 		llq.val = old;
779 	} while (1);
780 	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
781 	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
782 	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
783 
784 	/*
785 	 * 2. Write our commands into the queue
786 	 * Dependency ordering from the cmpxchg() loop above.
787 	 */
788 	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
789 	if (sync) {
790 		prod = queue_inc_prod_n(&llq, n);
791 		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
792 		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
793 
794 		/*
795 		 * In order to determine completion of our CMD_SYNC, we must
796 		 * ensure that the queue can't wrap twice without us noticing.
797 		 * We achieve that by taking the cmdq lock as shared before
798 		 * marking our slot as valid.
799 		 */
800 		arm_smmu_cmdq_shared_lock(cmdq);
801 	}
802 
803 	/* 3. Mark our slots as valid, ensuring commands are visible first */
804 	dma_wmb();
805 	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
806 
807 	/* 4. If we are the owner, take control of the SMMU hardware */
808 	if (owner) {
809 		/* a. Wait for previous owner to finish */
810 		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
811 
812 		/* b. Stop gathering work by clearing the owned flag */
813 		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
814 						   &cmdq->q.llq.atomic.prod);
815 		prod &= ~CMDQ_PROD_OWNED_FLAG;
816 
817 		/*
818 		 * c. Wait for any gathered work to be written to the queue.
819 		 * Note that we read our own entries so that we have the control
820 		 * dependency required by (d).
821 		 */
822 		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
823 
824 		/*
825 		 * d. Advance the hardware prod pointer
826 		 * Control dependency ordering from the entries becoming valid.
827 		 */
828 		writel_relaxed(prod, cmdq->q.prod_reg);
829 
830 		/*
831 		 * e. Tell the next owner we're done
832 		 * Make sure we've updated the hardware first, so that we don't
833 		 * race to update prod and potentially move it backwards.
834 		 */
835 		atomic_set_release(&cmdq->owner_prod, prod);
836 	}
837 
838 	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
839 	if (sync) {
840 		llq.prod = queue_inc_prod_n(&llq, n);
841 		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
842 		if (ret) {
843 			dev_err_ratelimited(smmu->dev,
844 					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
845 					    llq.prod,
846 					    readl_relaxed(cmdq->q.prod_reg),
847 					    readl_relaxed(cmdq->q.cons_reg));
848 		}
849 
850 		/*
851 		 * Try to unlock the cmdq lock. This will fail if we're the last
852 		 * reader, in which case we can safely update cmdq->q.llq.cons
853 		 */
854 		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
855 			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
856 			arm_smmu_cmdq_shared_unlock(cmdq);
857 		}
858 	}
859 
860 	local_irq_restore(flags);
861 	return ret;
862 }
863 
864 static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
865 				     struct arm_smmu_cmdq_ent *ent,
866 				     bool sync)
867 {
868 	u64 cmd[CMDQ_ENT_DWORDS];
869 
870 	if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
871 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
872 			 ent->opcode);
873 		return -EINVAL;
874 	}
875 
876 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
877 }
878 
879 static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
880 				   struct arm_smmu_cmdq_ent *ent)
881 {
882 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
883 }
884 
885 static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
886 					     struct arm_smmu_cmdq_ent *ent)
887 {
888 	return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
889 }
890 
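/*
 * Batch helper: commands are staged in cmds->cmds and flushed without a sync
 * whenever the batch fills up, or with a trailing CMD_SYNC when the caller
 * invokes arm_smmu_cmdq_batch_submit().
 */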
891 static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
892 				    struct arm_smmu_cmdq_batch *cmds,
893 				    struct arm_smmu_cmdq_ent *cmd)
894 {
895 	int index;
896 
897 	if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
898 	    (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
899 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
900 		cmds->num = 0;
901 	}
902 
903 	if (cmds->num == CMDQ_BATCH_ENTRIES) {
904 		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
905 		cmds->num = 0;
906 	}
907 
908 	index = cmds->num * CMDQ_ENT_DWORDS;
909 	if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
910 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
911 			 cmd->opcode);
912 		return;
913 	}
914 
915 	cmds->num++;
916 }
917 
918 static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
919 				      struct arm_smmu_cmdq_batch *cmds)
920 {
921 	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
922 }
923 
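/*
 * IOMMU page-response callback: translate the core's response code into a
 * RESUME command for stalled transactions. PRI is not handled here, so
 * masters without stall support get -ENODEV.
 */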
924 static int arm_smmu_page_response(struct device *dev,
925 				  struct iommu_fault_event *unused,
926 				  struct iommu_page_response *resp)
927 {
928 	struct arm_smmu_cmdq_ent cmd = {0};
929 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
930 	int sid = master->streams[0].id;
931 
932 	if (master->stall_enabled) {
933 		cmd.opcode		= CMDQ_OP_RESUME;
934 		cmd.resume.sid		= sid;
935 		cmd.resume.stag		= resp->grpid;
936 		switch (resp->code) {
937 		case IOMMU_PAGE_RESP_INVALID:
938 		case IOMMU_PAGE_RESP_FAILURE:
939 			cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
940 			break;
941 		case IOMMU_PAGE_RESP_SUCCESS:
942 			cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
943 			break;
944 		default:
945 			return -EINVAL;
946 		}
947 	} else {
948 		return -ENODEV;
949 	}
950 
951 	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
952 	/*
953 	 * Don't send a SYNC: it doesn't do anything for RESUME or PRI_RESP.
954 	 * RESUME consumption guarantees that the stalled transaction will be
955 	 * terminated... at some point in the future. PRI_RESP is fire and
956 	 * forget.
957 	 */
958 
959 	return 0;
960 }
961 
962 /* Context descriptor manipulation functions */
963 void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
964 {
965 	struct arm_smmu_cmdq_ent cmd = {
966 		.opcode	= smmu->features & ARM_SMMU_FEAT_E2H ?
967 			CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
968 		.tlbi.asid = asid,
969 	};
970 
971 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
972 }
973 
974 static void arm_smmu_sync_cd(struct arm_smmu_master *master,
975 			     int ssid, bool leaf)
976 {
977 	size_t i;
978 	struct arm_smmu_cmdq_batch cmds;
979 	struct arm_smmu_device *smmu = master->smmu;
980 	struct arm_smmu_cmdq_ent cmd = {
981 		.opcode	= CMDQ_OP_CFGI_CD,
982 		.cfgi	= {
983 			.ssid	= ssid,
984 			.leaf	= leaf,
985 		},
986 	};
987 
988 	cmds.num = 0;
989 	for (i = 0; i < master->num_streams; i++) {
990 		cmd.cfgi.sid = master->streams[i].id;
991 		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
992 	}
993 
994 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
995 }
996 
997 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
998 					struct arm_smmu_l1_ctx_desc *l1_desc)
999 {
1000 	size_t size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1001 
1002 	l1_desc->l2ptr = dmam_alloc_coherent(smmu->dev, size,
1003 					     &l1_desc->l2ptr_dma, GFP_KERNEL);
1004 	if (!l1_desc->l2ptr) {
1005 		dev_warn(smmu->dev,
1006 			 "failed to allocate context descriptor table\n");
1007 		return -ENOMEM;
1008 	}
1009 	return 0;
1010 }
1011 
1012 static void arm_smmu_write_cd_l1_desc(__le64 *dst,
1013 				      struct arm_smmu_l1_ctx_desc *l1_desc)
1014 {
1015 	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
1016 		  CTXDESC_L1_DESC_V;
1017 
1018 	/* See comment in arm_smmu_write_ctx_desc() */
1019 	WRITE_ONCE(*dst, cpu_to_le64(val));
1020 }
1021 
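/*
 * Return a pointer to the context descriptor for @ssid, allocating and
 * publishing the level-2 leaf table on demand when a two-level table is in
 * use. The new L1 descriptor is synced immediately because the SMMU may have
 * cached the old, invalid entry.
 */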
1022 static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
1023 {
1024 	__le64 *l1ptr;
1025 	unsigned int idx;
1026 	struct arm_smmu_l1_ctx_desc *l1_desc;
1027 	struct arm_smmu_device *smmu = master->smmu;
1028 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1029 
1030 	if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
1031 		return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
1032 
1033 	idx = ssid >> CTXDESC_SPLIT;
1034 	l1_desc = &cd_table->l1_desc[idx];
1035 	if (!l1_desc->l2ptr) {
1036 		if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
1037 			return NULL;
1038 
1039 		l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
1040 		arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
1041 		/* An invalid L1CD can be cached */
1042 		arm_smmu_sync_cd(master, ssid, false);
1043 	}
1044 	idx = ssid & (CTXDESC_L2_ENTRIES - 1);
1045 	return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
1046 }
1047 
1048 int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
1049 			    struct arm_smmu_ctx_desc *cd)
1050 {
1051 	/*
1052 	 * This function handles the following cases:
1053 	 *
1054 	 * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0).
1055 	 * (2) Install a secondary CD, for SID+SSID traffic.
1056 	 * (3) Update ASID of a CD. Atomically write the first 64 bits of the
1057 	 *     CD, then invalidate the old entry and mappings.
1058 	 * (4) Quiesce the context without clearing the valid bit. Disable
1059 	 *     translation, and ignore any translation fault.
1060 	 * (5) Remove a secondary CD.
1061 	 */
1062 	u64 val;
1063 	bool cd_live;
1064 	__le64 *cdptr;
1065 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1066 	struct arm_smmu_device *smmu = master->smmu;
1067 
1068 	if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
1069 		return -E2BIG;
1070 
1071 	cdptr = arm_smmu_get_cd_ptr(master, ssid);
1072 	if (!cdptr)
1073 		return -ENOMEM;
1074 
1075 	val = le64_to_cpu(cdptr[0]);
1076 	cd_live = !!(val & CTXDESC_CD_0_V);
1077 
1078 	if (!cd) { /* (5) */
1079 		val = 0;
1080 	} else if (cd == &quiet_cd) { /* (4) */
1081 		if (!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
1082 			val &= ~(CTXDESC_CD_0_S | CTXDESC_CD_0_R);
1083 		val |= CTXDESC_CD_0_TCR_EPD0;
1084 	} else if (cd_live) { /* (3) */
1085 		val &= ~CTXDESC_CD_0_ASID;
1086 		val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid);
1087 		/*
1088 		 * Until CD+TLB invalidation, both ASIDs may be used for tagging
1089 		 * this substream's traffic
1090 		 */
1091 	} else { /* (1) and (2) */
1092 		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
1093 		cdptr[2] = 0;
1094 		cdptr[3] = cpu_to_le64(cd->mair);
1095 
1096 		/*
1097 		 * STE may be live, and the SMMU might read dwords of this CD in any
1098 		 * order. Ensure that it observes valid values before reading
1099 		 * V=1.
1100 		 */
1101 		arm_smmu_sync_cd(master, ssid, true);
1102 
1103 		val = cd->tcr |
1104 #ifdef __BIG_ENDIAN
1105 			CTXDESC_CD_0_ENDI |
1106 #endif
1107 			CTXDESC_CD_0_R | CTXDESC_CD_0_A |
1108 			(cd->mm ? 0 : CTXDESC_CD_0_ASET) |
1109 			CTXDESC_CD_0_AA64 |
1110 			FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
1111 			CTXDESC_CD_0_V;
1112 
1113 		if (cd_table->stall_enabled)
1114 			val |= CTXDESC_CD_0_S;
1115 	}
1116 
1117 	/*
1118 	 * The SMMU accesses 64-bit values atomically. See IHI0070Ca 3.21.3
1119 	 * "Configuration structures and configuration invalidation completion"
1120 	 *
1121 	 *   The size of single-copy atomic reads made by the SMMU is
1122 	 *   IMPLEMENTATION DEFINED but must be at least 64 bits. Any single
1123 	 *   field within an aligned 64-bit span of a structure can be altered
1124 	 *   without first making the structure invalid.
1125 	 */
1126 	WRITE_ONCE(cdptr[0], cpu_to_le64(val));
1127 	arm_smmu_sync_cd(master, ssid, true);
1128 	return 0;
1129 }
1130 
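/*
 * Choose a linear CD table when all contexts fit in a single leaf (or when
 * the SMMU lacks two-level support); otherwise allocate only the L1 array
 * here and defer leaf allocation to arm_smmu_get_cd_ptr().
 */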
1131 static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
1132 {
1133 	int ret;
1134 	size_t l1size;
1135 	size_t max_contexts;
1136 	struct arm_smmu_device *smmu = master->smmu;
1137 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1138 
1139 	cd_table->stall_enabled = master->stall_enabled;
1140 	cd_table->s1cdmax = master->ssid_bits;
1141 	max_contexts = 1 << cd_table->s1cdmax;
1142 
1143 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
1144 	    max_contexts <= CTXDESC_L2_ENTRIES) {
1145 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
1146 		cd_table->num_l1_ents = max_contexts;
1147 
1148 		l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
1149 	} else {
1150 		cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
1151 		cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
1152 						  CTXDESC_L2_ENTRIES);
1153 
1154 		cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
1155 					      sizeof(*cd_table->l1_desc),
1156 					      GFP_KERNEL);
1157 		if (!cd_table->l1_desc)
1158 			return -ENOMEM;
1159 
1160 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1161 	}
1162 
1163 	cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
1164 					   GFP_KERNEL);
1165 	if (!cd_table->cdtab) {
1166 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
1167 		ret = -ENOMEM;
1168 		goto err_free_l1;
1169 	}
1170 
1171 	return 0;
1172 
1173 err_free_l1:
1174 	if (cd_table->l1_desc) {
1175 		devm_kfree(smmu->dev, cd_table->l1_desc);
1176 		cd_table->l1_desc = NULL;
1177 	}
1178 	return ret;
1179 }
1180 
1181 static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
1182 {
1183 	int i;
1184 	size_t size, l1size;
1185 	struct arm_smmu_device *smmu = master->smmu;
1186 	struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
1187 
1188 	if (cd_table->l1_desc) {
1189 		size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
1190 
1191 		for (i = 0; i < cd_table->num_l1_ents; i++) {
1192 			if (!cd_table->l1_desc[i].l2ptr)
1193 				continue;
1194 
1195 			dmam_free_coherent(smmu->dev, size,
1196 					   cd_table->l1_desc[i].l2ptr,
1197 					   cd_table->l1_desc[i].l2ptr_dma);
1198 		}
1199 		devm_kfree(smmu->dev, cd_table->l1_desc);
1200 		cd_table->l1_desc = NULL;
1201 
1202 		l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
1203 	} else {
1204 		l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
1205 	}
1206 
1207 	dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
1208 	cd_table->cdtab_dma = 0;
1209 	cd_table->cdtab = NULL;
1210 }
1211 
1212 bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
1213 {
1214 	bool free;
1215 	struct arm_smmu_ctx_desc *old_cd;
1216 
1217 	if (!cd->asid)
1218 		return false;
1219 
1220 	free = refcount_dec_and_test(&cd->refs);
1221 	if (free) {
1222 		old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid);
1223 		WARN_ON(old_cd != cd);
1224 	}
1225 	return free;
1226 }
1227 
1228 /* Stream table manipulation functions */
1229 static void
1230 arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
1231 {
1232 	u64 val = 0;
1233 
1234 	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
1235 	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
1236 
1237 	/* See comment in arm_smmu_write_ctx_desc() */
1238 	WRITE_ONCE(*dst, cpu_to_le64(val));
1239 }
1240 
1241 static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
1242 {
1243 	struct arm_smmu_cmdq_ent cmd = {
1244 		.opcode	= CMDQ_OP_CFGI_STE,
1245 		.cfgi	= {
1246 			.sid	= sid,
1247 			.leaf	= true,
1248 		},
1249 	};
1250 
1251 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1252 }
1253 
1254 static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
1255 				      struct arm_smmu_ste *dst)
1256 {
1257 	/*
1258 	 * This is hideously complicated, but we only really care about
1259 	 * three cases at the moment:
1260 	 *
1261 	 * 1. Invalid (all zero) -> bypass/fault (init)
1262 	 * 2. Bypass/fault -> translation/bypass (attach)
1263 	 * 3. Translation/bypass -> bypass/fault (detach)
1264 	 *
1265 	 * Given that we can't update the STE atomically and the SMMU
1266 	 * doesn't read the thing in a defined order, that leaves us
1267 	 * with the following maintenance requirements:
1268 	 *
1269 	 * 1. Update Config, return (init time STEs aren't live)
1270 	 * 2. Write everything apart from dword 0, sync, write dword 0, sync
1271 	 * 3. Update Config, sync
1272 	 */
1273 	u64 val = le64_to_cpu(dst->data[0]);
1274 	bool ste_live = false;
1275 	struct arm_smmu_device *smmu = master->smmu;
1276 	struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
1277 	struct arm_smmu_s2_cfg *s2_cfg = NULL;
1278 	struct arm_smmu_domain *smmu_domain = master->domain;
1279 	struct arm_smmu_cmdq_ent prefetch_cmd = {
1280 		.opcode		= CMDQ_OP_PREFETCH_CFG,
1281 		.prefetch	= {
1282 			.sid	= sid,
1283 		},
1284 	};
1285 
1286 	if (smmu_domain) {
1287 		switch (smmu_domain->stage) {
1288 		case ARM_SMMU_DOMAIN_S1:
1289 			cd_table = &master->cd_table;
1290 			break;
1291 		case ARM_SMMU_DOMAIN_S2:
1292 			s2_cfg = &smmu_domain->s2_cfg;
1293 			break;
1294 		default:
1295 			break;
1296 		}
1297 	}
1298 
1299 	if (val & STRTAB_STE_0_V) {
1300 		switch (FIELD_GET(STRTAB_STE_0_CFG, val)) {
1301 		case STRTAB_STE_0_CFG_BYPASS:
1302 			break;
1303 		case STRTAB_STE_0_CFG_S1_TRANS:
1304 		case STRTAB_STE_0_CFG_S2_TRANS:
1305 			ste_live = true;
1306 			break;
1307 		case STRTAB_STE_0_CFG_ABORT:
1308 			BUG_ON(!disable_bypass);
1309 			break;
1310 		default:
1311 			BUG(); /* STE corruption */
1312 		}
1313 	}
1314 
1315 	/* Nuke the existing STE_0 value, as we're going to rewrite it */
1316 	val = STRTAB_STE_0_V;
1317 
1318 	/* Bypass/fault */
1319 	if (!smmu_domain || !(cd_table || s2_cfg)) {
1320 		if (!smmu_domain && disable_bypass)
1321 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1322 		else
1323 			val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1324 
1325 		dst->data[0] = cpu_to_le64(val);
1326 		dst->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
1327 						STRTAB_STE_1_SHCFG_INCOMING));
1328 		dst->data[2] = 0; /* Nuke the VMID */
1329 		/*
1330 		 * The SMMU can perform negative caching, so we must sync
1331 		 * the STE regardless of whether the old value was live.
1332 		 */
1333 		if (smmu)
1334 			arm_smmu_sync_ste_for_sid(smmu, sid);
1335 		return;
1336 	}
1337 
1338 	if (cd_table) {
1339 		u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
1340 			STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
1341 
1342 		BUG_ON(ste_live);
1343 		dst->data[1] = cpu_to_le64(
1344 			 FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) |
1345 			 FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1346 			 FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
1347 			 FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
1348 			 FIELD_PREP(STRTAB_STE_1_STRW, strw));
1349 
1350 		if (smmu->features & ARM_SMMU_FEAT_STALLS &&
1351 		    !master->stall_enabled)
1352 			dst->data[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
1353 
1354 		val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
1355 			FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
1356 			FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
1357 			FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
1358 	}
1359 
1360 	if (s2_cfg) {
1361 		BUG_ON(ste_live);
1362 		dst->data[2] = cpu_to_le64(
1363 			 FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
1364 			 FIELD_PREP(STRTAB_STE_2_VTCR, s2_cfg->vtcr) |
1365 #ifdef __BIG_ENDIAN
1366 			 STRTAB_STE_2_S2ENDI |
1367 #endif
1368 			 STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2AA64 |
1369 			 STRTAB_STE_2_S2R);
1370 
1371 		dst->data[3] = cpu_to_le64(s2_cfg->vttbr & STRTAB_STE_3_S2TTB_MASK);
1372 
1373 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS);
1374 	}
1375 
1376 	if (master->ats_enabled)
1377 		dst->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_EATS,
1378 						 STRTAB_STE_1_EATS_TRANS));
1379 
1380 	arm_smmu_sync_ste_for_sid(smmu, sid);
1381 	/* See comment in arm_smmu_write_ctx_desc() */
1382 	WRITE_ONCE(dst->data[0], cpu_to_le64(val));
1383 	arm_smmu_sync_ste_for_sid(smmu, sid);
1384 
1385 	/* It's likely that we'll want to use the new STE soon */
1386 	if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH))
1387 		arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
1388 }
1389 
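/*
 * Point @nent STEs at abort (the default when disable_bypass is set) or at
 * bypass with incoming attributes; @force selects bypass regardless of the
 * module parameter.
 */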
1390 static void arm_smmu_init_bypass_stes(struct arm_smmu_ste *strtab,
1391 				      unsigned int nent, bool force)
1392 {
1393 	unsigned int i;
1394 	u64 val = STRTAB_STE_0_V;
1395 
1396 	if (disable_bypass && !force)
1397 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
1398 	else
1399 		val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS);
1400 
1401 	for (i = 0; i < nent; ++i) {
1402 		strtab->data[0] = cpu_to_le64(val);
1403 		strtab->data[1] = cpu_to_le64(FIELD_PREP(
1404 			STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
1405 		strtab->data[2] = 0;
1406 		strtab++;
1407 	}
1408 }
1409 
1410 static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
1411 {
1412 	size_t size;
1413 	void *strtab;
1414 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
1415 	struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT];
1416 
1417 	if (desc->l2ptr)
1418 		return 0;
1419 
1420 	size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3);
1421 	strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS];
1422 
1423 	desc->span = STRTAB_SPLIT + 1;
1424 	desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma,
1425 					  GFP_KERNEL);
1426 	if (!desc->l2ptr) {
1427 		dev_err(smmu->dev,
1428 			"failed to allocate l2 stream table for SID %u\n",
1429 			sid);
1430 		return -ENOMEM;
1431 	}
1432 
1433 	arm_smmu_init_bypass_stes(desc->l2ptr, 1 << STRTAB_SPLIT, false);
1434 	arm_smmu_write_strtab_l1_desc(strtab, desc);
1435 	return 0;
1436 }
1437 
1438 static struct arm_smmu_master *
1439 arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
1440 {
1441 	struct rb_node *node;
1442 	struct arm_smmu_stream *stream;
1443 
1444 	lockdep_assert_held(&smmu->streams_mutex);
1445 
1446 	node = smmu->streams.rb_node;
1447 	while (node) {
1448 		stream = rb_entry(node, struct arm_smmu_stream, node);
1449 		if (stream->id < sid)
1450 			node = node->rb_right;
1451 		else if (stream->id > sid)
1452 			node = node->rb_left;
1453 		else
1454 			return stream->master;
1455 	}
1456 
1457 	return NULL;
1458 }
1459 
1460 /* IRQ and event handlers */
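/*
 * Translate an event queue record into an iommu_fault report. Stall events
 * become recoverable page requests so that an I/O page fault handler can
 * resolve and resume them; anything else is reported as an unrecoverable DMA
 * fault. Stage-2 faults are not forwarded, since stage-2 mappings are always
 * pinned.
 */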
1461 static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
1462 {
1463 	int ret;
1464 	u32 reason;
1465 	u32 perm = 0;
1466 	struct arm_smmu_master *master;
1467 	bool ssid_valid = evt[0] & EVTQ_0_SSV;
1468 	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
1469 	struct iommu_fault_event fault_evt = { };
1470 	struct iommu_fault *flt = &fault_evt.fault;
1471 
1472 	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
1473 	case EVT_ID_TRANSLATION_FAULT:
1474 		reason = IOMMU_FAULT_REASON_PTE_FETCH;
1475 		break;
1476 	case EVT_ID_ADDR_SIZE_FAULT:
1477 		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
1478 		break;
1479 	case EVT_ID_ACCESS_FAULT:
1480 		reason = IOMMU_FAULT_REASON_ACCESS;
1481 		break;
1482 	case EVT_ID_PERMISSION_FAULT:
1483 		reason = IOMMU_FAULT_REASON_PERMISSION;
1484 		break;
1485 	default:
1486 		return -EOPNOTSUPP;
1487 	}
1488 
1489 	/* Stage-2 is always pinned at the moment */
1490 	if (evt[1] & EVTQ_1_S2)
1491 		return -EFAULT;
1492 
1493 	if (evt[1] & EVTQ_1_RnW)
1494 		perm |= IOMMU_FAULT_PERM_READ;
1495 	else
1496 		perm |= IOMMU_FAULT_PERM_WRITE;
1497 
1498 	if (evt[1] & EVTQ_1_InD)
1499 		perm |= IOMMU_FAULT_PERM_EXEC;
1500 
1501 	if (evt[1] & EVTQ_1_PnU)
1502 		perm |= IOMMU_FAULT_PERM_PRIV;
1503 
1504 	if (evt[1] & EVTQ_1_STALL) {
1505 		flt->type = IOMMU_FAULT_PAGE_REQ;
1506 		flt->prm = (struct iommu_fault_page_request) {
1507 			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
1508 			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
1509 			.perm = perm,
1510 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1511 		};
1512 
1513 		if (ssid_valid) {
1514 			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
1515 			flt->prm.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1516 		}
1517 	} else {
1518 		flt->type = IOMMU_FAULT_DMA_UNRECOV;
1519 		flt->event = (struct iommu_fault_unrecoverable) {
1520 			.reason = reason,
1521 			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
1522 			.perm = perm,
1523 			.addr = FIELD_GET(EVTQ_2_ADDR, evt[2]),
1524 		};
1525 
1526 		if (ssid_valid) {
1527 			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
1528 			flt->event.pasid = FIELD_GET(EVTQ_0_SSID, evt[0]);
1529 		}
1530 	}
1531 
1532 	mutex_lock(&smmu->streams_mutex);
1533 	master = arm_smmu_find_master(smmu, sid);
1534 	if (!master) {
1535 		ret = -EINVAL;
1536 		goto out_unlock;
1537 	}
1538 
1539 	ret = iommu_report_device_fault(master->dev, &fault_evt);
1540 	if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
1541 		/* Nobody cared, abort the access */
1542 		struct iommu_page_response resp = {
1543 			.pasid		= flt->prm.pasid,
1544 			.grpid		= flt->prm.grpid,
1545 			.code		= IOMMU_PAGE_RESP_FAILURE,
1546 		};
1547 		arm_smmu_page_response(master->dev, &fault_evt, &resp);
1548 	}
1549 
1550 out_unlock:
1551 	mutex_unlock(&smmu->streams_mutex);
1552 	return ret;
1553 }
1554 
1555 static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
1556 {
1557 	int i, ret;
1558 	struct arm_smmu_device *smmu = dev;
1559 	struct arm_smmu_queue *q = &smmu->evtq.q;
1560 	struct arm_smmu_ll_queue *llq = &q->llq;
1561 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
1562 				      DEFAULT_RATELIMIT_BURST);
1563 	u64 evt[EVTQ_ENT_DWORDS];
1564 
1565 	do {
1566 		while (!queue_remove_raw(q, evt)) {
1567 			u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
1568 
1569 			ret = arm_smmu_handle_evt(smmu, evt);
1570 			if (!ret || !__ratelimit(&rs))
1571 				continue;
1572 
1573 			dev_info(smmu->dev, "event 0x%02x received:\n", id);
1574 			for (i = 0; i < ARRAY_SIZE(evt); ++i)
1575 				dev_info(smmu->dev, "\t0x%016llx\n",
1576 					 (unsigned long long)evt[i]);
1577 
1578 			cond_resched();
1579 		}
1580 
1581 		/*
1582 		 * Not much we can do on overflow, so scream and pretend we're
1583 		 * trying harder.
1584 		 */
1585 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1586 			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
1587 	} while (!queue_empty(llq));
1588 
1589 	/* Sync our overflow flag, as we believe we're up to speed */
1590 	queue_sync_cons_ovf(q);
1591 	return IRQ_HANDLED;
1592 }
1593 
1594 static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
1595 {
1596 	u32 sid, ssid;
1597 	u16 grpid;
1598 	bool ssv, last;
1599 
1600 	sid = FIELD_GET(PRIQ_0_SID, evt[0]);
1601 	ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
1602 	ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
1603 	last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
1604 	grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
1605 
1606 	dev_info(smmu->dev, "unexpected PRI request received:\n");
1607 	dev_info(smmu->dev,
1608 		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
1609 		 sid, ssid, grpid, last ? "L" : "",
1610 		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
1611 		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
1612 		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
1613 		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
1614 		 evt[1] & PRIQ_1_ADDR_MASK);
1615 
1616 	if (last) {
1617 		struct arm_smmu_cmdq_ent cmd = {
1618 			.opcode			= CMDQ_OP_PRI_RESP,
1619 			.substream_valid	= ssv,
1620 			.pri			= {
1621 				.sid	= sid,
1622 				.ssid	= ssid,
1623 				.grpid	= grpid,
1624 				.resp	= PRI_RESP_DENY,
1625 			},
1626 		};
1627 
1628 		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
1629 	}
1630 }
1631 
1632 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
1633 {
1634 	struct arm_smmu_device *smmu = dev;
1635 	struct arm_smmu_queue *q = &smmu->priq.q;
1636 	struct arm_smmu_ll_queue *llq = &q->llq;
1637 	u64 evt[PRIQ_ENT_DWORDS];
1638 
1639 	do {
1640 		while (!queue_remove_raw(q, evt))
1641 			arm_smmu_handle_ppr(smmu, evt);
1642 
1643 		if (queue_sync_prod_in(q) == -EOVERFLOW)
1644 			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
1645 	} while (!queue_empty(llq));
1646 
1647 	/* Sync our overflow flag, as we believe we're up to speed */
1648 	queue_sync_cons_ovf(q);
1649 	return IRQ_HANDLED;
1650 }
1651 
1652 static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
1653 
1654 static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
1655 {
1656 	u32 gerror, gerrorn, active;
1657 	struct arm_smmu_device *smmu = dev;
1658 
1659 	gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
1660 	gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
1661 
1662 	active = gerror ^ gerrorn;
1663 	if (!(active & GERROR_ERR_MASK))
1664 		return IRQ_NONE; /* No errors pending */
1665 
1666 	dev_warn(smmu->dev,
1667 		 "unexpected global error reported (0x%08x), this could be serious\n",
1668 		 active);
1669 
1670 	if (active & GERROR_SFM_ERR) {
1671 		dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
1672 		arm_smmu_device_disable(smmu);
1673 	}
1674 
1675 	if (active & GERROR_MSI_GERROR_ABT_ERR)
1676 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
1677 
1678 	if (active & GERROR_MSI_PRIQ_ABT_ERR)
1679 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
1680 
1681 	if (active & GERROR_MSI_EVTQ_ABT_ERR)
1682 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
1683 
1684 	if (active & GERROR_MSI_CMDQ_ABT_ERR)
1685 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
1686 
1687 	if (active & GERROR_PRIQ_ABT_ERR)
1688 		dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
1689 
1690 	if (active & GERROR_EVTQ_ABT_ERR)
1691 		dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
1692 
1693 	if (active & GERROR_CMDQ_ERR)
1694 		arm_smmu_cmdq_skip_err(smmu);
1695 
1696 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
1697 	return IRQ_HANDLED;
1698 }
1699 
1700 static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
1701 {
1702 	struct arm_smmu_device *smmu = dev;
1703 
1704 	arm_smmu_evtq_thread(irq, dev);
1705 	if (smmu->features & ARM_SMMU_FEAT_PRI)
1706 		arm_smmu_priq_thread(irq, dev);
1707 
1708 	return IRQ_HANDLED;
1709 }
1710 
1711 static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
1712 {
1713 	arm_smmu_gerror_handler(irq, dev);
1714 	return IRQ_WAKE_THREAD;
1715 }
1716 
1717 static void
1718 arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
1719 			struct arm_smmu_cmdq_ent *cmd)
1720 {
1721 	size_t log2_span;
1722 	size_t span_mask;
1723 	/* ATC invalidates are always on 4096-byte pages */
1724 	size_t inval_grain_shift = 12;
1725 	unsigned long page_start, page_end;
1726 
1727 	/*
1728 	 * ATS and PASID:
1729 	 *
1730 	 * If substream_valid is clear, the PCIe TLP is sent without a PASID
1731 	 * prefix. In that case all ATC entries within the address range are
1732 	 * invalidated, including those that were requested with a PASID! There
1733 	 * is no way to invalidate only entries without PASID.
1734 	 *
1735 	 * When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
1736 	 * traffic), translation requests without PASID create ATC entries
1737 	 * without PASID, which must be invalidated with substream_valid clear.
1738 	 * This has the unpleasant side-effect of invalidating all PASID-tagged
1739 	 * ATC entries within the address range.
1740 	 */
1741 	*cmd = (struct arm_smmu_cmdq_ent) {
1742 		.opcode			= CMDQ_OP_ATC_INV,
1743 		.substream_valid	= (ssid != IOMMU_NO_PASID),
1744 		.atc.ssid		= ssid,
1745 	};
1746 
1747 	if (!size) {
1748 		cmd->atc.size = ATC_INV_SIZE_ALL;
1749 		return;
1750 	}
1751 
1752 	page_start	= iova >> inval_grain_shift;
1753 	page_end	= (iova + size - 1) >> inval_grain_shift;
1754 
1755 	/*
1756 	 * In an ATS Invalidate Request, the address must be aligned on the
1757 	 * range size, which must be a power-of-two number of pages. We thus
1758 	 * have to choose between grossly over-invalidating the region and
1759 	 * splitting the invalidation into multiple commands. For simplicity
1760 	 * we'll go with the first solution, but should refine it in the future
1761 	 * if multiple commands are shown to be more efficient.
1762 	 *
1763 	 * Find the smallest power of two that covers the range. The most
1764 	 * significant differing bit between the start and end addresses,
1765 	 * fls(start ^ end), indicates the required span. For example:
1766 	 *
1767 	 * We want to invalidate pages [8; 11]. This is already the ideal range:
1768 	 *		x = 0b1000 ^ 0b1011 = 0b11
1769 	 *		span = 1 << fls(x) = 4
1770 	 *
1771 	 * To invalidate pages [7; 10], we need to invalidate [0; 15]:
1772 	 *		x = 0b0111 ^ 0b1010 = 0b1101
1773 	 *		span = 1 << fls(x) = 16
1774 	 */
1775 	log2_span	= fls_long(page_start ^ page_end);
1776 	span_mask	= (1ULL << log2_span) - 1;
1777 
1778 	page_start	&= ~span_mask;
1779 
1780 	cmd->atc.addr	= page_start << inval_grain_shift;
1781 	cmd->atc.size	= log2_span;
1782 }
1783 
1784 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
1785 {
1786 	int i;
1787 	struct arm_smmu_cmdq_ent cmd;
1788 	struct arm_smmu_cmdq_batch cmds;
1789 
1790 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
1791 
1792 	cmds.num = 0;
1793 	for (i = 0; i < master->num_streams; i++) {
1794 		cmd.atc.sid = master->streams[i].id;
1795 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
1796 	}
1797 
1798 	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
1799 }
1800 
1801 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
1802 			    unsigned long iova, size_t size)
1803 {
1804 	int i;
1805 	unsigned long flags;
1806 	struct arm_smmu_cmdq_ent cmd;
1807 	struct arm_smmu_master *master;
1808 	struct arm_smmu_cmdq_batch cmds;
1809 
1810 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
1811 		return 0;
1812 
1813 	/*
1814 	 * Ensure that we've completed prior invalidation of the main TLBs
1815 	 * before we read 'nr_ats_masters' in case of a concurrent call to
1816 	 * arm_smmu_enable_ats():
1817 	 *
1818 	 *	// unmap()			// arm_smmu_enable_ats()
1819 	 *	TLBI+SYNC			atomic_inc(&nr_ats_masters);
1820 	 *	smp_mb();			[...]
1821 	 *	atomic_read(&nr_ats_masters);	pci_enable_ats() // writel()
1822 	 *
1823 	 * Ensures that we always see the incremented 'nr_ats_masters' count if
1824 	 * ATS was enabled at the PCI device before completion of the TLBI.
1825 	 */
1826 	smp_mb();
1827 	if (!atomic_read(&smmu_domain->nr_ats_masters))
1828 		return 0;
1829 
1830 	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
1831 
1832 	cmds.num = 0;
1833 
1834 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
1835 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
1836 		if (!master->ats_enabled)
1837 			continue;
1838 
1839 		for (i = 0; i < master->num_streams; i++) {
1840 			cmd.atc.sid = master->streams[i].id;
1841 			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
1842 		}
1843 	}
1844 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
1845 
1846 	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
1847 }
1848 
1849 /* IO_PGTABLE API */
1850 static void arm_smmu_tlb_inv_context(void *cookie)
1851 {
1852 	struct arm_smmu_domain *smmu_domain = cookie;
1853 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1854 	struct arm_smmu_cmdq_ent cmd;
1855 
1856 	/*
1857 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
1858 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
1859 	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
1860 	 * insertion to guarantee those are observed before the TLBI. Do be
1861 	 * careful, 007.
1862 	 */
1863 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1864 		arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
1865 	} else {
1866 		cmd.opcode	= CMDQ_OP_TLBI_S12_VMALL;
1867 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1868 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
1869 	}
1870 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
1871 }
1872 
1873 static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
1874 				     unsigned long iova, size_t size,
1875 				     size_t granule,
1876 				     struct arm_smmu_domain *smmu_domain)
1877 {
1878 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1879 	unsigned long end = iova + size, num_pages = 0, tg = 0;
1880 	size_t inv_range = granule;
1881 	struct arm_smmu_cmdq_batch cmds;
1882 
1883 	if (!size)
1884 		return;
1885 
1886 	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1887 		/* Get the leaf page size */
1888 		tg = __ffs(smmu_domain->domain.pgsize_bitmap);
1889 
1890 		num_pages = size >> tg;
1891 
1892 		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
1893 		cmd->tlbi.tg = (tg - 10) / 2;
1894 
1895 		/*
1896 		 * Determine what level the granule is at. For non-leaf, both
1897 		 * io-pgtable and SVA pass a nominal last-level granule because
1898 		 * they don't know what level(s) actually apply, so ignore that
1899 		 * and leave TTL=0. However for various errata reasons we still
1900 		 * want to use a range command, so avoid the SVA corner case
1901 		 * where both scale and num could be 0 as well.
1902 		 */
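		/*
		 * e.g. assuming a 4K leaf granule (tg == 12), a leaf
		 * invalidation of a 2MB block (ilog2(granule) == 21) gives
		 * ttl = 4 - ((21 - 3) / (12 - 3)) = 2, i.e. a level-2 entry.
		 */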
1903 		if (cmd->tlbi.leaf)
1904 			cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
1905 		else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
1906 			num_pages++;
1907 	}
1908 
1909 	cmds.num = 0;
1910 
1911 	while (iova < end) {
1912 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
1913 			/*
1914 			 * On each iteration of the loop, the range is 5 bits
1915 			 * worth of the aligned size remaining.
1916 			 * The range in pages is:
1917 			 *
1918 			 * range = (num_pages & (0x1f << __ffs(num_pages)))
1919 			 */
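			/*
			 * e.g. for num_pages == 35 (0b100011): the first pass
			 * emits a command for 3 pages (scale 0, num 3) and the
			 * second for 32 pages (scale 5, num 1), covering the
			 * whole range in two commands.
			 */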
1920 			unsigned long scale, num;
1921 
1922 			/* Determine the power of 2 multiple number of pages */
1923 			scale = __ffs(num_pages);
1924 			cmd->tlbi.scale = scale;
1925 
1926 			/* Determine how many chunks of 2^scale size we have */
1927 			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
1928 			cmd->tlbi.num = num - 1;
1929 
1930 			/* range is num * 2^scale * pgsize */
1931 			inv_range = num << (scale + tg);
1932 
1933 			/* Clear out the lower order bits for the next iteration */
1934 			num_pages -= num << scale;
1935 		}
1936 
1937 		cmd->tlbi.addr = iova;
1938 		arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
1939 		iova += inv_range;
1940 	}
1941 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
1942 }
1943 
1944 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
1945 					  size_t granule, bool leaf,
1946 					  struct arm_smmu_domain *smmu_domain)
1947 {
1948 	struct arm_smmu_cmdq_ent cmd = {
1949 		.tlbi = {
1950 			.leaf	= leaf,
1951 		},
1952 	};
1953 
1954 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
1955 		cmd.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1956 				  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
1957 		cmd.tlbi.asid	= smmu_domain->cd.asid;
1958 	} else {
1959 		cmd.opcode	= CMDQ_OP_TLBI_S2_IPA;
1960 		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
1961 	}
1962 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1963 
1964 	/*
1965 	 * Unfortunately, this can't be leaf-only since we may have
1966 	 * zapped an entire table.
1967 	 */
1968 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size);
1969 }
1970 
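/*
 * As above, but for an explicit ASID rather than the domain's own one; the
 * SVA code uses this to invalidate ranges on behalf of an mm.
 */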
1971 void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
1972 				 size_t granule, bool leaf,
1973 				 struct arm_smmu_domain *smmu_domain)
1974 {
1975 	struct arm_smmu_cmdq_ent cmd = {
1976 		.opcode	= smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
1977 			  CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
1978 		.tlbi = {
1979 			.asid	= asid,
1980 			.leaf	= leaf,
1981 		},
1982 	};
1983 
1984 	__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
1985 }
1986 
1987 static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
1988 					 unsigned long iova, size_t granule,
1989 					 void *cookie)
1990 {
1991 	struct arm_smmu_domain *smmu_domain = cookie;
1992 	struct iommu_domain *domain = &smmu_domain->domain;
1993 
1994 	iommu_iotlb_gather_add_page(domain, gather, iova, granule);
1995 }
1996 
1997 static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
1998 				  size_t granule, void *cookie)
1999 {
2000 	arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
2001 }
2002 
2003 static const struct iommu_flush_ops arm_smmu_flush_ops = {
2004 	.tlb_flush_all	= arm_smmu_tlb_inv_context,
2005 	.tlb_flush_walk = arm_smmu_tlb_inv_walk,
2006 	.tlb_add_page	= arm_smmu_tlb_inv_page_nosync,
2007 };
2008 
2009 /* IOMMU API */
2010 static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
2011 {
2012 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2013 
2014 	switch (cap) {
2015 	case IOMMU_CAP_CACHE_COHERENCY:
2016 		/* Assume that a coherent TCU implies coherent TBUs */
2017 		return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
2018 	case IOMMU_CAP_NOEXEC:
2019 	case IOMMU_CAP_DEFERRED_FLUSH:
2020 		return true;
2021 	default:
2022 		return false;
2023 	}
2024 }
2025 
2026 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
2027 {
2028 	struct arm_smmu_domain *smmu_domain;
2029 
2030 	if (type == IOMMU_DOMAIN_SVA)
2031 		return arm_smmu_sva_domain_alloc();
2032 
2033 	if (type != IOMMU_DOMAIN_UNMANAGED &&
2034 	    type != IOMMU_DOMAIN_DMA &&
2035 	    type != IOMMU_DOMAIN_IDENTITY)
2036 		return NULL;
2037 
2038 	/*
2039 	 * Allocate the domain and initialise some of its data structures.
2040 	 * We can't really do anything meaningful until we've added a
2041 	 * master.
2042 	 */
2043 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
2044 	if (!smmu_domain)
2045 		return NULL;
2046 
2047 	mutex_init(&smmu_domain->init_mutex);
2048 	INIT_LIST_HEAD(&smmu_domain->devices);
2049 	spin_lock_init(&smmu_domain->devices_lock);
2050 	INIT_LIST_HEAD(&smmu_domain->mmu_notifiers);
2051 
2052 	return &smmu_domain->domain;
2053 }
2054 
2055 static void arm_smmu_domain_free(struct iommu_domain *domain)
2056 {
2057 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2058 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2059 
2060 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
2061 
2062 	/* Free the ASID or VMID */
2063 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2064 		/* Prevent SVA from touching the CD while we're freeing it */
2065 		mutex_lock(&arm_smmu_asid_lock);
2066 		arm_smmu_free_asid(&smmu_domain->cd);
2067 		mutex_unlock(&arm_smmu_asid_lock);
2068 	} else {
2069 		struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2070 		if (cfg->vmid)
2071 			ida_free(&smmu->vmid_map, cfg->vmid);
2072 	}
2073 
2074 	kfree(smmu_domain);
2075 }
2076 
2077 static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
2078 				       struct io_pgtable_cfg *pgtbl_cfg)
2079 {
2080 	int ret;
2081 	u32 asid;
2082 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2083 	struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
2084 	typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
2085 
2086 	refcount_set(&cd->refs, 1);
2087 
2088 	/* Prevent SVA from modifying the ASID until it is written to the CD */
2089 	mutex_lock(&arm_smmu_asid_lock);
2090 	ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
2091 		       XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
2092 	if (ret)
2093 		goto out_unlock;
2094 
2095 	cd->asid	= (u16)asid;
2096 	cd->ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
2097 	cd->tcr		= FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
2098 			  FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
2099 			  FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
2100 			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
2101 			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
2102 			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
2103 			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
2104 	cd->mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;
2105 
2106 	mutex_unlock(&arm_smmu_asid_lock);
2107 	return 0;
2108 
2109 out_unlock:
2110 	mutex_unlock(&arm_smmu_asid_lock);
2111 	return ret;
2112 }
2113 
2114 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
2115 				       struct io_pgtable_cfg *pgtbl_cfg)
2116 {
2117 	int vmid;
2118 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2119 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
2120 	typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr;
2121 
2122 	/* Reserve VMID 0 for stage-2 bypass STEs */
2123 	vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
2124 			       GFP_KERNEL);
2125 	if (vmid < 0)
2126 		return vmid;
2127 
2128 	vtcr = &pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
2129 	cfg->vmid	= (u16)vmid;
2130 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
2131 	cfg->vtcr	= FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
2132 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
2133 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
2134 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
2135 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
2136 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
2137 			  FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
2138 	return 0;
2139 }
2140 
2141 static int arm_smmu_domain_finalise(struct iommu_domain *domain)
2142 {
2143 	int ret;
2144 	unsigned long ias, oas;
2145 	enum io_pgtable_fmt fmt;
2146 	struct io_pgtable_cfg pgtbl_cfg;
2147 	struct io_pgtable_ops *pgtbl_ops;
2148 	int (*finalise_stage_fn)(struct arm_smmu_domain *,
2149 				 struct io_pgtable_cfg *);
2150 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2151 	struct arm_smmu_device *smmu = smmu_domain->smmu;
2152 
2153 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
2154 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
2155 		return 0;
2156 	}
2157 
2158 	/* Restrict the stage to what we can actually support */
2159 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
2160 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2161 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
2162 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
2163 
2164 	switch (smmu_domain->stage) {
2165 	case ARM_SMMU_DOMAIN_S1:
2166 		ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48;
2167 		ias = min_t(unsigned long, ias, VA_BITS);
2168 		oas = smmu->ias;
2169 		fmt = ARM_64_LPAE_S1;
2170 		finalise_stage_fn = arm_smmu_domain_finalise_s1;
2171 		break;
2172 	case ARM_SMMU_DOMAIN_S2:
2173 		ias = smmu->ias;
2174 		oas = smmu->oas;
2175 		fmt = ARM_64_LPAE_S2;
2176 		finalise_stage_fn = arm_smmu_domain_finalise_s2;
2177 		break;
2178 	default:
2179 		return -EINVAL;
2180 	}
2181 
2182 	pgtbl_cfg = (struct io_pgtable_cfg) {
2183 		.pgsize_bitmap	= smmu->pgsize_bitmap,
2184 		.ias		= ias,
2185 		.oas		= oas,
2186 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENCY,
2187 		.tlb		= &arm_smmu_flush_ops,
2188 		.iommu_dev	= smmu->dev,
2189 	};
2190 
2191 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
2192 	if (!pgtbl_ops)
2193 		return -ENOMEM;
2194 
2195 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
2196 	domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
2197 	domain->geometry.force_aperture = true;
2198 
2199 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
2200 	if (ret < 0) {
2201 		free_io_pgtable_ops(pgtbl_ops);
2202 		return ret;
2203 	}
2204 
2205 	smmu_domain->pgtbl_ops = pgtbl_ops;
2206 	return 0;
2207 }
2208 
2209 static struct arm_smmu_ste *
2210 arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
2211 {
2212 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2213 
2214 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
2215 		unsigned int idx1, idx2;
2216 
2217 		/* Two-level walk */
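		/*
		 * e.g. assuming STRTAB_SPLIT == 8 and one dword per L1
		 * descriptor, SID 0x1234 selects L1 descriptor 0x12 and
		 * STE 0x34 within that descriptor's L2 table.
		 */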
2218 		idx1 = (sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS;
2219 		idx2 = sid & ((1 << STRTAB_SPLIT) - 1);
2220 		return &cfg->l1_desc[idx1].l2ptr[idx2];
2221 	} else {
2222 		/* Simple linear lookup */
2223 		return (struct arm_smmu_ste *)&cfg
2224 			       ->strtab[sid * STRTAB_STE_DWORDS];
2225 	}
2226 }
2227 
2228 static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master)
2229 {
2230 	int i, j;
2231 	struct arm_smmu_device *smmu = master->smmu;
2232 
2233 	for (i = 0; i < master->num_streams; ++i) {
2234 		u32 sid = master->streams[i].id;
2235 		struct arm_smmu_ste *step =
2236 			arm_smmu_get_step_for_sid(smmu, sid);
2237 
2238 		/* Bridged PCI devices may end up with duplicated IDs */
2239 		for (j = 0; j < i; j++)
2240 			if (master->streams[j].id == sid)
2241 				break;
2242 		if (j < i)
2243 			continue;
2244 
2245 		arm_smmu_write_strtab_ent(master, sid, step);
2246 	}
2247 }
2248 
2249 static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
2250 {
2251 	struct device *dev = master->dev;
2252 	struct arm_smmu_device *smmu = master->smmu;
2253 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2254 
2255 	if (!(smmu->features & ARM_SMMU_FEAT_ATS))
2256 		return false;
2257 
2258 	if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
2259 		return false;
2260 
2261 	return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
2262 }
2263 
2264 static void arm_smmu_enable_ats(struct arm_smmu_master *master)
2265 {
2266 	size_t stu;
2267 	struct pci_dev *pdev;
2268 	struct arm_smmu_device *smmu = master->smmu;
2269 	struct arm_smmu_domain *smmu_domain = master->domain;
2270 
2271 	/* Don't enable ATS at the endpoint if it's not enabled in the STE */
2272 	if (!master->ats_enabled)
2273 		return;
2274 
2275 	/* Smallest Translation Unit: log2 of the smallest supported granule */
2276 	stu = __ffs(smmu->pgsize_bitmap);
2277 	pdev = to_pci_dev(master->dev);
2278 
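	/*
	 * Publish the new ATS master and clean its ATC before the endpoint
	 * can issue translation requests; this is the enable-side counterpart
	 * of the ordering described in arm_smmu_atc_inv_domain().
	 */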
2279 	atomic_inc(&smmu_domain->nr_ats_masters);
2280 	arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0);
2281 	if (pci_enable_ats(pdev, stu))
2282 		dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
2283 }
2284 
2285 static void arm_smmu_disable_ats(struct arm_smmu_master *master)
2286 {
2287 	struct arm_smmu_domain *smmu_domain = master->domain;
2288 
2289 	if (!master->ats_enabled)
2290 		return;
2291 
2292 	pci_disable_ats(to_pci_dev(master->dev));
2293 	/*
2294 	 * Ensure ATS is disabled at the endpoint before we issue the
2295 	 * ATC invalidation via the SMMU.
2296 	 */
2297 	wmb();
2298 	arm_smmu_atc_inv_master(master);
2299 	atomic_dec(&smmu_domain->nr_ats_masters);
2300 }
2301 
2302 static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
2303 {
2304 	int ret;
2305 	int features;
2306 	int num_pasids;
2307 	struct pci_dev *pdev;
2308 
2309 	if (!dev_is_pci(master->dev))
2310 		return -ENODEV;
2311 
2312 	pdev = to_pci_dev(master->dev);
2313 
2314 	features = pci_pasid_features(pdev);
2315 	if (features < 0)
2316 		return features;
2317 
2318 	num_pasids = pci_max_pasids(pdev);
2319 	if (num_pasids <= 0)
2320 		return num_pasids;
2321 
2322 	ret = pci_enable_pasid(pdev, features);
2323 	if (ret) {
2324 		dev_err(&pdev->dev, "Failed to enable PASID\n");
2325 		return ret;
2326 	}
2327 
2328 	master->ssid_bits = min_t(u8, ilog2(num_pasids),
2329 				  master->smmu->ssid_bits);
2330 	return 0;
2331 }
2332 
2333 static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
2334 {
2335 	struct pci_dev *pdev;
2336 
2337 	if (!dev_is_pci(master->dev))
2338 		return;
2339 
2340 	pdev = to_pci_dev(master->dev);
2341 
2342 	if (!pdev->pasid_enabled)
2343 		return;
2344 
2345 	master->ssid_bits = 0;
2346 	pci_disable_pasid(pdev);
2347 }
2348 
2349 static void arm_smmu_detach_dev(struct arm_smmu_master *master)
2350 {
2351 	unsigned long flags;
2352 	struct arm_smmu_domain *smmu_domain = master->domain;
2353 
2354 	if (!smmu_domain)
2355 		return;
2356 
2357 	arm_smmu_disable_ats(master);
2358 
2359 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2360 	list_del(&master->domain_head);
2361 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2362 
2363 	master->domain = NULL;
2364 	master->ats_enabled = false;
2365 	arm_smmu_install_ste_for_dev(master);
2366 	/*
2367 	 * Clearing the CD entry isn't strictly required to detach the domain
2368 	 * since the table is uninstalled anyway, but it helps avoid confusion
2369 	 * in the call to arm_smmu_write_ctx_desc on the next attach (which
2370 	 * expects the entry to be empty).
2371 	 */
2372 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
2373 		arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
2374 }
2375 
2376 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
2377 {
2378 	int ret = 0;
2379 	unsigned long flags;
2380 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2381 	struct arm_smmu_device *smmu;
2382 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2383 	struct arm_smmu_master *master;
2384 
2385 	if (!fwspec)
2386 		return -ENOENT;
2387 
2388 	master = dev_iommu_priv_get(dev);
2389 	smmu = master->smmu;
2390 
2391 	/*
2392 	 * Checking that SVA is disabled ensures that this device isn't bound to
2393 	 * any mm, and can be safely detached from its old domain. Bonds cannot
2394 	 * be removed concurrently since we're holding the group mutex.
2395 	 */
2396 	if (arm_smmu_master_sva_enabled(master)) {
2397 		dev_err(dev, "cannot attach - SVA enabled\n");
2398 		return -EBUSY;
2399 	}
2400 
2401 	arm_smmu_detach_dev(master);
2402 
2403 	mutex_lock(&smmu_domain->init_mutex);
2404 
2405 	if (!smmu_domain->smmu) {
2406 		smmu_domain->smmu = smmu;
2407 		ret = arm_smmu_domain_finalise(domain);
2408 		if (ret)
2409 			smmu_domain->smmu = NULL;
2410 	} else if (smmu_domain->smmu != smmu)
2411 		ret = -EINVAL;
2412 
2413 	mutex_unlock(&smmu_domain->init_mutex);
2414 	if (ret)
2415 		return ret;
2416 
2417 	master->domain = smmu_domain;
2418 
2419 	/*
2420 	 * The SMMU does not support enabling ATS with bypass. When the STE is
2421 	 * in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests and
2422 	 * Translated transactions are denied as though ATS is disabled for the
2423 	 * stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and
2424 	 * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry).
2425 	 */
2426 	if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
2427 		master->ats_enabled = arm_smmu_ats_supported(master);
2428 
2429 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2430 	list_add(&master->domain_head, &smmu_domain->devices);
2431 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2432 
2433 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
2434 		if (!master->cd_table.cdtab) {
2435 			ret = arm_smmu_alloc_cd_tables(master);
2436 			if (ret) {
2437 				master->domain = NULL;
2438 				goto out_list_del;
2439 			}
2440 		}
2441 
2442 		/*
2443 		 * Prevent SVA from concurrently modifying the CD or writing to
2444 		 * the CD entry
2445 		 */
2446 		mutex_lock(&arm_smmu_asid_lock);
2447 		ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
2448 		mutex_unlock(&arm_smmu_asid_lock);
2449 		if (ret) {
2450 			master->domain = NULL;
2451 			goto out_list_del;
2452 		}
2453 	}
2454 
2455 	arm_smmu_install_ste_for_dev(master);
2456 
2457 	arm_smmu_enable_ats(master);
2458 	return 0;
2459 
2460 out_list_del:
2461 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
2462 	list_del(&master->domain_head);
2463 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
2464 
2465 	return ret;
2466 }
2467 
2468 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
2469 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
2470 			      int prot, gfp_t gfp, size_t *mapped)
2471 {
2472 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2473 
2474 	if (!ops)
2475 		return -ENODEV;
2476 
2477 	return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
2478 }
2479 
2480 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
2481 				   size_t pgsize, size_t pgcount,
2482 				   struct iommu_iotlb_gather *gather)
2483 {
2484 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2485 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
2486 
2487 	if (!ops)
2488 		return 0;
2489 
2490 	return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
2491 }
2492 
2493 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
2494 {
2495 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2496 
2497 	if (smmu_domain->smmu)
2498 		arm_smmu_tlb_inv_context(smmu_domain);
2499 }
2500 
2501 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
2502 				struct iommu_iotlb_gather *gather)
2503 {
2504 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2505 
2506 	if (!gather->pgsize)
2507 		return;
2508 
2509 	arm_smmu_tlb_inv_range_domain(gather->start,
2510 				      gather->end - gather->start + 1,
2511 				      gather->pgsize, true, smmu_domain);
2512 }
2513 
2514 static phys_addr_t
2515 arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
2516 {
2517 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
2518 
2519 	if (!ops)
2520 		return 0;
2521 
2522 	return ops->iova_to_phys(ops, iova);
2523 }
2524 
2525 static struct platform_driver arm_smmu_driver;
2526 
2527 static
2528 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
2529 {
2530 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
2531 							  fwnode);
2532 	put_device(dev);
2533 	return dev ? dev_get_drvdata(dev) : NULL;
2534 }
2535 
2536 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
2537 {
2538 	unsigned long limit = smmu->strtab_cfg.num_l1_ents;
2539 
2540 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2541 		limit *= 1UL << STRTAB_SPLIT;
2542 
2543 	return sid < limit;
2544 }
2545 
2546 static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
2547 {
2548 	/* Check the SID is in range of the SMMU and our stream table */
2549 	if (!arm_smmu_sid_in_range(smmu, sid))
2550 		return -ERANGE;
2551 
2552 	/* Ensure l2 strtab is initialised */
2553 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
2554 		return arm_smmu_init_l2_strtab(smmu, sid);
2555 
2556 	return 0;
2557 }
2558 
2559 static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
2560 				  struct arm_smmu_master *master)
2561 {
2562 	int i;
2563 	int ret = 0;
2564 	struct arm_smmu_stream *new_stream, *cur_stream;
2565 	struct rb_node **new_node, *parent_node = NULL;
2566 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2567 
2568 	master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
2569 				  GFP_KERNEL);
2570 	if (!master->streams)
2571 		return -ENOMEM;
2572 	master->num_streams = fwspec->num_ids;
2573 
2574 	mutex_lock(&smmu->streams_mutex);
2575 	for (i = 0; i < fwspec->num_ids; i++) {
2576 		u32 sid = fwspec->ids[i];
2577 
2578 		new_stream = &master->streams[i];
2579 		new_stream->id = sid;
2580 		new_stream->master = master;
2581 
2582 		ret = arm_smmu_init_sid_strtab(smmu, sid);
2583 		if (ret)
2584 			break;
2585 
2586 		/* Insert into SID tree */
2587 		new_node = &(smmu->streams.rb_node);
2588 		while (*new_node) {
2589 			cur_stream = rb_entry(*new_node, struct arm_smmu_stream,
2590 					      node);
2591 			parent_node = *new_node;
2592 			if (cur_stream->id > new_stream->id) {
2593 				new_node = &((*new_node)->rb_left);
2594 			} else if (cur_stream->id < new_stream->id) {
2595 				new_node = &((*new_node)->rb_right);
2596 			} else {
2597 				dev_warn(master->dev,
2598 					 "stream %u already in tree\n",
2599 					 cur_stream->id);
2600 				ret = -EINVAL;
2601 				break;
2602 			}
2603 		}
2604 		if (ret)
2605 			break;
2606 
2607 		rb_link_node(&new_stream->node, parent_node, new_node);
2608 		rb_insert_color(&new_stream->node, &smmu->streams);
2609 	}
2610 
2611 	if (ret) {
2612 		for (i--; i >= 0; i--)
2613 			rb_erase(&master->streams[i].node, &smmu->streams);
2614 		kfree(master->streams);
2615 	}
2616 	mutex_unlock(&smmu->streams_mutex);
2617 
2618 	return ret;
2619 }
2620 
2621 static void arm_smmu_remove_master(struct arm_smmu_master *master)
2622 {
2623 	int i;
2624 	struct arm_smmu_device *smmu = master->smmu;
2625 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
2626 
2627 	if (!smmu || !master->streams)
2628 		return;
2629 
2630 	mutex_lock(&smmu->streams_mutex);
2631 	for (i = 0; i < fwspec->num_ids; i++)
2632 		rb_erase(&master->streams[i].node, &smmu->streams);
2633 	mutex_unlock(&smmu->streams_mutex);
2634 
2635 	kfree(master->streams);
2636 }
2637 
2638 static struct iommu_ops arm_smmu_ops;
2639 
2640 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
2641 {
2642 	int ret;
2643 	struct arm_smmu_device *smmu;
2644 	struct arm_smmu_master *master;
2645 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
2646 
2647 	if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
2648 		return ERR_PTR(-EBUSY);
2649 
2650 	smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
2651 	if (!smmu)
2652 		return ERR_PTR(-ENODEV);
2653 
2654 	master = kzalloc(sizeof(*master), GFP_KERNEL);
2655 	if (!master)
2656 		return ERR_PTR(-ENOMEM);
2657 
2658 	master->dev = dev;
2659 	master->smmu = smmu;
2660 	INIT_LIST_HEAD(&master->bonds);
2661 	dev_iommu_priv_set(dev, master);
2662 
2663 	ret = arm_smmu_insert_master(smmu, master);
2664 	if (ret)
2665 		goto err_free_master;
2666 
2667 	device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
2668 	master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
2669 
2670 	/*
2671 	 * Note that PASID must be enabled before, and disabled after ATS:
2672 	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
2673 	 *
2674 	 *   Behavior is undefined if this bit is Set and the value of the PASID
2675 	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
2676 	 *   are changed.
2677 	 */
2678 	arm_smmu_enable_pasid(master);
2679 
2680 	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
2681 		master->ssid_bits = min_t(u8, master->ssid_bits,
2682 					  CTXDESC_LINEAR_CDMAX);
2683 
2684 	if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
2685 	     device_property_read_bool(dev, "dma-can-stall")) ||
2686 	    smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
2687 		master->stall_enabled = true;
2688 
2689 	return &smmu->iommu;
2690 
2691 err_free_master:
2692 	kfree(master);
2693 	return ERR_PTR(ret);
2694 }
2695 
2696 static void arm_smmu_release_device(struct device *dev)
2697 {
2698 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2699 
2700 	if (WARN_ON(arm_smmu_master_sva_enabled(master)))
2701 		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
2702 	arm_smmu_detach_dev(master);
2703 	arm_smmu_disable_pasid(master);
2704 	arm_smmu_remove_master(master);
2705 	if (master->cd_table.cdtab)
2706 		arm_smmu_free_cd_tables(master);
2707 	kfree(master);
2708 }
2709 
2710 static struct iommu_group *arm_smmu_device_group(struct device *dev)
2711 {
2712 	struct iommu_group *group;
2713 
2714 	/*
2715 	 * We don't support devices sharing stream IDs other than PCI RID
2716 	 * aliases, since the necessary ID-to-device lookup becomes rather
2717 	 * impractical given a potentially sparse 32-bit stream ID space.
2718 	 */
2719 	if (dev_is_pci(dev))
2720 		group = pci_device_group(dev);
2721 	else
2722 		group = generic_device_group(dev);
2723 
2724 	return group;
2725 }
2726 
2727 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
2728 {
2729 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
2730 	int ret = 0;
2731 
2732 	mutex_lock(&smmu_domain->init_mutex);
2733 	if (smmu_domain->smmu)
2734 		ret = -EPERM;
2735 	else
2736 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
2737 	mutex_unlock(&smmu_domain->init_mutex);
2738 
2739 	return ret;
2740 }
2741 
2742 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
2743 {
2744 	return iommu_fwspec_add_ids(dev, args->args, 1);
2745 }
2746 
2747 static void arm_smmu_get_resv_regions(struct device *dev,
2748 				      struct list_head *head)
2749 {
2750 	struct iommu_resv_region *region;
2751 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
2752 
2753 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
2754 					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
2755 	if (!region)
2756 		return;
2757 
2758 	list_add_tail(&region->list, head);
2759 
2760 	iommu_dma_get_resv_regions(dev, head);
2761 }
2762 
2763 static int arm_smmu_dev_enable_feature(struct device *dev,
2764 				       enum iommu_dev_features feat)
2765 {
2766 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2767 
2768 	if (!master)
2769 		return -ENODEV;
2770 
2771 	switch (feat) {
2772 	case IOMMU_DEV_FEAT_IOPF:
2773 		if (!arm_smmu_master_iopf_supported(master))
2774 			return -EINVAL;
2775 		if (master->iopf_enabled)
2776 			return -EBUSY;
2777 		master->iopf_enabled = true;
2778 		return 0;
2779 	case IOMMU_DEV_FEAT_SVA:
2780 		if (!arm_smmu_master_sva_supported(master))
2781 			return -EINVAL;
2782 		if (arm_smmu_master_sva_enabled(master))
2783 			return -EBUSY;
2784 		return arm_smmu_master_enable_sva(master);
2785 	default:
2786 		return -EINVAL;
2787 	}
2788 }
2789 
2790 static int arm_smmu_dev_disable_feature(struct device *dev,
2791 					enum iommu_dev_features feat)
2792 {
2793 	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
2794 
2795 	if (!master)
2796 		return -EINVAL;
2797 
2798 	switch (feat) {
2799 	case IOMMU_DEV_FEAT_IOPF:
2800 		if (!master->iopf_enabled)
2801 			return -EINVAL;
2802 		if (master->sva_enabled)
2803 			return -EBUSY;
2804 		master->iopf_enabled = false;
2805 		return 0;
2806 	case IOMMU_DEV_FEAT_SVA:
2807 		if (!arm_smmu_master_sva_enabled(master))
2808 			return -EINVAL;
2809 		return arm_smmu_master_disable_sva(master);
2810 	default:
2811 		return -EINVAL;
2812 	}
2813 }
2814 
2815 /*
2816  * The HiSilicon PCIe tune and trace device can be used to trace TLP headers
2817  * on the PCIe link and save the data to memory by DMA. The hardware is
2818  * restricted to identity mapping only.
2819  */
2820 #define IS_HISI_PTT_DEVICE(pdev)	((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
2821 					 (pdev)->device == 0xa12e)
2822 
2823 static int arm_smmu_def_domain_type(struct device *dev)
2824 {
2825 	if (dev_is_pci(dev)) {
2826 		struct pci_dev *pdev = to_pci_dev(dev);
2827 
2828 		if (IS_HISI_PTT_DEVICE(pdev))
2829 			return IOMMU_DOMAIN_IDENTITY;
2830 	}
2831 
2832 	return 0;
2833 }
2834 
2835 static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
2836 {
2837 	struct iommu_domain *domain;
2838 
2839 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
2840 	if (WARN_ON(IS_ERR(domain)) || !domain)
2841 		return;
2842 
2843 	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
2844 }
2845 
2846 static struct iommu_ops arm_smmu_ops = {
2847 	.capable		= arm_smmu_capable,
2848 	.domain_alloc		= arm_smmu_domain_alloc,
2849 	.probe_device		= arm_smmu_probe_device,
2850 	.release_device		= arm_smmu_release_device,
2851 	.device_group		= arm_smmu_device_group,
2852 	.of_xlate		= arm_smmu_of_xlate,
2853 	.get_resv_regions	= arm_smmu_get_resv_regions,
2854 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
2855 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
2856 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
2857 	.page_response		= arm_smmu_page_response,
2858 	.def_domain_type	= arm_smmu_def_domain_type,
2859 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
2860 	.owner			= THIS_MODULE,
2861 	.default_domain_ops = &(const struct iommu_domain_ops) {
2862 		.attach_dev		= arm_smmu_attach_dev,
2863 		.map_pages		= arm_smmu_map_pages,
2864 		.unmap_pages		= arm_smmu_unmap_pages,
2865 		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
2866 		.iotlb_sync		= arm_smmu_iotlb_sync,
2867 		.iova_to_phys		= arm_smmu_iova_to_phys,
2868 		.enable_nesting		= arm_smmu_enable_nesting,
2869 		.free			= arm_smmu_domain_free,
2870 	}
2871 };
2872 
2873 /* Probing and initialisation functions */
2874 static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
2875 				   struct arm_smmu_queue *q,
2876 				   void __iomem *page,
2877 				   unsigned long prod_off,
2878 				   unsigned long cons_off,
2879 				   size_t dwords, const char *name)
2880 {
2881 	size_t qsz;
2882 
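	/*
	 * Each entry is 'dwords' 64-bit words, so qsz = entries * dwords * 8
	 * bytes (e.g. 256 two-dword commands fit in one 4K page). If the
	 * allocation fails, retry with half as many entries, giving up once
	 * the queue would be smaller than a page.
	 */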
2883 	do {
2884 		qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
2885 		q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
2886 					      GFP_KERNEL);
2887 		if (q->base || qsz < PAGE_SIZE)
2888 			break;
2889 
2890 		q->llq.max_n_shift--;
2891 	} while (1);
2892 
2893 	if (!q->base) {
2894 		dev_err(smmu->dev,
2895 			"failed to allocate queue (0x%zx bytes) for %s\n",
2896 			qsz, name);
2897 		return -ENOMEM;
2898 	}
2899 
2900 	if (!WARN_ON(q->base_dma & (qsz - 1))) {
2901 		dev_info(smmu->dev, "allocated %u entries for %s\n",
2902 			 1 << q->llq.max_n_shift, name);
2903 	}
2904 
2905 	q->prod_reg	= page + prod_off;
2906 	q->cons_reg	= page + cons_off;
2907 	q->ent_dwords	= dwords;
2908 
2909 	q->q_base  = Q_BASE_RWA;
2910 	q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
2911 	q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
2912 
2913 	q->llq.prod = q->llq.cons = 0;
2914 	return 0;
2915 }
2916 
2917 static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
2918 {
2919 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
2920 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
2921 
2922 	atomic_set(&cmdq->owner_prod, 0);
2923 	atomic_set(&cmdq->lock, 0);
2924 
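	/*
	 * One bit per command queue entry: producers mark their slots valid
	 * here, and the queue owner polls it before publishing prod to the
	 * hardware.
	 */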
2925 	cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
2926 							      GFP_KERNEL);
2927 	if (!cmdq->valid_map)
2928 		return -ENOMEM;
2929 
2930 	return 0;
2931 }
2932 
2933 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
2934 {
2935 	int ret;
2936 
2937 	/* cmdq */
2938 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
2939 				      ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
2940 				      CMDQ_ENT_DWORDS, "cmdq");
2941 	if (ret)
2942 		return ret;
2943 
2944 	ret = arm_smmu_cmdq_init(smmu);
2945 	if (ret)
2946 		return ret;
2947 
2948 	/* evtq */
2949 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
2950 				      ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
2951 				      EVTQ_ENT_DWORDS, "evtq");
2952 	if (ret)
2953 		return ret;
2954 
2955 	if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
2956 	    (smmu->features & ARM_SMMU_FEAT_STALLS)) {
2957 		smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
2958 		if (!smmu->evtq.iopf)
2959 			return -ENOMEM;
2960 	}
2961 
2962 	/* priq */
2963 	if (!(smmu->features & ARM_SMMU_FEAT_PRI))
2964 		return 0;
2965 
2966 	return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
2967 				       ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
2968 				       PRIQ_ENT_DWORDS, "priq");
2969 }
2970 
2971 static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
2972 {
2973 	unsigned int i;
2974 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2975 	void *strtab = smmu->strtab_cfg.strtab;
2976 
2977 	cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents,
2978 				    sizeof(*cfg->l1_desc), GFP_KERNEL);
2979 	if (!cfg->l1_desc)
2980 		return -ENOMEM;
2981 
2982 	for (i = 0; i < cfg->num_l1_ents; ++i) {
2983 		arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
2984 		strtab += STRTAB_L1_DESC_DWORDS << 3;
2985 	}
2986 
2987 	return 0;
2988 }
2989 
2990 static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
2991 {
2992 	void *strtab;
2993 	u64 reg;
2994 	u32 size, l1size;
2995 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
2996 
2997 	/* Calculate the L1 size, capped to the SIDSIZE. */
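	/*
	 * e.g. assuming STRTAB_L1_SZ_SHIFT == 20 and one dword per L1
	 * descriptor, the L1 is capped at 2^17 entries; with STRTAB_SPLIT == 8
	 * that covers 25 bits of SID, hence the warning below for larger
	 * SIDSIZEs.
	 */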
2998 	size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
2999 	size = min(size, smmu->sid_bits - STRTAB_SPLIT);
3000 	cfg->num_l1_ents = 1 << size;
3001 
3002 	size += STRTAB_SPLIT;
3003 	if (size < smmu->sid_bits)
3004 		dev_warn(smmu->dev,
3005 			 "2-level strtab only covers %u/%u bits of SID\n",
3006 			 size, smmu->sid_bits);
3007 
3008 	l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3);
3009 	strtab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->strtab_dma,
3010 				     GFP_KERNEL);
3011 	if (!strtab) {
3012 		dev_err(smmu->dev,
3013 			"failed to allocate l1 stream table (%u bytes)\n",
3014 			l1size);
3015 		return -ENOMEM;
3016 	}
3017 	cfg->strtab = strtab;
3018 
3019 	/* Configure strtab_base_cfg for 2 levels */
3020 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_2LVL);
3021 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size);
3022 	reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
3023 	cfg->strtab_base_cfg = reg;
3024 
3025 	return arm_smmu_init_l1_strtab(smmu);
3026 }
3027 
3028 static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
3029 {
3030 	void *strtab;
3031 	u64 reg;
3032 	u32 size;
3033 	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
3034 
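	/* One STE (STRTAB_STE_DWORDS * 8 bytes) per possible SID */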
3035 	size = (1 << smmu->sid_bits) * (STRTAB_STE_DWORDS << 3);
3036 	strtab = dmam_alloc_coherent(smmu->dev, size, &cfg->strtab_dma,
3037 				     GFP_KERNEL);
3038 	if (!strtab) {
3039 		dev_err(smmu->dev,
3040 			"failed to allocate linear stream table (%u bytes)\n",
3041 			size);
3042 		return -ENOMEM;
3043 	}
3044 	cfg->strtab = strtab;
3045 	cfg->num_l1_ents = 1 << smmu->sid_bits;
3046 
3047 	/* Configure strtab_base_cfg for a linear table covering all SIDs */
3048 	reg  = FIELD_PREP(STRTAB_BASE_CFG_FMT, STRTAB_BASE_CFG_FMT_LINEAR);
3049 	reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
3050 	cfg->strtab_base_cfg = reg;
3051 
3052 	arm_smmu_init_bypass_stes(strtab, cfg->num_l1_ents, false);
3053 	return 0;
3054 }
3055 
3056 static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
3057 {
3058 	u64 reg;
3059 	int ret;
3060 
3061 	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
3062 		ret = arm_smmu_init_strtab_2lvl(smmu);
3063 	else
3064 		ret = arm_smmu_init_strtab_linear(smmu);
3065 
3066 	if (ret)
3067 		return ret;
3068 
3069 	/* Set the strtab base address */
3070 	reg  = smmu->strtab_cfg.strtab_dma & STRTAB_BASE_ADDR_MASK;
3071 	reg |= STRTAB_BASE_RA;
3072 	smmu->strtab_cfg.strtab_base = reg;
3073 
3074 	ida_init(&smmu->vmid_map);
3075 
3076 	return 0;
3077 }
3078 
3079 static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
3080 {
3081 	int ret;
3082 
3083 	mutex_init(&smmu->streams_mutex);
3084 	smmu->streams = RB_ROOT;
3085 
3086 	ret = arm_smmu_init_queues(smmu);
3087 	if (ret)
3088 		return ret;
3089 
3090 	return arm_smmu_init_strtab(smmu);
3091 }
3092 
3093 static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
3094 				   unsigned int reg_off, unsigned int ack_off)
3095 {
3096 	u32 reg;
3097 
3098 	writel_relaxed(val, smmu->base + reg_off);
3099 	return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
3100 					  1, ARM_SMMU_POLL_TIMEOUT_US);
3101 }
3102 
3103 /* GBPA is "special": updates are gated on its UPDATE bit rather than an ACK register */
3104 static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
3105 {
3106 	int ret;
3107 	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
3108 
3109 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3110 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3111 	if (ret)
3112 		return ret;
3113 
3114 	reg &= ~clr;
3115 	reg |= set;
3116 	writel_relaxed(reg | GBPA_UPDATE, gbpa);
3117 	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
3118 					 1, ARM_SMMU_POLL_TIMEOUT_US);
3119 
3120 	if (ret)
3121 		dev_err(smmu->dev, "GBPA not responding to update\n");
3122 	return ret;
3123 }
3124 
3125 static void arm_smmu_free_msis(void *data)
3126 {
3127 	struct device *dev = data;
3128 	platform_msi_domain_free_irqs(dev);
3129 }
3130 
3131 static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
3132 {
3133 	phys_addr_t doorbell;
3134 	struct device *dev = msi_desc_to_dev(desc);
3135 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
3136 	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
3137 
3138 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
3139 	doorbell &= MSI_CFG0_ADDR_MASK;
3140 
3141 	writeq_relaxed(doorbell, smmu->base + cfg[0]);
3142 	writel_relaxed(msg->data, smmu->base + cfg[1]);
3143 	writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
3144 }
3145 
3146 static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
3147 {
3148 	int ret, nvec = ARM_SMMU_MAX_MSIS;
3149 	struct device *dev = smmu->dev;
3150 
3151 	/* Clear the MSI address regs */
3152 	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
3153 	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
3154 
3155 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3156 		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
3157 	else
3158 		nvec--;
3159 
3160 	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
3161 		return;
3162 
3163 	if (!dev->msi.domain) {
3164 		dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
3165 		return;
3166 	}
3167 
3168 	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
3169 	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
3170 	if (ret) {
3171 		dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
3172 		return;
3173 	}
3174 
3175 	smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
3176 	smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
3177 	smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
3178 
3179 	/* Add callback to free MSIs on teardown */
3180 	devm_add_action(dev, arm_smmu_free_msis, dev);
3181 }
3182 
3183 static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
3184 {
3185 	int irq, ret;
3186 
3187 	arm_smmu_setup_msis(smmu);
3188 
3189 	/* Request interrupt lines */
3190 	irq = smmu->evtq.q.irq;
3191 	if (irq) {
3192 		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3193 						arm_smmu_evtq_thread,
3194 						IRQF_ONESHOT,
3195 						"arm-smmu-v3-evtq", smmu);
3196 		if (ret < 0)
3197 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
3198 	} else {
3199 		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
3200 	}
3201 
3202 	irq = smmu->gerr_irq;
3203 	if (irq) {
3204 		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
3205 				       0, "arm-smmu-v3-gerror", smmu);
3206 		if (ret < 0)
3207 			dev_warn(smmu->dev, "failed to enable gerror irq\n");
3208 	} else {
3209 		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
3210 	}
3211 
3212 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3213 		irq = smmu->priq.q.irq;
3214 		if (irq) {
3215 			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
3216 							arm_smmu_priq_thread,
3217 							IRQF_ONESHOT,
3218 							"arm-smmu-v3-priq",
3219 							smmu);
3220 			if (ret < 0)
3221 				dev_warn(smmu->dev,
3222 					 "failed to enable priq irq\n");
3223 		} else {
3224 			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
3225 		}
3226 	}
3227 }
3228 
3229 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
3230 {
3231 	int ret, irq;
3232 	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
3233 
3234 	/* Disable IRQs first */
3235 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
3236 				      ARM_SMMU_IRQ_CTRLACK);
3237 	if (ret) {
3238 		dev_err(smmu->dev, "failed to disable irqs\n");
3239 		return ret;
3240 	}
3241 
3242 	irq = smmu->combined_irq;
3243 	if (irq) {
3244 		/*
3245 		 * The Cavium ThunderX2 implementation doesn't support unique
3246 		 * irq lines, so use a single irq line for all SMMUv3 interrupts.
3247 		 */
3248 		ret = devm_request_threaded_irq(smmu->dev, irq,
3249 					arm_smmu_combined_irq_handler,
3250 					arm_smmu_combined_irq_thread,
3251 					IRQF_ONESHOT,
3252 					"arm-smmu-v3-combined-irq", smmu);
3253 		if (ret < 0)
3254 			dev_warn(smmu->dev, "failed to enable combined irq\n");
3255 	} else
3256 		arm_smmu_setup_unique_irqs(smmu);
3257 
3258 	if (smmu->features & ARM_SMMU_FEAT_PRI)
3259 		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
3260 
3261 	/* Enable interrupt generation on the SMMU */
3262 	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
3263 				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
3264 	if (ret)
3265 		dev_warn(smmu->dev, "failed to enable irqs\n");
3266 
3267 	return 0;
3268 }
3269 
3270 static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
3271 {
3272 	int ret;
3273 
3274 	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
3275 	if (ret)
3276 		dev_err(smmu->dev, "failed to clear cr0\n");
3277 
3278 	return ret;
3279 }
3280 
3281 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
3282 {
3283 	int ret;
3284 	u32 reg, enables;
3285 	struct arm_smmu_cmdq_ent cmd;
3286 
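	/*
	 * The programming order below matters: the stream table and queues are
	 * set up and the command queue enabled (so that the configuration and
	 * TLB invalidations can be issued) before SMMUEN finally enables
	 * translation.
	 */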
3287 	/* Clear CR0 and sync (disables SMMU and queue processing) */
3288 	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
3289 	if (reg & CR0_SMMUEN) {
3290 		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
3291 		WARN_ON(is_kdump_kernel() && !disable_bypass);
3292 		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
3293 	}
3294 
3295 	ret = arm_smmu_device_disable(smmu);
3296 	if (ret)
3297 		return ret;
3298 
3299 	/* CR1 (table and queue memory attributes) */
3300 	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
3301 	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
3302 	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
3303 	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
3304 	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
3305 	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
3306 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);
3307 
3308 	/* CR2 (private TLB maintenance, bad StreamID recording, E2H) */
3309 	reg = CR2_PTM | CR2_RECINVSID;
3310 
3311 	if (smmu->features & ARM_SMMU_FEAT_E2H)
3312 		reg |= CR2_E2H;
3313 
3314 	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);
3315 
3316 	/* Stream table */
3317 	writeq_relaxed(smmu->strtab_cfg.strtab_base,
3318 		       smmu->base + ARM_SMMU_STRTAB_BASE);
3319 	writel_relaxed(smmu->strtab_cfg.strtab_base_cfg,
3320 		       smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
3321 
3322 	/* Command queue */
3323 	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
3324 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
3325 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
3326 
3327 	enables = CR0_CMDQEN;
3328 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3329 				      ARM_SMMU_CR0ACK);
3330 	if (ret) {
3331 		dev_err(smmu->dev, "failed to enable command queue\n");
3332 		return ret;
3333 	}
3334 
3335 	/* Invalidate any cached configuration */
3336 	cmd.opcode = CMDQ_OP_CFGI_ALL;
3337 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3338 
3339 	/* Invalidate any stale TLB entries */
3340 	if (smmu->features & ARM_SMMU_FEAT_HYP) {
3341 		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
3342 		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3343 	}
3344 
3345 	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
3346 	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
3347 
3348 	/* Event queue */
3349 	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
3350 	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
3351 	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);
3352 
3353 	enables |= CR0_EVTQEN;
3354 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3355 				      ARM_SMMU_CR0ACK);
3356 	if (ret) {
3357 		dev_err(smmu->dev, "failed to enable event queue\n");
3358 		return ret;
3359 	}
3360 
3361 	/* PRI queue */
3362 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
3363 		writeq_relaxed(smmu->priq.q.q_base,
3364 			       smmu->base + ARM_SMMU_PRIQ_BASE);
3365 		writel_relaxed(smmu->priq.q.llq.prod,
3366 			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
3367 		writel_relaxed(smmu->priq.q.llq.cons,
3368 			       smmu->page1 + ARM_SMMU_PRIQ_CONS);
3369 
3370 		enables |= CR0_PRIQEN;
3371 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3372 					      ARM_SMMU_CR0ACK);
3373 		if (ret) {
3374 			dev_err(smmu->dev, "failed to enable PRI queue\n");
3375 			return ret;
3376 		}
3377 	}
3378 
3379 	if (smmu->features & ARM_SMMU_FEAT_ATS) {
3380 		enables |= CR0_ATSCHK;
3381 		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3382 					      ARM_SMMU_CR0ACK);
3383 		if (ret) {
3384 			dev_err(smmu->dev, "failed to enable ATS check\n");
3385 			return ret;
3386 		}
3387 	}
3388 
3389 	ret = arm_smmu_setup_irqs(smmu);
3390 	if (ret) {
3391 		dev_err(smmu->dev, "failed to setup irqs\n");
3392 		return ret;
3393 	}
3394 
3395 	if (is_kdump_kernel())
3396 		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
3397 
3398 	/* Enable the SMMU interface, or ensure bypass */
3399 	if (!bypass || disable_bypass) {
3400 		enables |= CR0_SMMUEN;
3401 	} else {
3402 		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
3403 		if (ret)
3404 			return ret;
3405 	}
3406 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
3407 				      ARM_SMMU_CR0ACK);
3408 	if (ret) {
3409 		dev_err(smmu->dev, "failed to enable SMMU interface\n");
3410 		return ret;
3411 	}
3412 
3413 	return 0;
3414 }
3415 
3416 #define IIDR_IMPLEMENTER_ARM		0x43b
3417 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
3418 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
3419 
3420 static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
3421 {
3422 	u32 reg;
3423 	unsigned int implementer, productid, variant, revision;
3424 
3425 	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
3426 	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
3427 	productid = FIELD_GET(IIDR_PRODUCTID, reg);
3428 	variant = FIELD_GET(IIDR_VARIANT, reg);
3429 	revision = FIELD_GET(IIDR_REVISION, reg);
3430 
3431 	switch (implementer) {
3432 	case IIDR_IMPLEMENTER_ARM:
3433 		switch (productid) {
3434 		case IIDR_PRODUCTID_ARM_MMU_600:
3435 			/* Arm erratum 1076982 */
3436 			if (variant == 0 && revision <= 2)
3437 				smmu->features &= ~ARM_SMMU_FEAT_SEV;
3438 			/* Arm erratum 1209401 */
3439 			if (variant < 2)
3440 				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3441 			break;
3442 		case IIDR_PRODUCTID_ARM_MMU_700:
3443 			/* Arm erratum 2812531 */
3444 			smmu->features &= ~ARM_SMMU_FEAT_BTM;
3445 			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
3446 			/* Arm errata 2268618, 2812531 */
3447 			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
3448 			break;
3449 		}
3450 		break;
3451 	}
3452 }
3453 
3454 static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
3455 {
3456 	u32 reg;
3457 	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;
3458 
3459 	/* IDR0 */
3460 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);
3461 
3462 	/* 2-level structures */
3463 	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
3464 		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;
3465 
3466 	if (reg & IDR0_CD2L)
3467 		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;
3468 
3469 	/*
3470 	 * Translation table endianness.
3471 	 * We currently require the same endianness as the CPU, but this
3472 	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
3473 	 */
3474 	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
3475 	case IDR0_TTENDIAN_MIXED:
3476 		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
3477 		break;
3478 #ifdef __BIG_ENDIAN
3479 	case IDR0_TTENDIAN_BE:
3480 		smmu->features |= ARM_SMMU_FEAT_TT_BE;
3481 		break;
3482 #else
3483 	case IDR0_TTENDIAN_LE:
3484 		smmu->features |= ARM_SMMU_FEAT_TT_LE;
3485 		break;
3486 #endif
3487 	default:
3488 		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
3489 		return -ENXIO;
3490 	}
3491 
3492 	/* Boolean feature flags */
3493 	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
3494 		smmu->features |= ARM_SMMU_FEAT_PRI;
3495 
3496 	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
3497 		smmu->features |= ARM_SMMU_FEAT_ATS;
3498 
3499 	if (reg & IDR0_SEV)
3500 		smmu->features |= ARM_SMMU_FEAT_SEV;
3501 
3502 	if (reg & IDR0_MSI) {
3503 		smmu->features |= ARM_SMMU_FEAT_MSI;
3504 		if (coherent && !disable_msipolling)
3505 			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
3506 	}
3507 
3508 	if (reg & IDR0_HYP) {
3509 		smmu->features |= ARM_SMMU_FEAT_HYP;
3510 		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
3511 			smmu->features |= ARM_SMMU_FEAT_E2H;
3512 	}
3513 
3514 	/*
3515 	 * The coherency feature as set by FW is used in preference to the ID
3516 	 * register, but warn on mismatch.
3517 	 */
3518 	if (!!(reg & IDR0_COHACC) != coherent)
3519 		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
3520 			 coherent ? "true" : "false");
3521 
3522 	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
3523 	case IDR0_STALL_MODEL_FORCE:
3524 		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
3525 		fallthrough;
3526 	case IDR0_STALL_MODEL_STALL:
3527 		smmu->features |= ARM_SMMU_FEAT_STALLS;
3528 	}
3529 
3530 	if (reg & IDR0_S1P)
3531 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
3532 
3533 	if (reg & IDR0_S2P)
3534 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
3535 
3536 	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
3537 		dev_err(smmu->dev, "no translation support!\n");
3538 		return -ENXIO;
3539 	}
3540 
3541 	/* We only support the AArch64 table format at present */
3542 	switch (FIELD_GET(IDR0_TTF, reg)) {
3543 	case IDR0_TTF_AARCH32_64:
3544 		smmu->ias = 40;
3545 		fallthrough;
3546 	case IDR0_TTF_AARCH64:
3547 		break;
3548 	default:
3549 		dev_err(smmu->dev, "AArch64 table format not supported!\n");
3550 		return -ENXIO;
3551 	}
3552 
3553 	/* ASID/VMID sizes */
3554 	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
3555 	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;
3556 
3557 	/* IDR1 */
3558 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
3559 	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
3560 		dev_err(smmu->dev, "embedded implementation not supported\n");
3561 		return -ENXIO;
3562 	}
3563 
3564 	/* Queue sizes, capped to ensure natural alignment */
3565 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
3566 					     FIELD_GET(IDR1_CMDQS, reg));
3567 	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
3568 		/*
3569 		 * We don't support splitting up batches, so one batch of
3570 		 * commands plus an extra sync needs to fit inside the command
3571 		 * queue. There's also no way we can handle the weird alignment
3572 		 * restrictions on the base pointer for a unit-length queue.
3573 		 */
3574 		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
3575 			CMDQ_BATCH_ENTRIES);
3576 		return -ENXIO;
3577 	}
3578 
3579 	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
3580 					     FIELD_GET(IDR1_EVTQS, reg));
3581 	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
3582 					     FIELD_GET(IDR1_PRIQS, reg));
3583 
3584 	/* SID/SSID sizes */
3585 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
3586 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
3587 	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
3588 
3589 	/*
3590 	 * If the SMMU supports fewer bits than would fill a single L2 stream
3591 	 * table, use a linear table instead.
3592 	 */
3593 	if (smmu->sid_bits <= STRTAB_SPLIT)
3594 		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;
3595 
3596 	/* IDR3 */
3597 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
3598 	if (FIELD_GET(IDR3_RIL, reg))
3599 		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
3600 
3601 	/* IDR5 */
3602 	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
3603 
3604 	/* Maximum number of outstanding stalls */
3605 	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);
3606 
3607 	/* Page sizes */
3608 	if (reg & IDR5_GRAN64K)
3609 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
3610 	if (reg & IDR5_GRAN16K)
3611 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
3612 	if (reg & IDR5_GRAN4K)
3613 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
3614 
3615 	/* Input address size */
3616 	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
3617 		smmu->features |= ARM_SMMU_FEAT_VAX;
3618 
3619 	/* Output address size */
3620 	switch (FIELD_GET(IDR5_OAS, reg)) {
3621 	case IDR5_OAS_32_BIT:
3622 		smmu->oas = 32;
3623 		break;
3624 	case IDR5_OAS_36_BIT:
3625 		smmu->oas = 36;
3626 		break;
3627 	case IDR5_OAS_40_BIT:
3628 		smmu->oas = 40;
3629 		break;
3630 	case IDR5_OAS_42_BIT:
3631 		smmu->oas = 42;
3632 		break;
3633 	case IDR5_OAS_44_BIT:
3634 		smmu->oas = 44;
3635 		break;
3636 	case IDR5_OAS_52_BIT:
3637 		smmu->oas = 52;
3638 		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
3639 		break;
3640 	default:
3641 		dev_info(smmu->dev,
3642 			"unknown output address size. Truncating to 48-bit\n");
3643 		fallthrough;
3644 	case IDR5_OAS_48_BIT:
3645 		smmu->oas = 48;
3646 	}
3647 
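	/*
	 * The first SMMU probed seeds the pgsize_bitmap advertised by
	 * arm_smmu_ops; any further SMMUs OR in their supported page sizes.
	 */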
3648 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
3649 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
3650 	else
3651 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
3652 
3653 	/* Set the DMA mask for our table walker */
3654 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
3655 		dev_warn(smmu->dev,
3656 			 "failed to set DMA mask for table walker\n");
3657 
3658 	smmu->ias = max(smmu->ias, smmu->oas);
3659 
3660 	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
3661 	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
3662 		smmu->features |= ARM_SMMU_FEAT_NESTING;
3663 
3664 	arm_smmu_device_iidr_probe(smmu);
3665 
3666 	if (arm_smmu_sva_supported(smmu))
3667 		smmu->features |= ARM_SMMU_FEAT_SVA;
3668 
3669 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
3670 		 smmu->ias, smmu->oas, smmu->features);
3671 	return 0;
3672 }
3673 
3674 #ifdef CONFIG_ACPI
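/* Map IORT model identifiers to the corresponding driver quirk options. */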
3675 static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
3676 {
3677 	switch (model) {
3678 	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
3679 		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
3680 		break;
3681 	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
3682 		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
3683 		break;
3684 	}
3685 
3686 	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
3687 }
3688 
3689 static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3690 				      struct arm_smmu_device *smmu)
3691 {
3692 	struct acpi_iort_smmu_v3 *iort_smmu;
3693 	struct device *dev = smmu->dev;
3694 	struct acpi_iort_node *node;
3695 
3696 	node = *(struct acpi_iort_node **)dev_get_platdata(dev);
3697 
3698 	/* Retrieve SMMUv3 specific data */
3699 	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
3700 
3701 	acpi_smmu_get_options(iort_smmu->model, smmu);
3702 
3703 	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
3704 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3705 
3706 	return 0;
3707 }
3708 #else
3709 static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
3710 					     struct arm_smmu_device *smmu)
3711 {
3712 	return -ENODEV;
3713 }
3714 #endif
3715 
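/*
 * Parse the DT node: "#iommu-cells" must be present and equal to 1, while the
 * quirk and coherency properties are picked up regardless of the result.
 */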
3716 static int arm_smmu_device_dt_probe(struct platform_device *pdev,
3717 				    struct arm_smmu_device *smmu)
3718 {
3719 	struct device *dev = &pdev->dev;
3720 	u32 cells;
3721 	int ret = -EINVAL;
3722 
3723 	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
3724 		dev_err(dev, "missing #iommu-cells property\n");
3725 	else if (cells != 1)
3726 		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
3727 	else
3728 		ret = 0;
3729 
3730 	parse_driver_options(smmu);
3731 
3732 	if (of_dma_is_coherent(dev->of_node))
3733 		smmu->features |= ARM_SMMU_FEAT_COHERENCY;
3734 
3735 	return ret;
3736 }
3737 
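/*
 * Implementations with ARM_SMMU_OPT_PAGE0_REGS_ONLY expose only the 64K
 * register page 0; everything else uses the full 128K (register pages 0 and 1).
 */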
3738 static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
3739 {
3740 	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
3741 		return SZ_64K;
3742 	else
3743 		return SZ_128K;
3744 }
3745 
3746 static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
3747 				      resource_size_t size)
3748 {
3749 	struct resource res = DEFINE_RES_MEM(start, size);
3750 
3751 	return devm_ioremap_resource(dev, &res);
3752 }
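/*
 * Install bypass STEs for any stream IDs described by IORT RMR (reserved
 * memory range) nodes, so that DMA already set up by firmware is not blocked
 * once the SMMU is enabled.
 */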
3753 
3754 static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
3755 {
3756 	struct list_head rmr_list;
3757 	struct iommu_resv_region *e;
3758 
3759 	INIT_LIST_HEAD(&rmr_list);
3760 	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3761 
3762 	list_for_each_entry(e, &rmr_list, list) {
3763 		struct arm_smmu_ste *step;
3764 		struct iommu_iort_rmr_data *rmr;
3765 		int ret, i;
3766 
3767 		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
3768 		for (i = 0; i < rmr->num_sids; i++) {
3769 			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
3770 			if (ret) {
3771 				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
3772 					rmr->sids[i]);
3773 				continue;
3774 			}
3775 
3776 			step = arm_smmu_get_step_for_sid(smmu, rmr->sids[i]);
3777 			arm_smmu_init_bypass_stes(step, 1, true);
3778 		}
3779 	}
3780 
3781 	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
3782 }
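/*
 * Main probe path: discover the SMMU via DT or ACPI (IORT), map its register
 * pages, pick up its interrupts, allocate the in-memory structures, reset the
 * hardware and finally register with the IOMMU core.
 */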
3783 
3784 static int arm_smmu_device_probe(struct platform_device *pdev)
3785 {
3786 	int irq, ret;
3787 	struct resource *res;
3788 	resource_size_t ioaddr;
3789 	struct arm_smmu_device *smmu;
3790 	struct device *dev = &pdev->dev;
3791 	bool bypass;
3792 
3793 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
3794 	if (!smmu)
3795 		return -ENOMEM;
3796 	smmu->dev = dev;
3797 
3798 	if (dev->of_node) {
3799 		ret = arm_smmu_device_dt_probe(pdev, smmu);
3800 	} else {
3801 		ret = arm_smmu_device_acpi_probe(pdev, smmu);
3802 		if (ret == -ENODEV)
3803 			return ret;
3804 	}
3805 
3806 	/* Set bypass mode according to firmware probing result */
3807 	bypass = !!ret;
3808 
3809 	/* Base address */
3810 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
3811 	if (!res)
3812 		return -EINVAL;
3813 	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
3814 		dev_err(dev, "MMIO region too small (%pr)\n", res);
3815 		return -EINVAL;
3816 	}
3817 	ioaddr = res->start;
3818 
3819 	/*
3820 	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
3821 	 * the PMCG registers, which are reserved by the PMU driver.
3822 	 */
3823 	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
3824 	if (IS_ERR(smmu->base))
3825 		return PTR_ERR(smmu->base);
3826 
3827 	if (arm_smmu_resource_size(smmu) > SZ_64K) {
3828 		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
3829 					       ARM_SMMU_REG_SZ);
3830 		if (IS_ERR(smmu->page1))
3831 			return PTR_ERR(smmu->page1);
3832 	} else {
3833 		smmu->page1 = smmu->base;
3834 	}
3835 
3836 	/* Interrupt lines */
3837 
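	/*
	 * A single "combined" IRQ takes precedence; otherwise the event queue,
	 * PRI queue and global error interrupts are picked up individually.
	 */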
3838 	irq = platform_get_irq_byname_optional(pdev, "combined");
3839 	if (irq > 0)
3840 		smmu->combined_irq = irq;
3841 	else {
3842 		irq = platform_get_irq_byname_optional(pdev, "eventq");
3843 		if (irq > 0)
3844 			smmu->evtq.q.irq = irq;
3845 
3846 		irq = platform_get_irq_byname_optional(pdev, "priq");
3847 		if (irq > 0)
3848 			smmu->priq.q.irq = irq;
3849 
3850 		irq = platform_get_irq_byname_optional(pdev, "gerror");
3851 		if (irq > 0)
3852 			smmu->gerr_irq = irq;
3853 	}
3854 	/* Probe the h/w */
3855 	ret = arm_smmu_device_hw_probe(smmu);
3856 	if (ret)
3857 		return ret;
3858 
3859 	/* Initialise in-memory data structures */
3860 	ret = arm_smmu_init_structures(smmu);
3861 	if (ret)
3862 		return ret;
3863 
3864 	/* Record our private device structure */
3865 	platform_set_drvdata(pdev, smmu);
3866 
3867 	/* Check for RMRs and install bypass STEs if any */
3868 	arm_smmu_rmr_install_bypass_ste(smmu);
3869 
3870 	/* Reset the device */
3871 	ret = arm_smmu_device_reset(smmu, bypass);
3872 	if (ret)
3873 		return ret;
3874 
3875 	/* And we're up. Go go go! */
3876 	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
3877 				     "smmu3.%pa", &ioaddr);
3878 	if (ret)
3879 		return ret;
3880 
3881 	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
3882 	if (ret) {
3883 		dev_err(dev, "Failed to register iommu\n");
3884 		iommu_device_sysfs_remove(&smmu->iommu);
3885 		return ret;
3886 	}
3887 
3888 	return 0;
3889 }
3890 
3891 static void arm_smmu_device_remove(struct platform_device *pdev)
3892 {
3893 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3894 
3895 	iommu_device_unregister(&smmu->iommu);
3896 	iommu_device_sysfs_remove(&smmu->iommu);
3897 	arm_smmu_device_disable(smmu);
3898 	iopf_queue_free(smmu->evtq.iopf);
3899 	ida_destroy(&smmu->vmid_map);
3900 }
3901 
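/* Disable the SMMU on shutdown so it is left quiescent for reboot or kexec. */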
3902 static void arm_smmu_device_shutdown(struct platform_device *pdev)
3903 {
3904 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
3905 
3906 	arm_smmu_device_disable(smmu);
3907 }
3908 
3909 static const struct of_device_id arm_smmu_of_match[] = {
3910 	{ .compatible = "arm,smmu-v3", },
3911 	{ },
3912 };
3913 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
3914 
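/*
 * Make sure any outstanding SVA mmu_notifier callbacks have completed before
 * the module text can go away.
 */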
3915 static void arm_smmu_driver_unregister(struct platform_driver *drv)
3916 {
3917 	arm_smmu_sva_notifier_synchronize();
3918 	platform_driver_unregister(drv);
3919 }
3920 
3921 static struct platform_driver arm_smmu_driver = {
3922 	.driver	= {
3923 		.name			= "arm-smmu-v3",
3924 		.of_match_table		= arm_smmu_of_match,
3925 		.suppress_bind_attrs	= true,
3926 	},
3927 	.probe	= arm_smmu_device_probe,
3928 	.remove_new = arm_smmu_device_remove,
3929 	.shutdown = arm_smmu_device_shutdown,
3930 };
3931 module_driver(arm_smmu_driver, platform_driver_register,
3932 	      arm_smmu_driver_unregister);
3933 
3934 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
3935 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
3936 MODULE_ALIAS("platform:arm-smmu-v3");
3937 MODULE_LICENSE("GPL v2");
3938