xref: /linux/drivers/iommu/arm/arm-smmu/arm-smmu.c (revision 908fc4c2)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * IOMMU API for ARM architected SMMU implementations.
4  *
5  * Copyright (C) 2013 ARM Limited
6  *
7  * Author: Will Deacon <will.deacon@arm.com>
8  *
9  * This driver currently supports:
10  *	- SMMUv1 and v2 implementations
11  *	- Stream-matching and stream-indexing
12  *	- v7/v8 long-descriptor format
13  *	- Non-secure access to the SMMU
14  *	- Context fault reporting
15  *	- Extended Stream ID (16 bit)
16  */
17 
18 #define pr_fmt(fmt) "arm-smmu: " fmt
19 
20 #include <linux/acpi.h>
21 #include <linux/acpi_iort.h>
22 #include <linux/bitfield.h>
23 #include <linux/delay.h>
24 #include <linux/dma-iommu.h>
25 #include <linux/dma-mapping.h>
26 #include <linux/err.h>
27 #include <linux/interrupt.h>
28 #include <linux/io.h>
29 #include <linux/iopoll.h>
30 #include <linux/module.h>
31 #include <linux/of.h>
32 #include <linux/of_address.h>
33 #include <linux/of_device.h>
34 #include <linux/pci.h>
35 #include <linux/platform_device.h>
36 #include <linux/pm_runtime.h>
37 #include <linux/ratelimit.h>
38 #include <linux/slab.h>
39 
40 #include <linux/amba/bus.h>
41 #include <linux/fsl/mc.h>
42 
43 #include "arm-smmu.h"
44 
/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 *
 * This dummy is the value written to registers such as the TLB sync trigger.
 * The expansion is parenthesised so that it stays a single operand wherever
 * the macro is substituted.
 */
#define QCOM_DUMMY_VAL (-1)

/*
 * Base and length of the IOVA window named for MSIs. The code consuming
 * these lies outside this part of the file.
 */
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000
56 
57 static int force_stage;
58 module_param(force_stage, int, S_IRUGO);
59 MODULE_PARM_DESC(force_stage,
60 	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
61 static bool disable_bypass =
62 	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
63 module_param(disable_bypass, bool, S_IRUGO);
64 MODULE_PARM_DESC(disable_bypass,
65 	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
66 
67 #define s2cr_init_val (struct arm_smmu_s2cr){				\
68 	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
69 }
70 
71 static bool using_legacy_binding, using_generic_binding;
72 
73 static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
74 {
75 	if (pm_runtime_enabled(smmu->dev))
76 		return pm_runtime_resume_and_get(smmu->dev);
77 
78 	return 0;
79 }
80 
81 static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
82 {
83 	if (pm_runtime_enabled(smmu->dev))
84 		pm_runtime_put_autosuspend(smmu->dev);
85 }
86 
87 static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
88 {
89 	return container_of(dom, struct arm_smmu_domain, domain);
90 }
91 
92 static struct platform_driver arm_smmu_driver;
93 static struct iommu_ops arm_smmu_ops;
94 
95 #ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
96 static int arm_smmu_bus_init(struct iommu_ops *ops);
97 
98 static struct device_node *dev_get_dev_node(struct device *dev)
99 {
100 	if (dev_is_pci(dev)) {
101 		struct pci_bus *bus = to_pci_dev(dev)->bus;
102 
103 		while (!pci_is_root_bus(bus))
104 			bus = bus->parent;
105 		return of_node_get(bus->bridge->parent->of_node);
106 	}
107 
108 	return of_node_get(dev->of_node);
109 }
110 
111 static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
112 {
113 	*((__be32 *)data) = cpu_to_be32(alias);
114 	return 0; /* Continue walking */
115 }
116 
117 static int __find_legacy_master_phandle(struct device *dev, void *data)
118 {
119 	struct of_phandle_iterator *it = *(void **)data;
120 	struct device_node *np = it->node;
121 	int err;
122 
123 	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
124 			    "#stream-id-cells", -1)
125 		if (it->node == np) {
126 			*(void **)data = dev;
127 			return 1;
128 		}
129 	it->node = np;
130 	return err == -ENOENT ? 0 : err;
131 }
132 
133 static int arm_smmu_register_legacy_master(struct device *dev,
134 					   struct arm_smmu_device **smmu)
135 {
136 	struct device *smmu_dev;
137 	struct device_node *np;
138 	struct of_phandle_iterator it;
139 	void *data = &it;
140 	u32 *sids;
141 	__be32 pci_sid;
142 	int err;
143 
144 	np = dev_get_dev_node(dev);
145 	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
146 		of_node_put(np);
147 		return -ENODEV;
148 	}
149 
150 	it.node = np;
151 	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
152 				     __find_legacy_master_phandle);
153 	smmu_dev = data;
154 	of_node_put(np);
155 	if (err == 0)
156 		return -ENODEV;
157 	if (err < 0)
158 		return err;
159 
160 	if (dev_is_pci(dev)) {
161 		/* "mmu-masters" assumes Stream ID == Requester ID */
162 		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
163 				       &pci_sid);
164 		it.cur = &pci_sid;
165 		it.cur_count = 1;
166 	}
167 
168 	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
169 				&arm_smmu_ops);
170 	if (err)
171 		return err;
172 
173 	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
174 	if (!sids)
175 		return -ENOMEM;
176 
177 	*smmu = dev_get_drvdata(smmu_dev);
178 	of_phandle_iterator_args(&it, sids, it.cur_count);
179 	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
180 	kfree(sids);
181 	return err;
182 }
183 
184 /*
185  * With the legacy DT binding in play, we have no guarantees about
186  * probe order, but then we're also not doing default domains, so we can
187  * delay setting bus ops until we're sure every possible SMMU is ready,
188  * and that way ensure that no probe_device() calls get missed.
189  */
190 static int arm_smmu_legacy_bus_init(void)
191 {
192 	if (using_legacy_binding)
193 		return arm_smmu_bus_init(&arm_smmu_ops);
194 	return 0;
195 }
196 device_initcall_sync(arm_smmu_legacy_bus_init);
197 #else
198 static int arm_smmu_register_legacy_master(struct device *dev,
199 					   struct arm_smmu_device **smmu)
200 {
201 	return -ENODEV;
202 }
203 #endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */
204 
/* Release index @idx in the allocation bitmap @map by clearing its bit. */
static void
__arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}
209 
210 /* Wait for any pending TLB invalidations to complete */
211 static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
212 				int sync, int status)
213 {
214 	unsigned int spin_cnt, delay;
215 	u32 reg;
216 
217 	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
218 		return smmu->impl->tlb_sync(smmu, page, sync, status);
219 
220 	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
221 	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
222 		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
223 			reg = arm_smmu_readl(smmu, page, status);
224 			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
225 				return;
226 			cpu_relax();
227 		}
228 		udelay(delay);
229 	}
230 	dev_err_ratelimited(smmu->dev,
231 			    "TLB sync timed out -- SMMU may be deadlocked\n");
232 }
233 
234 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
235 {
236 	unsigned long flags;
237 
238 	spin_lock_irqsave(&smmu->global_sync_lock, flags);
239 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
240 			    ARM_SMMU_GR0_sTLBGSTATUS);
241 	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
242 }
243 
244 static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
245 {
246 	struct arm_smmu_device *smmu = smmu_domain->smmu;
247 	unsigned long flags;
248 
249 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
250 	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
251 			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
252 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
253 }
254 
255 static void arm_smmu_tlb_inv_context_s1(void *cookie)
256 {
257 	struct arm_smmu_domain *smmu_domain = cookie;
258 	/*
259 	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
260 	 * current CPU are visible beforehand.
261 	 */
262 	wmb();
263 	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
264 			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
265 	arm_smmu_tlb_sync_context(smmu_domain);
266 }
267 
268 static void arm_smmu_tlb_inv_context_s2(void *cookie)
269 {
270 	struct arm_smmu_domain *smmu_domain = cookie;
271 	struct arm_smmu_device *smmu = smmu_domain->smmu;
272 
273 	/* See above */
274 	wmb();
275 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
276 	arm_smmu_tlb_sync_global(smmu);
277 }
278 
279 static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
280 				      size_t granule, void *cookie, int reg)
281 {
282 	struct arm_smmu_domain *smmu_domain = cookie;
283 	struct arm_smmu_device *smmu = smmu_domain->smmu;
284 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
285 	int idx = cfg->cbndx;
286 
287 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
288 		wmb();
289 
290 	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
291 		iova = (iova >> 12) << 12;
292 		iova |= cfg->asid;
293 		do {
294 			arm_smmu_cb_write(smmu, idx, reg, iova);
295 			iova += granule;
296 		} while (size -= granule);
297 	} else {
298 		iova >>= 12;
299 		iova |= (u64)cfg->asid << 48;
300 		do {
301 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
302 			iova += granule >> 12;
303 		} while (size -= granule);
304 	}
305 }
306 
307 static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
308 				      size_t granule, void *cookie, int reg)
309 {
310 	struct arm_smmu_domain *smmu_domain = cookie;
311 	struct arm_smmu_device *smmu = smmu_domain->smmu;
312 	int idx = smmu_domain->cfg.cbndx;
313 
314 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
315 		wmb();
316 
317 	iova >>= 12;
318 	do {
319 		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
320 			arm_smmu_cb_writeq(smmu, idx, reg, iova);
321 		else
322 			arm_smmu_cb_write(smmu, idx, reg, iova);
323 		iova += granule >> 12;
324 	} while (size -= granule);
325 }
326 
327 static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
328 				     size_t granule, void *cookie)
329 {
330 	struct arm_smmu_domain *smmu_domain = cookie;
331 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
332 
333 	if (cfg->flush_walk_prefer_tlbiasid) {
334 		arm_smmu_tlb_inv_context_s1(cookie);
335 	} else {
336 		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
337 					  ARM_SMMU_CB_S1_TLBIVA);
338 		arm_smmu_tlb_sync_context(cookie);
339 	}
340 }
341 
342 static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
343 				     unsigned long iova, size_t granule,
344 				     void *cookie)
345 {
346 	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
347 				  ARM_SMMU_CB_S1_TLBIVAL);
348 }
349 
350 static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
351 				     size_t granule, void *cookie)
352 {
353 	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
354 				  ARM_SMMU_CB_S2_TLBIIPAS2);
355 	arm_smmu_tlb_sync_context(cookie);
356 }
357 
358 static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
359 				     unsigned long iova, size_t granule,
360 				     void *cookie)
361 {
362 	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
363 				  ARM_SMMU_CB_S2_TLBIIPAS2L);
364 }
365 
366 static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
367 					size_t granule, void *cookie)
368 {
369 	arm_smmu_tlb_inv_context_s2(cookie);
370 }
371 /*
372  * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
373  * almost negligible, but the benefit of getting the first one in as far ahead
374  * of the sync as possible is significant, hence we don't just make this a
375  * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
376  * think.
377  */
378 static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
379 					unsigned long iova, size_t granule,
380 					void *cookie)
381 {
382 	struct arm_smmu_domain *smmu_domain = cookie;
383 	struct arm_smmu_device *smmu = smmu_domain->smmu;
384 
385 	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
386 		wmb();
387 
388 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
389 }
390 
391 static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
392 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
393 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
394 	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
395 };
396 
397 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
398 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
399 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
400 	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
401 };
402 
403 static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
404 	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
405 	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
406 	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
407 };
408 
409 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
410 {
411 	u32 fsr, fsynr, cbfrsynra;
412 	unsigned long iova;
413 	struct iommu_domain *domain = dev;
414 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
415 	struct arm_smmu_device *smmu = smmu_domain->smmu;
416 	int idx = smmu_domain->cfg.cbndx;
417 	int ret;
418 
419 	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
420 	if (!(fsr & ARM_SMMU_FSR_FAULT))
421 		return IRQ_NONE;
422 
423 	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
424 	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
425 	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));
426 
427 	ret = report_iommu_fault(domain, NULL, iova,
428 		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);
429 
430 	if (ret == -ENOSYS)
431 		dev_err_ratelimited(smmu->dev,
432 		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
433 			    fsr, iova, fsynr, cbfrsynra, idx);
434 
435 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
436 	return IRQ_HANDLED;
437 }
438 
439 static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
440 {
441 	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
442 	struct arm_smmu_device *smmu = dev;
443 	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
444 				      DEFAULT_RATELIMIT_BURST);
445 
446 	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
447 	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
448 	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
449 	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);
450 
451 	if (!gfsr)
452 		return IRQ_NONE;
453 
454 	if (__ratelimit(&rs)) {
455 		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
456 		    (gfsr & ARM_SMMU_sGFSR_USF))
457 			dev_err(smmu->dev,
458 				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
459 				(u16)gfsynr1);
460 		else
461 			dev_err(smmu->dev,
462 				"Unexpected global fault, this could be serious\n");
463 		dev_err(smmu->dev,
464 			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
465 			gfsr, gfsynr0, gfsynr1, gfsynr2);
466 	}
467 
468 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
469 	return IRQ_HANDLED;
470 }
471 
472 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
473 				       struct io_pgtable_cfg *pgtbl_cfg)
474 {
475 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
476 	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
477 	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
478 
479 	cb->cfg = cfg;
480 
481 	/* TCR */
482 	if (stage1) {
483 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
484 			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
485 		} else {
486 			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
487 			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
488 			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
489 				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
490 			else
491 				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
492 		}
493 	} else {
494 		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
495 	}
496 
497 	/* TTBRs */
498 	if (stage1) {
499 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
500 			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
501 			cb->ttbr[1] = 0;
502 		} else {
503 			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
504 						 cfg->asid);
505 			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
506 						 cfg->asid);
507 
508 			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
509 				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
510 			else
511 				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
512 		}
513 	} else {
514 		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
515 	}
516 
517 	/* MAIRs (stage-1 only) */
518 	if (stage1) {
519 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
520 			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
521 			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
522 		} else {
523 			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
524 			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
525 		}
526 	}
527 }
528 
529 void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
530 {
531 	u32 reg;
532 	bool stage1;
533 	struct arm_smmu_cb *cb = &smmu->cbs[idx];
534 	struct arm_smmu_cfg *cfg = cb->cfg;
535 
536 	/* Unassigned context banks only need disabling */
537 	if (!cfg) {
538 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
539 		return;
540 	}
541 
542 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
543 
544 	/* CBA2R */
545 	if (smmu->version > ARM_SMMU_V1) {
546 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
547 			reg = ARM_SMMU_CBA2R_VA64;
548 		else
549 			reg = 0;
550 		/* 16-bit VMIDs live in CBA2R */
551 		if (smmu->features & ARM_SMMU_FEAT_VMID16)
552 			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);
553 
554 		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
555 	}
556 
557 	/* CBAR */
558 	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
559 	if (smmu->version < ARM_SMMU_V2)
560 		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);
561 
562 	/*
563 	 * Use the weakest shareability/memory types, so they are
564 	 * overridden by the ttbcr/pte.
565 	 */
566 	if (stage1) {
567 		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
568 				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
569 		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
570 				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
571 	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
572 		/* 8-bit VMIDs live in CBAR */
573 		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
574 	}
575 	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);
576 
577 	/*
578 	 * TCR
579 	 * We must write this before the TTBRs, since it determines the
580 	 * access behaviour of some fields (in particular, ASID[15:8]).
581 	 */
582 	if (stage1 && smmu->version > ARM_SMMU_V1)
583 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
584 	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);
585 
586 	/* TTBRs */
587 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
588 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
589 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
590 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
591 	} else {
592 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
593 		if (stage1)
594 			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
595 					   cb->ttbr[1]);
596 	}
597 
598 	/* MAIRs (stage-1 only) */
599 	if (stage1) {
600 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
601 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
602 	}
603 
604 	/* SCTLR */
605 	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
606 	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
607 	if (stage1)
608 		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
609 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
610 		reg |= ARM_SMMU_SCTLR_E;
611 
612 	if (smmu->impl && smmu->impl->write_sctlr)
613 		smmu->impl->write_sctlr(smmu, idx, reg);
614 	else
615 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
616 }
617 
618 static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
619 				       struct arm_smmu_device *smmu,
620 				       struct device *dev, unsigned int start)
621 {
622 	if (smmu->impl && smmu->impl->alloc_context_bank)
623 		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);
624 
625 	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
626 }
627 
628 static int arm_smmu_init_domain_context(struct iommu_domain *domain,
629 					struct arm_smmu_device *smmu,
630 					struct device *dev)
631 {
632 	int irq, start, ret = 0;
633 	unsigned long ias, oas;
634 	struct io_pgtable_ops *pgtbl_ops;
635 	struct io_pgtable_cfg pgtbl_cfg;
636 	enum io_pgtable_fmt fmt;
637 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
638 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
639 	irqreturn_t (*context_fault)(int irq, void *dev);
640 
641 	mutex_lock(&smmu_domain->init_mutex);
642 	if (smmu_domain->smmu)
643 		goto out_unlock;
644 
645 	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
646 		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
647 		smmu_domain->smmu = smmu;
648 		goto out_unlock;
649 	}
650 
651 	/*
652 	 * Mapping the requested stage onto what we support is surprisingly
653 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
654 	 * support for nested translation. That means we end up with the
655 	 * following table:
656 	 *
657 	 * Requested        Supported        Actual
658 	 *     S1               N              S1
659 	 *     S1             S1+S2            S1
660 	 *     S1               S2             S2
661 	 *     S1               S1             S1
662 	 *     N                N              N
663 	 *     N              S1+S2            S2
664 	 *     N                S2             S2
665 	 *     N                S1             S1
666 	 *
667 	 * Note that you can't actually request stage-2 mappings.
668 	 */
669 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
670 		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
671 	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
672 		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
673 
674 	/*
675 	 * Choosing a suitable context format is even more fiddly. Until we
676 	 * grow some way for the caller to express a preference, and/or move
677 	 * the decision into the io-pgtable code where it arguably belongs,
678 	 * just aim for the closest thing to the rest of the system, and hope
679 	 * that the hardware isn't esoteric enough that we can't assume AArch64
680 	 * support to be a superset of AArch32 support...
681 	 */
682 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
683 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
684 	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
685 	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
686 	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
687 	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
688 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
689 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
690 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
691 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
692 			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
693 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;
694 
695 	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
696 		ret = -EINVAL;
697 		goto out_unlock;
698 	}
699 
700 	switch (smmu_domain->stage) {
701 	case ARM_SMMU_DOMAIN_S1:
702 		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
703 		start = smmu->num_s2_context_banks;
704 		ias = smmu->va_size;
705 		oas = smmu->ipa_size;
706 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
707 			fmt = ARM_64_LPAE_S1;
708 		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
709 			fmt = ARM_32_LPAE_S1;
710 			ias = min(ias, 32UL);
711 			oas = min(oas, 40UL);
712 		} else {
713 			fmt = ARM_V7S;
714 			ias = min(ias, 32UL);
715 			oas = min(oas, 32UL);
716 		}
717 		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
718 		break;
719 	case ARM_SMMU_DOMAIN_NESTED:
720 		/*
721 		 * We will likely want to change this if/when KVM gets
722 		 * involved.
723 		 */
724 	case ARM_SMMU_DOMAIN_S2:
725 		cfg->cbar = CBAR_TYPE_S2_TRANS;
726 		start = 0;
727 		ias = smmu->ipa_size;
728 		oas = smmu->pa_size;
729 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
730 			fmt = ARM_64_LPAE_S2;
731 		} else {
732 			fmt = ARM_32_LPAE_S2;
733 			ias = min(ias, 40UL);
734 			oas = min(oas, 40UL);
735 		}
736 		if (smmu->version == ARM_SMMU_V2)
737 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
738 		else
739 			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
740 		break;
741 	default:
742 		ret = -EINVAL;
743 		goto out_unlock;
744 	}
745 
746 	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
747 	if (ret < 0) {
748 		goto out_unlock;
749 	}
750 
751 	smmu_domain->smmu = smmu;
752 
753 	cfg->cbndx = ret;
754 	if (smmu->version < ARM_SMMU_V2) {
755 		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
756 		cfg->irptndx %= smmu->num_context_irqs;
757 	} else {
758 		cfg->irptndx = cfg->cbndx;
759 	}
760 
761 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
762 		cfg->vmid = cfg->cbndx + 1;
763 	else
764 		cfg->asid = cfg->cbndx;
765 
766 	pgtbl_cfg = (struct io_pgtable_cfg) {
767 		.pgsize_bitmap	= smmu->pgsize_bitmap,
768 		.ias		= ias,
769 		.oas		= oas,
770 		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
771 		.tlb		= smmu_domain->flush_ops,
772 		.iommu_dev	= smmu->dev,
773 	};
774 
775 	if (smmu->impl && smmu->impl->init_context) {
776 		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
777 		if (ret)
778 			goto out_clear_smmu;
779 	}
780 
781 	if (smmu_domain->pgtbl_quirks)
782 		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;
783 
784 	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
785 	if (!pgtbl_ops) {
786 		ret = -ENOMEM;
787 		goto out_clear_smmu;
788 	}
789 
790 	/* Update the domain's page sizes to reflect the page table format */
791 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
792 
793 	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
794 		domain->geometry.aperture_start = ~0UL << ias;
795 		domain->geometry.aperture_end = ~0UL;
796 	} else {
797 		domain->geometry.aperture_end = (1UL << ias) - 1;
798 	}
799 
800 	domain->geometry.force_aperture = true;
801 
802 	/* Initialise the context bank with our page table cfg */
803 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
804 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
805 
806 	/*
807 	 * Request context fault interrupt. Do this last to avoid the
808 	 * handler seeing a half-initialised domain state.
809 	 */
810 	irq = smmu->irqs[cfg->irptndx];
811 
812 	if (smmu->impl && smmu->impl->context_fault)
813 		context_fault = smmu->impl->context_fault;
814 	else
815 		context_fault = arm_smmu_context_fault;
816 
817 	ret = devm_request_irq(smmu->dev, irq, context_fault,
818 			       IRQF_SHARED, "arm-smmu-context-fault", domain);
819 	if (ret < 0) {
820 		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
821 			cfg->irptndx, irq);
822 		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
823 	}
824 
825 	mutex_unlock(&smmu_domain->init_mutex);
826 
827 	/* Publish page table ops for map/unmap */
828 	smmu_domain->pgtbl_ops = pgtbl_ops;
829 	return 0;
830 
831 out_clear_smmu:
832 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
833 	smmu_domain->smmu = NULL;
834 out_unlock:
835 	mutex_unlock(&smmu_domain->init_mutex);
836 	return ret;
837 }
838 
839 static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
840 {
841 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
842 	struct arm_smmu_device *smmu = smmu_domain->smmu;
843 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
844 	int ret, irq;
845 
846 	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
847 		return;
848 
849 	ret = arm_smmu_rpm_get(smmu);
850 	if (ret < 0)
851 		return;
852 
853 	/*
854 	 * Disable the context bank and free the page tables before freeing
855 	 * it.
856 	 */
857 	smmu->cbs[cfg->cbndx].cfg = NULL;
858 	arm_smmu_write_context_bank(smmu, cfg->cbndx);
859 
860 	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
861 		irq = smmu->irqs[cfg->irptndx];
862 		devm_free_irq(smmu->dev, irq, domain);
863 	}
864 
865 	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
866 	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
867 
868 	arm_smmu_rpm_put(smmu);
869 }
870 
871 static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
872 {
873 	struct arm_smmu_domain *smmu_domain;
874 
875 	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
876 		if (using_legacy_binding ||
877 		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
878 			return NULL;
879 	}
880 	/*
881 	 * Allocate the domain and initialise some of its data structures.
882 	 * We can't really do anything meaningful until we've added a
883 	 * master.
884 	 */
885 	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
886 	if (!smmu_domain)
887 		return NULL;
888 
889 	mutex_init(&smmu_domain->init_mutex);
890 	spin_lock_init(&smmu_domain->cb_lock);
891 
892 	return &smmu_domain->domain;
893 }
894 
/*
 * Destroy the domain's context and free the enclosing arm_smmu_domain.
 * All devices are assumed to have been detached already.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *to_free = to_smmu_domain(domain);

	arm_smmu_destroy_domain_context(domain);
	kfree(to_free);
}
906 
907 static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
908 {
909 	struct arm_smmu_smr *smr = smmu->smrs + idx;
910 	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
911 		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);
912 
913 	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
914 		reg |= ARM_SMMU_SMR_VALID;
915 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
916 }
917 
918 static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
919 {
920 	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
921 	u32 reg;
922 
923 	if (smmu->impl && smmu->impl->write_s2cr) {
924 		smmu->impl->write_s2cr(smmu, idx);
925 		return;
926 	}
927 
928 	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
929 	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
930 	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
931 
932 	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
933 	    smmu->smrs[idx].valid)
934 		reg |= ARM_SMMU_S2CR_EXIDVALID;
935 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
936 }
937 
938 static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
939 {
940 	arm_smmu_write_s2cr(smmu, idx);
941 	if (smmu->smrs)
942 		arm_smmu_write_smr(smmu, idx);
943 }
944 
945 /*
946  * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
947  * should be called after sCR0 is written.
948  */
949 static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
950 {
951 	u32 smr;
952 	int i;
953 
954 	if (!smmu->smrs)
955 		return;
956 	/*
957 	 * If we've had to accommodate firmware memory regions, we may
958 	 * have live SMRs by now; tread carefully...
959 	 *
960 	 * Somewhat perversely, not having a free SMR for this test implies we
961 	 * can get away without it anyway, as we'll only be able to 'allocate'
962 	 * these SMRs for the ID/mask values we're already trusting to be OK.
963 	 */
964 	for (i = 0; i < smmu->num_mapping_groups; i++)
965 		if (!smmu->smrs[i].valid)
966 			goto smr_ok;
967 	return;
968 smr_ok:
969 	/*
970 	 * SMR.ID bits may not be preserved if the corresponding MASK
971 	 * bits are set, so check each one separately. We can reject
972 	 * masters later if they try to claim IDs outside these masks.
973 	 */
974 	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
975 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
976 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
977 	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);
978 
979 	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
980 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
981 	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
982 	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
983 }
984 
985 static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
986 {
987 	struct arm_smmu_smr *smrs = smmu->smrs;
988 	int i, free_idx = -ENOSPC;
989 
990 	/* Stream indexing is blissfully easy */
991 	if (!smrs)
992 		return id;
993 
994 	/* Validating SMRs is... less so */
995 	for (i = 0; i < smmu->num_mapping_groups; ++i) {
996 		if (!smrs[i].valid) {
997 			/*
998 			 * Note the first free entry we come across, which
999 			 * we'll claim in the end if nothing else matches.
1000 			 */
1001 			if (free_idx < 0)
1002 				free_idx = i;
1003 			continue;
1004 		}
1005 		/*
1006 		 * If the new entry is _entirely_ matched by an existing entry,
1007 		 * then reuse that, with the guarantee that there also cannot
1008 		 * be any subsequent conflicting entries. In normal use we'd
1009 		 * expect simply identical entries for this case, but there's
1010 		 * no harm in accommodating the generalisation.
1011 		 */
1012 		if ((mask & smrs[i].mask) == mask &&
1013 		    !((id ^ smrs[i].id) & ~smrs[i].mask))
1014 			return i;
1015 		/*
1016 		 * If the new entry has any other overlap with an existing one,
1017 		 * though, then there always exists at least one stream ID
1018 		 * which would cause a conflict, and we can't allow that risk.
1019 		 */
1020 		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
1021 			return -EINVAL;
1022 	}
1023 
1024 	return free_idx;
1025 }
1026 
1027 static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
1028 {
1029 	if (--smmu->s2crs[idx].count)
1030 		return false;
1031 
1032 	smmu->s2crs[idx] = s2cr_init_val;
1033 	if (smmu->smrs)
1034 		smmu->smrs[idx].valid = false;
1035 
1036 	return true;
1037 }
1038 
1039 static int arm_smmu_master_alloc_smes(struct device *dev)
1040 {
1041 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1042 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1043 	struct arm_smmu_device *smmu = cfg->smmu;
1044 	struct arm_smmu_smr *smrs = smmu->smrs;
1045 	int i, idx, ret;
1046 
1047 	mutex_lock(&smmu->stream_map_mutex);
1048 	/* Figure out a viable stream map entry allocation */
1049 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1050 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1051 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1052 
1053 		if (idx != INVALID_SMENDX) {
1054 			ret = -EEXIST;
1055 			goto out_err;
1056 		}
1057 
1058 		ret = arm_smmu_find_sme(smmu, sid, mask);
1059 		if (ret < 0)
1060 			goto out_err;
1061 
1062 		idx = ret;
1063 		if (smrs && smmu->s2crs[idx].count == 0) {
1064 			smrs[idx].id = sid;
1065 			smrs[idx].mask = mask;
1066 			smrs[idx].valid = true;
1067 		}
1068 		smmu->s2crs[idx].count++;
1069 		cfg->smendx[i] = (s16)idx;
1070 	}
1071 
1072 	/* It worked! Now, poke the actual hardware */
1073 	for_each_cfg_sme(cfg, fwspec, i, idx)
1074 		arm_smmu_write_sme(smmu, idx);
1075 
1076 	mutex_unlock(&smmu->stream_map_mutex);
1077 	return 0;
1078 
1079 out_err:
1080 	while (i--) {
1081 		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1082 		cfg->smendx[i] = INVALID_SMENDX;
1083 	}
1084 	mutex_unlock(&smmu->stream_map_mutex);
1085 	return ret;
1086 }
1087 
1088 static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
1089 				      struct iommu_fwspec *fwspec)
1090 {
1091 	struct arm_smmu_device *smmu = cfg->smmu;
1092 	int i, idx;
1093 
1094 	mutex_lock(&smmu->stream_map_mutex);
1095 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1096 		if (arm_smmu_free_sme(smmu, idx))
1097 			arm_smmu_write_sme(smmu, idx);
1098 		cfg->smendx[i] = INVALID_SMENDX;
1099 	}
1100 	mutex_unlock(&smmu->stream_map_mutex);
1101 }
1102 
1103 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1104 				      struct arm_smmu_master_cfg *cfg,
1105 				      struct iommu_fwspec *fwspec)
1106 {
1107 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1108 	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
1109 	u8 cbndx = smmu_domain->cfg.cbndx;
1110 	enum arm_smmu_s2cr_type type;
1111 	int i, idx;
1112 
1113 	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
1114 		type = S2CR_TYPE_BYPASS;
1115 	else
1116 		type = S2CR_TYPE_TRANS;
1117 
1118 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1119 		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1120 			continue;
1121 
1122 		s2cr[idx].type = type;
1123 		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1124 		s2cr[idx].cbndx = cbndx;
1125 		arm_smmu_write_s2cr(smmu, idx);
1126 	}
1127 	return 0;
1128 }
1129 
1130 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
1131 {
1132 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1133 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1134 	struct arm_smmu_master_cfg *cfg;
1135 	struct arm_smmu_device *smmu;
1136 	int ret;
1137 
1138 	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1139 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
1140 		return -ENXIO;
1141 	}
1142 
1143 	/*
1144 	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
1145 	 * domains between of_xlate() and probe_device() - we have no way to cope
1146 	 * with that, so until ARM gets converted to rely on groups and default
1147 	 * domains, just say no (but more politely than by dereferencing NULL).
1148 	 * This should be at least a WARN_ON once that's sorted.
1149 	 */
1150 	cfg = dev_iommu_priv_get(dev);
1151 	if (!cfg)
1152 		return -ENODEV;
1153 
1154 	smmu = cfg->smmu;
1155 
1156 	ret = arm_smmu_rpm_get(smmu);
1157 	if (ret < 0)
1158 		return ret;
1159 
1160 	/* Ensure that the domain is finalised */
1161 	ret = arm_smmu_init_domain_context(domain, smmu, dev);
1162 	if (ret < 0)
1163 		goto rpm_put;
1164 
1165 	/*
1166 	 * Sanity check the domain. We don't support domains across
1167 	 * different SMMUs.
1168 	 */
1169 	if (smmu_domain->smmu != smmu) {
1170 		dev_err(dev,
1171 			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1172 			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1173 		ret = -EINVAL;
1174 		goto rpm_put;
1175 	}
1176 
1177 	/* Looks ok, so add the device to the domain */
1178 	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);
1179 
1180 	/*
1181 	 * Setup an autosuspend delay to avoid bouncing runpm state.
1182 	 * Otherwise, if a driver for a suspended consumer device
1183 	 * unmaps buffers, it will runpm resume/suspend for each one.
1184 	 *
1185 	 * For example, when used by a GPU device, when an application
1186 	 * or game exits, it can trigger unmapping 100s or 1000s of
1187 	 * buffers.  With a runpm cycle for each buffer, that adds up
1188 	 * to 5-10sec worth of reprogramming the context bank, while
1189 	 * the system appears to be locked up to the user.
1190 	 */
1191 	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
1192 	pm_runtime_use_autosuspend(smmu->dev);
1193 
1194 rpm_put:
1195 	arm_smmu_rpm_put(smmu);
1196 	return ret;
1197 }
1198 
1199 static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
1200 			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
1201 			      int prot, gfp_t gfp, size_t *mapped)
1202 {
1203 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1204 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1205 	int ret;
1206 
1207 	if (!ops)
1208 		return -ENODEV;
1209 
1210 	arm_smmu_rpm_get(smmu);
1211 	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
1212 	arm_smmu_rpm_put(smmu);
1213 
1214 	return ret;
1215 }
1216 
1217 static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
1218 				   size_t pgsize, size_t pgcount,
1219 				   struct iommu_iotlb_gather *iotlb_gather)
1220 {
1221 	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1222 	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
1223 	size_t ret;
1224 
1225 	if (!ops)
1226 		return 0;
1227 
1228 	arm_smmu_rpm_get(smmu);
1229 	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
1230 	arm_smmu_rpm_put(smmu);
1231 
1232 	return ret;
1233 }
1234 
1235 static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
1236 {
1237 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1238 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1239 
1240 	if (smmu_domain->flush_ops) {
1241 		arm_smmu_rpm_get(smmu);
1242 		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
1243 		arm_smmu_rpm_put(smmu);
1244 	}
1245 }
1246 
1247 static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
1248 				struct iommu_iotlb_gather *gather)
1249 {
1250 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1251 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1252 
1253 	if (!smmu)
1254 		return;
1255 
1256 	arm_smmu_rpm_get(smmu);
1257 	if (smmu->version == ARM_SMMU_V2 ||
1258 	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1259 		arm_smmu_tlb_sync_context(smmu_domain);
1260 	else
1261 		arm_smmu_tlb_sync_global(smmu);
1262 	arm_smmu_rpm_put(smmu);
1263 }
1264 
1265 static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
1266 					      dma_addr_t iova)
1267 {
1268 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1269 	struct arm_smmu_device *smmu = smmu_domain->smmu;
1270 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
1271 	struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
1272 	struct device *dev = smmu->dev;
1273 	void __iomem *reg;
1274 	u32 tmp;
1275 	u64 phys;
1276 	unsigned long va, flags;
1277 	int ret, idx = cfg->cbndx;
1278 	phys_addr_t addr = 0;
1279 
1280 	ret = arm_smmu_rpm_get(smmu);
1281 	if (ret < 0)
1282 		return 0;
1283 
1284 	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
1285 	va = iova & ~0xfffUL;
1286 	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
1287 		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1288 	else
1289 		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
1290 
1291 	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
1292 	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
1293 				      5, 50)) {
1294 		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1295 		dev_err(dev,
1296 			"iova to phys timed out on %pad. Falling back to software table walk.\n",
1297 			&iova);
1298 		arm_smmu_rpm_put(smmu);
1299 		return ops->iova_to_phys(ops, iova);
1300 	}
1301 
1302 	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
1303 	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
1304 	if (phys & ARM_SMMU_CB_PAR_F) {
1305 		dev_err(dev, "translation fault!\n");
1306 		dev_err(dev, "PAR = 0x%llx\n", phys);
1307 		goto out;
1308 	}
1309 
1310 	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
1311 out:
1312 	arm_smmu_rpm_put(smmu);
1313 
1314 	return addr;
1315 }
1316 
1317 static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1318 					dma_addr_t iova)
1319 {
1320 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1321 	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1322 
1323 	if (!ops)
1324 		return 0;
1325 
1326 	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1327 			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
1328 		return arm_smmu_iova_to_phys_hard(domain, iova);
1329 
1330 	return ops->iova_to_phys(ops, iova);
1331 }
1332 
1333 static bool arm_smmu_capable(enum iommu_cap cap)
1334 {
1335 	switch (cap) {
1336 	case IOMMU_CAP_CACHE_COHERENCY:
1337 		/*
1338 		 * Return true here as the SMMU can always send out coherent
1339 		 * requests.
1340 		 */
1341 		return true;
1342 	case IOMMU_CAP_NOEXEC:
1343 		return true;
1344 	default:
1345 		return false;
1346 	}
1347 }
1348 
1349 static
1350 struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1351 {
1352 	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
1353 							  fwnode);
1354 	put_device(dev);
1355 	return dev ? dev_get_drvdata(dev) : NULL;
1356 }
1357 
1358 static struct iommu_device *arm_smmu_probe_device(struct device *dev)
1359 {
1360 	struct arm_smmu_device *smmu = NULL;
1361 	struct arm_smmu_master_cfg *cfg;
1362 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1363 	int i, ret;
1364 
1365 	if (using_legacy_binding) {
1366 		ret = arm_smmu_register_legacy_master(dev, &smmu);
1367 
1368 		/*
1369 		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
1370 		 * will allocate/initialise a new one. Thus we need to update fwspec for
1371 		 * later use.
1372 		 */
1373 		fwspec = dev_iommu_fwspec_get(dev);
1374 		if (ret)
1375 			goto out_free;
1376 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1377 		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1378 	} else {
1379 		return ERR_PTR(-ENODEV);
1380 	}
1381 
1382 	ret = -EINVAL;
1383 	for (i = 0; i < fwspec->num_ids; i++) {
1384 		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
1385 		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);
1386 
1387 		if (sid & ~smmu->streamid_mask) {
1388 			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1389 				sid, smmu->streamid_mask);
1390 			goto out_free;
1391 		}
1392 		if (mask & ~smmu->smr_mask_mask) {
1393 			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
1394 				mask, smmu->smr_mask_mask);
1395 			goto out_free;
1396 		}
1397 	}
1398 
1399 	ret = -ENOMEM;
1400 	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
1401 		      GFP_KERNEL);
1402 	if (!cfg)
1403 		goto out_free;
1404 
1405 	cfg->smmu = smmu;
1406 	dev_iommu_priv_set(dev, cfg);
1407 	while (i--)
1408 		cfg->smendx[i] = INVALID_SMENDX;
1409 
1410 	ret = arm_smmu_rpm_get(smmu);
1411 	if (ret < 0)
1412 		goto out_cfg_free;
1413 
1414 	ret = arm_smmu_master_alloc_smes(dev);
1415 	arm_smmu_rpm_put(smmu);
1416 
1417 	if (ret)
1418 		goto out_cfg_free;
1419 
1420 	device_link_add(dev, smmu->dev,
1421 			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);
1422 
1423 	return &smmu->iommu;
1424 
1425 out_cfg_free:
1426 	kfree(cfg);
1427 out_free:
1428 	iommu_fwspec_free(dev);
1429 	return ERR_PTR(ret);
1430 }
1431 
1432 static void arm_smmu_release_device(struct device *dev)
1433 {
1434 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1435 	struct arm_smmu_master_cfg *cfg;
1436 	struct arm_smmu_device *smmu;
1437 	int ret;
1438 
1439 	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1440 		return;
1441 
1442 	cfg  = dev_iommu_priv_get(dev);
1443 	smmu = cfg->smmu;
1444 
1445 	ret = arm_smmu_rpm_get(smmu);
1446 	if (ret < 0)
1447 		return;
1448 
1449 	arm_smmu_master_free_smes(cfg, fwspec);
1450 
1451 	arm_smmu_rpm_put(smmu);
1452 
1453 	dev_iommu_priv_set(dev, NULL);
1454 	kfree(cfg);
1455 	iommu_fwspec_free(dev);
1456 }
1457 
1458 static void arm_smmu_probe_finalize(struct device *dev)
1459 {
1460 	struct arm_smmu_master_cfg *cfg;
1461 	struct arm_smmu_device *smmu;
1462 
1463 	cfg = dev_iommu_priv_get(dev);
1464 	smmu = cfg->smmu;
1465 
1466 	if (smmu->impl && smmu->impl->probe_finalize)
1467 		smmu->impl->probe_finalize(smmu, dev);
1468 }
1469 
1470 static struct iommu_group *arm_smmu_device_group(struct device *dev)
1471 {
1472 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1473 	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1474 	struct arm_smmu_device *smmu = cfg->smmu;
1475 	struct iommu_group *group = NULL;
1476 	int i, idx;
1477 
1478 	mutex_lock(&smmu->stream_map_mutex);
1479 	for_each_cfg_sme(cfg, fwspec, i, idx) {
1480 		if (group && smmu->s2crs[idx].group &&
1481 		    group != smmu->s2crs[idx].group) {
1482 			mutex_unlock(&smmu->stream_map_mutex);
1483 			return ERR_PTR(-EINVAL);
1484 		}
1485 
1486 		group = smmu->s2crs[idx].group;
1487 	}
1488 
1489 	if (group) {
1490 		mutex_unlock(&smmu->stream_map_mutex);
1491 		return iommu_group_ref_get(group);
1492 	}
1493 
1494 	if (dev_is_pci(dev))
1495 		group = pci_device_group(dev);
1496 	else if (dev_is_fsl_mc(dev))
1497 		group = fsl_mc_device_group(dev);
1498 	else
1499 		group = generic_device_group(dev);
1500 
1501 	/* Remember group for faster lookups */
1502 	if (!IS_ERR(group))
1503 		for_each_cfg_sme(cfg, fwspec, i, idx)
1504 			smmu->s2crs[idx].group = group;
1505 
1506 	mutex_unlock(&smmu->stream_map_mutex);
1507 	return group;
1508 }
1509 
1510 static int arm_smmu_enable_nesting(struct iommu_domain *domain)
1511 {
1512 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1513 	int ret = 0;
1514 
1515 	mutex_lock(&smmu_domain->init_mutex);
1516 	if (smmu_domain->smmu)
1517 		ret = -EPERM;
1518 	else
1519 		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
1520 	mutex_unlock(&smmu_domain->init_mutex);
1521 
1522 	return ret;
1523 }
1524 
1525 static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
1526 		unsigned long quirks)
1527 {
1528 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1529 	int ret = 0;
1530 
1531 	mutex_lock(&smmu_domain->init_mutex);
1532 	if (smmu_domain->smmu)
1533 		ret = -EPERM;
1534 	else
1535 		smmu_domain->pgtbl_quirks = quirks;
1536 	mutex_unlock(&smmu_domain->init_mutex);
1537 
1538 	return ret;
1539 }
1540 
1541 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
1542 {
1543 	u32 mask, fwid = 0;
1544 
1545 	if (args->args_count > 0)
1546 		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);
1547 
1548 	if (args->args_count > 1)
1549 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
1550 	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
1551 		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);
1552 
1553 	return iommu_fwspec_add_ids(dev, &fwid, 1);
1554 }
1555 
1556 static void arm_smmu_get_resv_regions(struct device *dev,
1557 				      struct list_head *head)
1558 {
1559 	struct iommu_resv_region *region;
1560 	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
1561 
1562 	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1563 					 prot, IOMMU_RESV_SW_MSI);
1564 	if (!region)
1565 		return;
1566 
1567 	list_add_tail(&region->list, head);
1568 
1569 	iommu_dma_get_resv_regions(dev, head);
1570 }
1571 
1572 static int arm_smmu_def_domain_type(struct device *dev)
1573 {
1574 	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
1575 	const struct arm_smmu_impl *impl = cfg->smmu->impl;
1576 
1577 	if (using_legacy_binding)
1578 		return IOMMU_DOMAIN_IDENTITY;
1579 
1580 	if (impl && impl->def_domain_type)
1581 		return impl->def_domain_type(dev);
1582 
1583 	return 0;
1584 }
1585 
/*
 * IOMMU operations implemented by this driver. Device-level callbacks live
 * in the outer structure; per-domain callbacks are supplied through
 * default_domain_ops. The structure is not const because pgsize_bitmap
 * starts out as -1UL and is narrowed at run time.
 */
static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.probe_finalize		= arm_smmu_probe_finalize,
	.device_group		= arm_smmu_device_group,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= generic_iommu_put_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
	/* Callbacks operating on an individual iommu_domain */
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= arm_smmu_attach_dev,
		.map_pages		= arm_smmu_map_pages,
		.unmap_pages		= arm_smmu_unmap_pages,
		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
		.iotlb_sync		= arm_smmu_iotlb_sync,
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.enable_nesting		= arm_smmu_enable_nesting,
		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
		.free			= arm_smmu_domain_free,
	}
};
1611 
1612 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
1613 {
1614 	int i;
1615 	u32 reg;
1616 
1617 	/* clear global FSR */
1618 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
1619 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);
1620 
1621 	/*
1622 	 * Reset stream mapping groups: Initial values mark all SMRn as
1623 	 * invalid and all S2CRn as bypass unless overridden.
1624 	 */
1625 	for (i = 0; i < smmu->num_mapping_groups; ++i)
1626 		arm_smmu_write_sme(smmu, i);
1627 
1628 	/* Make sure all context banks are disabled and clear CB_FSR  */
1629 	for (i = 0; i < smmu->num_context_banks; ++i) {
1630 		arm_smmu_write_context_bank(smmu, i);
1631 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
1632 	}
1633 
1634 	/* Invalidate the TLB, just in case */
1635 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
1636 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);
1637 
1638 	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
1639 
1640 	/* Enable fault reporting */
1641 	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
1642 		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);
1643 
1644 	/* Disable TLB broadcasting. */
1645 	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);
1646 
1647 	/* Enable client access, handling unmatched streams as appropriate */
1648 	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
1649 	if (disable_bypass)
1650 		reg |= ARM_SMMU_sCR0_USFCFG;
1651 	else
1652 		reg &= ~ARM_SMMU_sCR0_USFCFG;
1653 
1654 	/* Disable forced broadcasting */
1655 	reg &= ~ARM_SMMU_sCR0_FB;
1656 
1657 	/* Don't upgrade barriers */
1658 	reg &= ~(ARM_SMMU_sCR0_BSU);
1659 
1660 	if (smmu->features & ARM_SMMU_FEAT_VMID16)
1661 		reg |= ARM_SMMU_sCR0_VMID16EN;
1662 
1663 	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
1664 		reg |= ARM_SMMU_sCR0_EXIDENABLE;
1665 
1666 	if (smmu->impl && smmu->impl->reset)
1667 		smmu->impl->reset(smmu);
1668 
1669 	/* Push the button */
1670 	arm_smmu_tlb_sync_global(smmu);
1671 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
1672 }
1673 
/*
 * Convert an address-size encoding from the ID registers into a width in
 * bits. Encodings 0..4 map to 32, 36, 40, 42 and 44 bits; 5 and every other
 * value map to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits_for_encoding[] = { 32, 36, 40, 42, 44 };
	const int nr_encodings = sizeof(bits_for_encoding) /
				 sizeof(bits_for_encoding[0]);

	if (size >= 0 && size < nr_encodings)
		return bits_for_encoding[size];

	return 48;
}
1692 
1693 static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
1694 {
1695 	unsigned int size;
1696 	u32 id;
1697 	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1698 	int i, ret;
1699 
1700 	dev_notice(smmu->dev, "probing hardware configuration...\n");
1701 	dev_notice(smmu->dev, "SMMUv%d with:\n",
1702 			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1703 
1704 	/* ID0 */
1705 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);
1706 
1707 	/* Restrict available stages based on module parameter */
1708 	if (force_stage == 1)
1709 		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
1710 	else if (force_stage == 2)
1711 		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);
1712 
1713 	if (id & ARM_SMMU_ID0_S1TS) {
1714 		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
1715 		dev_notice(smmu->dev, "\tstage 1 translation\n");
1716 	}
1717 
1718 	if (id & ARM_SMMU_ID0_S2TS) {
1719 		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
1720 		dev_notice(smmu->dev, "\tstage 2 translation\n");
1721 	}
1722 
1723 	if (id & ARM_SMMU_ID0_NTS) {
1724 		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
1725 		dev_notice(smmu->dev, "\tnested translation\n");
1726 	}
1727 
1728 	if (!(smmu->features &
1729 		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1730 		dev_err(smmu->dev, "\tno translation support!\n");
1731 		return -ENODEV;
1732 	}
1733 
1734 	if ((id & ARM_SMMU_ID0_S1TS) &&
1735 	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
1736 		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
1737 		dev_notice(smmu->dev, "\taddress translation ops\n");
1738 	}
1739 
1740 	/*
1741 	 * In order for DMA API calls to work properly, we must defer to what
1742 	 * the FW says about coherency, regardless of what the hardware claims.
1743 	 * Fortunately, this also opens up a workaround for systems where the
1744 	 * ID register value has ended up configured incorrectly.
1745 	 */
1746 	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
1747 	if (cttw_fw || cttw_reg)
1748 		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1749 			   cttw_fw ? "" : "non-");
1750 	if (cttw_fw != cttw_reg)
1751 		dev_notice(smmu->dev,
1752 			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1753 
1754 	/* Max. number of entries we have for stream matching/indexing */
1755 	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
1756 		smmu->features |= ARM_SMMU_FEAT_EXIDS;
1757 		size = 1 << 16;
1758 	} else {
1759 		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
1760 	}
1761 	smmu->streamid_mask = size - 1;
1762 	if (id & ARM_SMMU_ID0_SMS) {
1763 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1764 		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
1765 		if (size == 0) {
1766 			dev_err(smmu->dev,
1767 				"stream-matching supported, but no SMRs present!\n");
1768 			return -ENODEV;
1769 		}
1770 
1771 		/* Zero-initialised to mark as invalid */
1772 		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
1773 					  GFP_KERNEL);
1774 		if (!smmu->smrs)
1775 			return -ENOMEM;
1776 
1777 		dev_notice(smmu->dev,
1778 			   "\tstream matching with %u register groups", size);
1779 	}
1780 	/* s2cr->type == 0 means translation, so initialise explicitly */
1781 	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
1782 					 GFP_KERNEL);
1783 	if (!smmu->s2crs)
1784 		return -ENOMEM;
1785 	for (i = 0; i < size; i++)
1786 		smmu->s2crs[i] = s2cr_init_val;
1787 
1788 	smmu->num_mapping_groups = size;
1789 	mutex_init(&smmu->stream_map_mutex);
1790 	spin_lock_init(&smmu->global_sync_lock);
1791 
1792 	if (smmu->version < ARM_SMMU_V2 ||
1793 	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1794 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1795 		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1796 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
1797 	}
1798 
1799 	/* ID1 */
1800 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
1801 	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;
1802 
1803 	/* Check for size mismatch of SMMU address space from mapped region */
1804 	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
1805 	if (smmu->numpage != 2 * size << smmu->pgshift)
1806 		dev_warn(smmu->dev,
1807 			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
1808 			2 * size << smmu->pgshift, smmu->numpage);
1809 	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
1810 	smmu->numpage = size;
1811 
1812 	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
1813 	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
1814 	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
1815 		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
1816 		return -ENODEV;
1817 	}
1818 	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
1819 		   smmu->num_context_banks, smmu->num_s2_context_banks);
1820 	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
1821 				 sizeof(*smmu->cbs), GFP_KERNEL);
1822 	if (!smmu->cbs)
1823 		return -ENOMEM;
1824 
1825 	/* ID2 */
1826 	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
1827 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
1828 	smmu->ipa_size = size;
1829 
1830 	/* The output mask is also applied for bypass */
1831 	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
1832 	smmu->pa_size = size;
1833 
1834 	if (id & ARM_SMMU_ID2_VMID16)
1835 		smmu->features |= ARM_SMMU_FEAT_VMID16;
1836 
1837 	/*
1838 	 * What the page table walker can address actually depends on which
1839 	 * descriptor format is in use, but since a) we don't know that yet,
1840 	 * and b) it can vary per context bank, this will have to do...
1841 	 */
1842 	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
1843 		dev_warn(smmu->dev,
1844 			 "failed to set DMA mask for table walker\n");
1845 
1846 	if (smmu->version < ARM_SMMU_V2) {
1847 		smmu->va_size = smmu->ipa_size;
1848 		if (smmu->version == ARM_SMMU_V1_64K)
1849 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1850 	} else {
1851 		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
1852 		smmu->va_size = arm_smmu_id_size_to_bits(size);
1853 		if (id & ARM_SMMU_ID2_PTFS_4K)
1854 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1855 		if (id & ARM_SMMU_ID2_PTFS_16K)
1856 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1857 		if (id & ARM_SMMU_ID2_PTFS_64K)
1858 			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1859 	}
1860 
1861 	if (smmu->impl && smmu->impl->cfg_probe) {
1862 		ret = smmu->impl->cfg_probe(smmu);
1863 		if (ret)
1864 			return ret;
1865 	}
1866 
1867 	/* Now we've corralled the various formats, what'll it do? */
1868 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1869 		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1870 	if (smmu->features &
1871 	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1872 		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1873 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1874 		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1875 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1876 		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
1877 
1878 	if (arm_smmu_ops.pgsize_bitmap == -1UL)
1879 		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
1880 	else
1881 		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
1882 	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
1883 		   smmu->pgsize_bitmap);
1884 
1885 
1886 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
1887 		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1888 			   smmu->va_size, smmu->ipa_size);
1889 
1890 	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
1891 		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1892 			   smmu->ipa_size, smmu->pa_size);
1893 
1894 	return 0;
1895 }
1896 
/*
 * Per-compatible match data: the architecture version and implementation
 * model associated with an entry of the OF match table.
 */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};
1901 
/* Define a static const arm_smmu_match_data named @name */
#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

/* One instance per distinct version/model combination used below */
ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);
1911 
/*
 * Device tree compatibles handled by this driver, each paired with the
 * match data describing its version and model. Note that "arm,mmu-400"
 * shares the generic v1 data and "nvidia,smmu-500" shares the MMU-500 data.
 */
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },	/* sentinel */
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
1924 
1925 #ifdef CONFIG_ACPI
1926 static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
1927 {
1928 	int ret = 0;
1929 
1930 	switch (model) {
1931 	case ACPI_IORT_SMMU_V1:
1932 	case ACPI_IORT_SMMU_CORELINK_MMU400:
1933 		smmu->version = ARM_SMMU_V1;
1934 		smmu->model = GENERIC_SMMU;
1935 		break;
1936 	case ACPI_IORT_SMMU_CORELINK_MMU401:
1937 		smmu->version = ARM_SMMU_V1_64K;
1938 		smmu->model = GENERIC_SMMU;
1939 		break;
1940 	case ACPI_IORT_SMMU_V2:
1941 		smmu->version = ARM_SMMU_V2;
1942 		smmu->model = GENERIC_SMMU;
1943 		break;
1944 	case ACPI_IORT_SMMU_CORELINK_MMU500:
1945 		smmu->version = ARM_SMMU_V2;
1946 		smmu->model = ARM_MMU500;
1947 		break;
1948 	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
1949 		smmu->version = ARM_SMMU_V2;
1950 		smmu->model = CAVIUM_SMMUV2;
1951 		break;
1952 	default:
1953 		ret = -ENODEV;
1954 	}
1955 
1956 	return ret;
1957 }
1958 
1959 static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1960 				      u32 *global_irqs, u32 *pmu_irqs)
1961 {
1962 	struct device *dev = smmu->dev;
1963 	struct acpi_iort_node *node =
1964 		*(struct acpi_iort_node **)dev_get_platdata(dev);
1965 	struct acpi_iort_smmu *iort_smmu;
1966 	int ret;
1967 
1968 	/* Retrieve SMMU1/2 specific data */
1969 	iort_smmu = (struct acpi_iort_smmu *)node->node_data;
1970 
1971 	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
1972 	if (ret < 0)
1973 		return ret;
1974 
1975 	/* Ignore the configuration access interrupt */
1976 	*global_irqs = 1;
1977 	*pmu_irqs = 0;
1978 
1979 	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
1980 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
1981 
1982 	return 0;
1983 }
1984 #else
1985 static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
1986 					     u32 *global_irqs, u32 *pmu_irqs)
1987 {
1988 	return -ENODEV;
1989 }
1990 #endif
1991 
1992 static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
1993 				    u32 *global_irqs, u32 *pmu_irqs)
1994 {
1995 	const struct arm_smmu_match_data *data;
1996 	struct device *dev = smmu->dev;
1997 	bool legacy_binding;
1998 
1999 	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
2000 		return dev_err_probe(dev, -ENODEV,
2001 				     "missing #global-interrupts property\n");
2002 	*pmu_irqs = 0;
2003 
2004 	data = of_device_get_match_data(dev);
2005 	smmu->version = data->version;
2006 	smmu->model = data->model;
2007 
2008 	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
2009 	if (legacy_binding && !using_generic_binding) {
2010 		if (!using_legacy_binding) {
2011 			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
2012 				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
2013 		}
2014 		using_legacy_binding = true;
2015 	} else if (!legacy_binding && !using_legacy_binding) {
2016 		using_generic_binding = true;
2017 	} else {
2018 		dev_err(dev, "not probing due to mismatched DT properties\n");
2019 		return -ENODEV;
2020 	}
2021 
2022 	if (of_dma_is_coherent(dev->of_node))
2023 		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
2024 
2025 	return 0;
2026 }
2027 
2028 static int arm_smmu_bus_init(struct iommu_ops *ops)
2029 {
2030 	int err;
2031 
2032 	/* Oh, for a proper bus abstraction */
2033 	if (!iommu_present(&platform_bus_type)) {
2034 		err = bus_set_iommu(&platform_bus_type, ops);
2035 		if (err)
2036 			return err;
2037 	}
2038 #ifdef CONFIG_ARM_AMBA
2039 	if (!iommu_present(&amba_bustype)) {
2040 		err = bus_set_iommu(&amba_bustype, ops);
2041 		if (err)
2042 			goto err_reset_platform_ops;
2043 	}
2044 #endif
2045 #ifdef CONFIG_PCI
2046 	if (!iommu_present(&pci_bus_type)) {
2047 		err = bus_set_iommu(&pci_bus_type, ops);
2048 		if (err)
2049 			goto err_reset_amba_ops;
2050 	}
2051 #endif
2052 #ifdef CONFIG_FSL_MC_BUS
2053 	if (!iommu_present(&fsl_mc_bus_type)) {
2054 		err = bus_set_iommu(&fsl_mc_bus_type, ops);
2055 		if (err)
2056 			goto err_reset_pci_ops;
2057 	}
2058 #endif
2059 	return 0;
2060 
2061 err_reset_pci_ops: __maybe_unused;
2062 #ifdef CONFIG_PCI
2063 	bus_set_iommu(&pci_bus_type, NULL);
2064 #endif
2065 err_reset_amba_ops: __maybe_unused;
2066 #ifdef CONFIG_ARM_AMBA
2067 	bus_set_iommu(&amba_bustype, NULL);
2068 #endif
2069 err_reset_platform_ops: __maybe_unused;
2070 	bus_set_iommu(&platform_bus_type, NULL);
2071 	return err;
2072 }
2073 
2074 static int arm_smmu_device_probe(struct platform_device *pdev)
2075 {
2076 	struct resource *res;
2077 	resource_size_t ioaddr;
2078 	struct arm_smmu_device *smmu;
2079 	struct device *dev = &pdev->dev;
2080 	int num_irqs, i, err;
2081 	u32 global_irqs, pmu_irqs;
2082 	irqreturn_t (*global_fault)(int irq, void *dev);
2083 
2084 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
2085 	if (!smmu) {
2086 		dev_err(dev, "failed to allocate arm_smmu_device\n");
2087 		return -ENOMEM;
2088 	}
2089 	smmu->dev = dev;
2090 
2091 	if (dev->of_node)
2092 		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
2093 	else
2094 		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
2095 	if (err)
2096 		return err;
2097 
2098 	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
2099 	if (IS_ERR(smmu->base))
2100 		return PTR_ERR(smmu->base);
2101 	ioaddr = res->start;
2102 	/*
2103 	 * The resource size should effectively match the value of SMMU_TOP;
2104 	 * stash that temporarily until we know PAGESIZE to validate it with.
2105 	 */
2106 	smmu->numpage = resource_size(res);
2107 
2108 	smmu = arm_smmu_impl_init(smmu);
2109 	if (IS_ERR(smmu))
2110 		return PTR_ERR(smmu);
2111 
2112 	num_irqs = platform_irq_count(pdev);
2113 
2114 	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
2115 	if (smmu->num_context_irqs <= 0)
2116 		return dev_err_probe(dev, -ENODEV,
2117 				"found %d interrupts but expected at least %d\n",
2118 				num_irqs, global_irqs + pmu_irqs + 1);
2119 
2120 	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
2121 				  sizeof(*smmu->irqs), GFP_KERNEL);
2122 	if (!smmu->irqs)
2123 		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
2124 				     smmu->num_context_irqs);
2125 
2126 	for (i = 0; i < smmu->num_context_irqs; i++) {
2127 		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);
2128 
2129 		if (irq < 0)
2130 			return irq;
2131 		smmu->irqs[i] = irq;
2132 	}
2133 
2134 	err = devm_clk_bulk_get_all(dev, &smmu->clks);
2135 	if (err < 0) {
2136 		dev_err(dev, "failed to get clocks %d\n", err);
2137 		return err;
2138 	}
2139 	smmu->num_clks = err;
2140 
2141 	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
2142 	if (err)
2143 		return err;
2144 
2145 	err = arm_smmu_device_cfg_probe(smmu);
2146 	if (err)
2147 		return err;
2148 
2149 	if (smmu->version == ARM_SMMU_V2) {
2150 		if (smmu->num_context_banks > smmu->num_context_irqs) {
2151 			dev_err(dev,
2152 			      "found only %d context irq(s) but %d required\n",
2153 			      smmu->num_context_irqs, smmu->num_context_banks);
2154 			return -ENODEV;
2155 		}
2156 
2157 		/* Ignore superfluous interrupts */
2158 		smmu->num_context_irqs = smmu->num_context_banks;
2159 	}
2160 
2161 	if (smmu->impl && smmu->impl->global_fault)
2162 		global_fault = smmu->impl->global_fault;
2163 	else
2164 		global_fault = arm_smmu_global_fault;
2165 
2166 	for (i = 0; i < global_irqs; i++) {
2167 		int irq = platform_get_irq(pdev, i);
2168 
2169 		if (irq < 0)
2170 			return irq;
2171 
2172 		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
2173 				       "arm-smmu global fault", smmu);
2174 		if (err)
2175 			return dev_err_probe(dev, err,
2176 					"failed to request global IRQ %d (%u)\n",
2177 					i, irq);
2178 	}
2179 
2180 	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
2181 				     "smmu.%pa", &ioaddr);
2182 	if (err) {
2183 		dev_err(dev, "Failed to register iommu in sysfs\n");
2184 		return err;
2185 	}
2186 
2187 	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
2188 	if (err) {
2189 		dev_err(dev, "Failed to register iommu\n");
2190 		goto err_sysfs_remove;
2191 	}
2192 
2193 	platform_set_drvdata(pdev, smmu);
2194 	arm_smmu_device_reset(smmu);
2195 	arm_smmu_test_smr_masks(smmu);
2196 
2197 	/*
2198 	 * We want to avoid touching dev->power.lock in fastpaths unless
2199 	 * it's really going to do something useful - pm_runtime_enabled()
2200 	 * can serve as an ideal proxy for that decision. So, conditionally
2201 	 * enable pm_runtime.
2202 	 */
2203 	if (dev->pm_domain) {
2204 		pm_runtime_set_active(dev);
2205 		pm_runtime_enable(dev);
2206 	}
2207 
2208 	/*
2209 	 * For ACPI and generic DT bindings, an SMMU will be probed before
2210 	 * any device which might need it, so we want the bus ops in place
2211 	 * ready to handle default domain setup as soon as any SMMU exists.
2212 	 */
2213 	if (!using_legacy_binding) {
2214 		err = arm_smmu_bus_init(&arm_smmu_ops);
2215 		if (err)
2216 			goto err_unregister_device;
2217 	}
2218 
2219 	return 0;
2220 
2221 err_unregister_device:
2222 	iommu_device_unregister(&smmu->iommu);
2223 err_sysfs_remove:
2224 	iommu_device_sysfs_remove(&smmu->iommu);
2225 	return err;
2226 }
2227 
2228 static int arm_smmu_device_remove(struct platform_device *pdev)
2229 {
2230 	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2231 
2232 	if (!smmu)
2233 		return -ENODEV;
2234 
2235 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2236 		dev_notice(&pdev->dev, "disabling translation\n");
2237 
2238 	arm_smmu_bus_init(NULL);
2239 	iommu_device_unregister(&smmu->iommu);
2240 	iommu_device_sysfs_remove(&smmu->iommu);
2241 
2242 	arm_smmu_rpm_get(smmu);
2243 	/* Turn the thing off */
2244 	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
2245 	arm_smmu_rpm_put(smmu);
2246 
2247 	if (pm_runtime_enabled(smmu->dev))
2248 		pm_runtime_force_suspend(smmu->dev);
2249 	else
2250 		clk_bulk_disable(smmu->num_clks, smmu->clks);
2251 
2252 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2253 	return 0;
2254 }
2255 
/* Platform driver shutdown callback: same teardown as remove, result ignored. */
static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	(void)arm_smmu_device_remove(pdev);
}
2260 
2261 static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
2262 {
2263 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2264 	int ret;
2265 
2266 	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
2267 	if (ret)
2268 		return ret;
2269 
2270 	arm_smmu_device_reset(smmu);
2271 
2272 	return 0;
2273 }
2274 
2275 static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
2276 {
2277 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2278 
2279 	clk_bulk_disable(smmu->num_clks, smmu->clks);
2280 
2281 	return 0;
2282 }
2283 
2284 static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
2285 {
2286 	int ret;
2287 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2288 
2289 	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
2290 	if (ret)
2291 		return ret;
2292 
2293 	if (pm_runtime_suspended(dev))
2294 		return 0;
2295 
2296 	ret = arm_smmu_runtime_resume(dev);
2297 	if (ret)
2298 		clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2299 
2300 	return ret;
2301 }
2302 
2303 static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
2304 {
2305 	int ret = 0;
2306 	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
2307 
2308 	if (pm_runtime_suspended(dev))
2309 		goto clk_unprepare;
2310 
2311 	ret = arm_smmu_runtime_suspend(dev);
2312 	if (ret)
2313 		return ret;
2314 
2315 clk_unprepare:
2316 	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
2317 	return ret;
2318 }
2319 
2320 static const struct dev_pm_ops arm_smmu_pm_ops = {
2321 	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
2322 	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
2323 			   arm_smmu_runtime_resume, NULL)
2324 };
2325 
2326 static struct platform_driver arm_smmu_driver = {
2327 	.driver	= {
2328 		.name			= "arm-smmu",
2329 		.of_match_table		= arm_smmu_of_match,
2330 		.pm			= &arm_smmu_pm_ops,
2331 		.suppress_bind_attrs    = true,
2332 	},
2333 	.probe	= arm_smmu_device_probe,
2334 	.remove	= arm_smmu_device_remove,
2335 	.shutdown = arm_smmu_device_shutdown,
2336 };
2337 module_platform_driver(arm_smmu_driver);
2338 
2339 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
2340 MODULE_AUTHOR("Will Deacon <will@kernel.org>");
2341 MODULE_ALIAS("platform:arm-smmu");
2342 MODULE_LICENSE("GPL v2");
2343