xref: /linux/drivers/misc/cxl/cxllib.c (revision 44f57d78)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright 2017 IBM Corp.
4  */
5 
6 #include <linux/hugetlb.h>
7 #include <linux/sched/mm.h>
8 #include <asm/pnv-pci.h>
9 #include <misc/cxllib.h>
10 
11 #include "cxl.h"
12 
/* Sentinel stored in dummy_read_addr until a dummy read buffer is allocated */
#define CXL_INVALID_DRA                 (~0ull)
/* Size in bytes of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE             128
/* Alignment of the dummy read buffer, as a shift count: 1 << 8 = 256 bytes */
#define CXL_DUMMY_READ_ALIGN            8
/* Reported to callers as the bar_addr field of struct cxllib_xsl_config */
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
/* Reported to callers as the log_bar_size field of struct cxllib_xsl_config */
#define CXL_CAPI_WINDOW_LOG_SIZE        48
/* Version written into struct cxllib_xsl_config by cxllib_get_xsl_config() */
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1
19 
20 
21 bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
22 {
23 	int rc;
24 	u32 phb_index;
25 	u64 chip_id, capp_unit_id;
26 
27 	/* No flags currently supported */
28 	if (flags)
29 		return false;
30 
31 	if (!cpu_has_feature(CPU_FTR_HVMODE))
32 		return false;
33 
34 	if (!cxl_is_power9())
35 		return false;
36 
37 	if (cxl_slot_is_switched(dev))
38 		return false;
39 
40 	/* on p9, some pci slots are not connected to a CAPP unit */
41 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
42 	if (rc)
43 		return false;
44 
45 	return true;
46 }
47 EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
48 
49 static DEFINE_MUTEX(dra_mutex);
50 static u64 dummy_read_addr = CXL_INVALID_DRA;
51 
52 static int allocate_dummy_read_buf(void)
53 {
54 	u64 buf, vaddr;
55 	size_t buf_size;
56 
57 	/*
58 	 * Dummy read buffer is 128-byte long, aligned on a
59 	 * 256-byte boundary and we need the physical address.
60 	 */
61 	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
62 	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
63 	if (!buf)
64 		return -ENOMEM;
65 
66 	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
67 					(~0ull << CXL_DUMMY_READ_ALIGN);
68 
69 	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
70 		"Dummy read buffer alignment issue");
71 	dummy_read_addr = virt_to_phys((void *) vaddr);
72 	return 0;
73 }
74 
75 int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
76 {
77 	int rc;
78 	u32 phb_index;
79 	u64 chip_id, capp_unit_id;
80 
81 	if (!cpu_has_feature(CPU_FTR_HVMODE))
82 		return -EINVAL;
83 
84 	mutex_lock(&dra_mutex);
85 	if (dummy_read_addr == CXL_INVALID_DRA) {
86 		rc = allocate_dummy_read_buf();
87 		if (rc) {
88 			mutex_unlock(&dra_mutex);
89 			return rc;
90 		}
91 	}
92 	mutex_unlock(&dra_mutex);
93 
94 	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
95 	if (rc)
96 		return rc;
97 
98 	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
99 	if (rc)
100 		return rc;
101 
102 	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
103 	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
104 	cfg->bar_addr = CXL_CAPI_WINDOW_START;
105 	cfg->dra = dummy_read_addr;
106 	return 0;
107 }
108 EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
109 
110 int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
111 			unsigned long flags)
112 {
113 	int rc = 0;
114 
115 	if (!cpu_has_feature(CPU_FTR_HVMODE))
116 		return -EINVAL;
117 
118 	switch (mode) {
119 	case CXL_MODE_PCI:
120 		/*
121 		 * We currently don't support going back to PCI mode
122 		 * However, we'll turn the invalidations off, so that
123 		 * the firmware doesn't have to ack them and can do
124 		 * things like reset, etc.. with no worries.
125 		 * So always return EPERM (can't go back to PCI) or
126 		 * EBUSY if we couldn't even turn off snooping
127 		 */
128 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
129 		if (rc)
130 			rc = -EBUSY;
131 		else
132 			rc = -EPERM;
133 		break;
134 	case CXL_MODE_CXL:
135 		/* DMA only supported on TVT1 for the time being */
136 		if (flags != CXL_MODE_DMA_TVT1)
137 			return -EINVAL;
138 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
139 		if (rc)
140 			return rc;
141 		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
142 		break;
143 	default:
144 		rc = -EINVAL;
145 	}
146 	return rc;
147 }
148 EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
149 
150 /*
151  * When switching the PHB to capi mode, the TVT#1 entry for
152  * the Partitionable Endpoint is set in bypass mode, like
153  * in PCI mode.
154  * Configure the device dma to use TVT#1, which is done
155  * by calling dma_set_mask() with a mask large enough.
156  */
157 int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
158 {
159 	int rc;
160 
161 	if (flags)
162 		return -EINVAL;
163 
164 	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
165 	return rc;
166 }
167 EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
168 
169 int cxllib_get_PE_attributes(struct task_struct *task,
170 			     unsigned long translation_mode,
171 			     struct cxllib_pe_attributes *attr)
172 {
173 	struct mm_struct *mm = NULL;
174 
175 	if (translation_mode != CXL_TRANSLATED_MODE &&
176 		translation_mode != CXL_REAL_MODE)
177 		return -EINVAL;
178 
179 	attr->sr = cxl_calculate_sr(false,
180 				task == NULL,
181 				translation_mode == CXL_REAL_MODE,
182 				true);
183 	attr->lpid = mfspr(SPRN_LPID);
184 	if (task) {
185 		mm = get_task_mm(task);
186 		if (mm == NULL)
187 			return -EINVAL;
188 		/*
189 		 * Caller is keeping a reference on mm_users for as long
190 		 * as XSL uses the memory context
191 		 */
192 		attr->pid = mm->context.id;
193 		mmput(mm);
194 		attr->tid = task->thread.tidr;
195 	} else {
196 		attr->pid = 0;
197 		attr->tid = 0;
198 	}
199 	return 0;
200 }
201 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
202 
203 static int get_vma_info(struct mm_struct *mm, u64 addr,
204 			u64 *vma_start, u64 *vma_end,
205 			unsigned long *page_size)
206 {
207 	struct vm_area_struct *vma = NULL;
208 	int rc = 0;
209 
210 	down_read(&mm->mmap_sem);
211 
212 	vma = find_vma(mm, addr);
213 	if (!vma) {
214 		rc = -EFAULT;
215 		goto out;
216 	}
217 	*page_size = vma_kernel_pagesize(vma);
218 	*vma_start = vma->vm_start;
219 	*vma_end = vma->vm_end;
220 out:
221 	up_read(&mm->mmap_sem);
222 	return rc;
223 }
224 
225 int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
226 {
227 	int rc;
228 	u64 dar, vma_start, vma_end;
229 	unsigned long page_size;
230 
231 	if (mm == NULL)
232 		return -EFAULT;
233 
234 	/*
235 	 * The buffer we have to process can extend over several pages
236 	 * and may also cover several VMAs.
237 	 * We iterate over all the pages. The page size could vary
238 	 * between VMAs.
239 	 */
240 	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
241 	if (rc)
242 		return rc;
243 
244 	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
245 	     dar += page_size) {
246 		if (dar < vma_start || dar >= vma_end) {
247 			/*
248 			 * We don't hold the mm->mmap_sem semaphore
249 			 * while iterating, since the semaphore is
250 			 * required by one of the lower-level page
251 			 * fault processing functions and it could
252 			 * create a deadlock.
253 			 *
254 			 * It means the VMAs can be altered between 2
255 			 * loop iterations and we could theoretically
256 			 * miss a page (however unlikely). But that's
257 			 * not really a problem, as the driver will
258 			 * retry access, get another page fault on the
259 			 * missing page and call us again.
260 			 */
261 			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
262 					&page_size);
263 			if (rc)
264 				return rc;
265 		}
266 
267 		rc = cxl_handle_mm_fault(mm, flags, dar);
268 		if (rc)
269 			return -EFAULT;
270 	}
271 	return 0;
272 }
273 EXPORT_SYMBOL_GPL(cxllib_handle_fault);
274