xref: /illumos-gate/usr/src/uts/sun4u/io/pci/pci_reloc.c (revision f3041bfa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI nexus DVMA relocation routines.
30  *
31  * These routines handle the interactions with the HAT layer to
32  * implement page relocation for page(s) which have active DMA handle
33  * bindings when DVMA is being used for those handles.
34  *
35  * The current modus operandi is as follows:
36  *
37  *   Object binding: register the appropriate callback for each page
38  *     of the kernel object while obtaining the PFN for the DVMA page.
39  *
40  *   Object unbinding: unregister the callback for each page of the
41  *     kernel object.
42  *
43  *   Relocation request:
44  *     1) Suspend the bus and sync the caches.
45  *     2) Remap the DVMA object using the new provided PFN.
46  *     3) Unsuspend the bus.
47  *
48  *  The relocation code runs with CPUs captured (idling in xc_loop())
49  *  so we can only acquire spinlocks at PIL >= 13 for synchronization
50  *  within those codepaths.
51  */
52 #include <sys/types.h>
53 #include <sys/kmem.h>
54 #include <sys/async.h>
55 #include <sys/sysmacros.h>
56 #include <sys/sunddi.h>
57 #include <sys/machsystm.h>
58 #include <sys/ddi_impldefs.h>
59 #include <sys/dvma.h>
60 #include <vm/hat.h>
61 #include <sys/pci/pci_obj.h>
62 
63 /*LINTLIBRARY*/
64 
65 void
66 pci_dvma_unregister_callbacks(pci_t *pci_p, ddi_dma_impl_t *mp)
67 {
68 	ddi_dma_obj_t *dobj_p = &mp->dmai_object;
69 	struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
70 	page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
71 	caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
72 	struct hat *hat_p;
73 	uint32_t offset;
74 	int i;
75 
76 	if (!PCI_DMA_CANRELOC(mp))
77 		return;
78 
79 	hat_p = (as_p == NULL)? kas.a_hat : as_p->a_hat;
80 	ASSERT(hat_p == kas.a_hat);
81 	ASSERT(pplist == NULL);
82 
83 	offset = mp->dmai_roffset;
84 	hat_delete_callback(vaddr, IOMMU_PAGE_SIZE - offset, mp, HAC_PAGELOCK,
85 	    MP_HAT_CB_COOKIE(mp, 0));
86 	vaddr = (caddr_t)(((uintptr_t)vaddr + IOMMU_PAGE_SIZE) &
87 	    IOMMU_PAGE_MASK);
88 	for (i = 1; i < mp->dmai_ndvmapages; i++) {
89 		hat_delete_callback(vaddr, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK,
90 		    MP_HAT_CB_COOKIE(mp, i));
91 		vaddr += IOMMU_PAGE_SIZE;
92 	}
93 	mp->dmai_flags &= ~DMAI_FLAGS_RELOC;
94 }
95 
96 static int
97 pci_dvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
98 	pfn_t newpfn)
99 {
100 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
101 	dev_info_t *rdip = mp->dmai_rdip;
102 	ddi_dma_obj_t *dobj_p = &mp->dmai_object;
103 	page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
104 	caddr_t baseva = dobj_p->dmao_obj.virt_obj.v_addr;
105 	int index;
106 	size_t length = IOMMU_PTOB(1);
107 	off_t offset;
108 
109 	DEBUG0(DBG_RELOC, rdip, "postrelocator called\n");
110 
111 	if (flags == HAT_POSTUNSUSPEND) {
112 		mutex_enter(&pci_reloc_mutex);
113 		ASSERT(pci_reloc_thread == curthread);
114 		ASSERT(pci_reloc_presuspend > 0);
115 		if (--pci_reloc_presuspend == 0) {
116 			pci_reloc_thread = NULL;
117 			cv_broadcast(&pci_reloc_cv);
118 		}
119 		mutex_exit(&pci_reloc_mutex);
120 		return (0);
121 	}
122 
123 	ASSERT(flags == HAT_UNSUSPEND);
124 	ASSERT(pci_reloc_suspend > 0);
125 	pci_reloc_suspend--;
126 
127 	ASSERT(len <= length);
128 	ASSERT(pplist == NULL);	/* addr bind handle only */
129 	ASSERT(dobj_p->dmao_obj.virt_obj.v_as == &kas ||
130 	    dobj_p->dmao_obj.virt_obj.v_as == NULL);
131 	ASSERT(PCI_DMA_ISDVMA(mp));
132 	ASSERT(pci_reloc_thread == curthread);
133 
134 	offset = va - baseva;
135 	index = IOMMU_BTOPR(offset);
136 	ASSERT(index < mp->dmai_ndvmapages);
137 
138 	DEBUG3(DBG_RELOC, rdip, "index 0x%x, vaddr 0x%llx, baseva 0x%llx\n",
139 	    index, (int64_t)va, (int64_t)baseva);
140 
141 	if ((mp)->dmai_ndvmapages == 1) {
142 		DEBUG2(DBG_RELOC, rdip, "pfn remap (1) 0x%x -> 0x%x\n",
143 		    mp->dmai_pfnlst, newpfn);
144 		    mp->dmai_pfnlst = (void *)newpfn;
145 	} else {
146 		DEBUG3(DBG_RELOC, rdip, "pfn remap (%d) 0x%x -> 0x%x\n",
147 		    index, ((iopfn_t *)mp->dmai_pfnlst)[index], newpfn);
148 		((iopfn_t *)mp->dmai_pfnlst)[index] = (iopfn_t)newpfn;
149 	}
150 
151 	if (ddi_dma_mctl(rdip, rdip, (ddi_dma_handle_t)mp, DDI_DMA_REMAP,
152 	    &offset, &length, NULL, 0) != DDI_SUCCESS)
153 		return (EIO);
154 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
155 	    DDI_SUCCESS)
156 		return (EIO);
157 
158 	return (0);
159 }
160 
161 /*
162  * Log a warning message if a callback is still registered on
163  * a page which is being freed.  This is indicative of a driver
164  * bug -- DMA handles are bound, and the memory is being freed by
165  * the VM subsystem without an unbind call on the handle first.
166  */
167 static int
168 pci_dma_relocerr(caddr_t va, uint_t len, uint_t errorcode, void *mpvoid)
169 {
170 	int errlevel = pci_dma_panic_on_leak? CE_PANIC : CE_WARN;
171 	if (errorcode == HAT_CB_ERR_LEAKED) {
172 		cmn_err(errlevel, "object 0x%p has a bound DMA handle 0x%p\n",
173 			va, mpvoid);
174 		return (0);
175 	}
176 
177 	/* unknown error code, unhandled so panic */
178 	return (EINVAL);
179 }
180 
181 /*
182  * pci DVMA remap entry points
183  *
184  * Called in response to a DDI_DMA_REMAP DMA ctlops command.
185  * Remaps the region specified in the underlying IOMMU. Safe
186  * to assume that the bus was quiesced and ddi_dma_sync() was
187  * invoked by the caller before we got to this point.
188  */
189 int
190 pci_dvma_remap(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
191 	off_t offset, size_t length)
192 {
193 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
194 	iommu_t *iommu_p = pci_p->pci_iommu_p;
195 	dvma_addr_t dvma_pg;
196 	size_t npgs;
197 	int idx;
198 
199 	dvma_pg = IOMMU_BTOP(mp->dmai_mapping);
200 	idx = IOMMU_BTOPR(offset);
201 	dvma_pg += idx;
202 	npgs = IOMMU_BTOPR(length);
203 
204 	DEBUG3(DBG_RELOC, mp->dmai_rdip,
205 	    "pci_dvma_remap: dvma_pg 0x%llx len 0x%llx idx 0x%x\n",
206 	    dvma_pg, length, idx);
207 
208 	ASSERT(pci_p->pci_pbm_p->pbm_quiesce_count > 0);
209 	iommu_remap_pages(iommu_p, mp, dvma_pg, npgs, idx);
210 
211 	return (DDI_SUCCESS);
212 }
213 
214 void
215 pci_fdvma_remap(ddi_dma_impl_t *mp, caddr_t kvaddr, dvma_addr_t dvma_pg,
216 	size_t npages, size_t index, pfn_t newpfn)
217 {
218 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
219 	pci_t *pci_p = (pci_t *)fdvma_p->softsp;
220 	iommu_t *iommu_p = pci_p->pci_iommu_p;
221 	dev_info_t *dip = pci_p->pci_dip;
222 	iopfn_t pfn = (iopfn_t)newpfn;
223 	dvma_addr_t pg_index = dvma_pg - iommu_p->dvma_base_pg;
224 	int i;
225 	uint64_t tte;
226 
227 	/* make sure we don't exceed reserved boundary */
228 	DEBUG3(DBG_FAST_DVMA, dip, "fast remap index=%x: %p, npgs=%x", index,
229 	    kvaddr, npages);
230 	if (index + npages > mp->dmai_ndvmapages) {
231 		cmn_err(pci_panic_on_fatal_errors ? CE_PANIC : CE_WARN,
232 			"%s%d: fdvma remap index(%lx)+pgs(%lx) exceeds limit\n",
233 			ddi_driver_name(dip), ddi_get_instance(dip),
234 			index, npages);
235 		return;
236 	}
237 
238 	for (i = 0; i < npages; i++, kvaddr += IOMMU_PAGE_SIZE) {
239 		DEBUG3(DBG_FAST_DVMA, dip, "remap dvma_pg %x -> pfn %x,"
240 		    " old tte 0x%llx\n", dvma_pg + i, pfn,
241 		    iommu_p->iommu_tsb_vaddr[pg_index + i]);
242 
243 		if (pfn == PFN_INVALID)
244 			goto bad_pfn;
245 
246 		if (i == 0)
247 			tte = MAKE_TTE_TEMPLATE(pfn, mp);
248 
249 		/* XXX assumes iommu and mmu has same page size */
250 		iommu_p->iommu_tsb_vaddr[pg_index + i] = tte | IOMMU_PTOB(pfn);
251 		IOMMU_PAGE_FLUSH(iommu_p, (dvma_pg + i));
252 	}
253 	return;
254 bad_pfn:
255 	cmn_err(CE_WARN, "%s%d: fdvma remap can't get page frame for vaddr %p",
256 		ddi_driver_name(dip), ddi_get_instance(dip), kvaddr);
257 }
258 
259 static int
260 pci_fdvma_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
261 {
262 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
263 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
264 	caddr_t baseva, endva;
265 	int i;
266 
267 	/*
268 	 * It isn't safe to do relocation if all of the IOMMU
269 	 * mappings haven't yet been established at this index.
270 	 */
271 	for (i = 0; i < mp->dmai_ndvmapages; i++) {
272 		baseva = fdvma_p->kvbase[i];
273 		endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);
274 		if (va >= baseva && va < endva)
275 			return (0);	/* found a valid index */
276 	}
277 	return (EAGAIN);
278 }
279 
280 static int
281 pci_fdvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
282 	pfn_t pfn)
283 {
284 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
285 	dev_info_t *rdip = mp->dmai_rdip;
286 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
287 	caddr_t baseva;
288 	dvma_addr_t dvma_pg;
289 	size_t length = PAGESIZE;
290 	int i;
291 
292 	DEBUG0(DBG_RELOC, rdip, "fdvma postrelocator called\n");
293 
294 	if (flags == HAT_POSTUNSUSPEND) {
295 		mutex_enter(&pci_reloc_mutex);
296 		ASSERT(pci_reloc_thread == curthread);
297 		if (--pci_reloc_presuspend == 0) {
298 			pci_reloc_thread = NULL;
299 			cv_broadcast(&pci_reloc_cv);
300 		}
301 		mutex_exit(&pci_reloc_mutex);
302 		return (0);
303 	}
304 
305 	pci_reloc_suspend--;
306 
307 	ASSERT(flags == HAT_UNSUSPEND);
308 	ASSERT(len <= length);
309 	ASSERT((mp->dmai_rflags & DMP_BYPASSNEXUS) != 0);
310 
311 	/*
312 	 * This virtual page can have multiple cookies that refer
313 	 * to it within the same handle. We must walk the whole
314 	 * table for this DMA handle finding all the cookies, and
315 	 * update all of them. Sigh.
316 	 */
317 	for (i = 0; i < mp->dmai_ndvmapages; i++) {
318 		caddr_t endva;
319 		int index;
320 
321 		baseva = fdvma_p->kvbase[i];
322 		endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);
323 
324 		if (va >= baseva && va < endva) {
325 			index = i + IOMMU_BTOP(va - baseva);
326 			ASSERT(index < mp->dmai_ndvmapages);
327 
328 			DEBUG4(DBG_RELOC, rdip, "mp %p: index 0x%x, "
329 			    " vaddr 0x%llx, baseva 0x%llx\n", mp, index,
330 			    (int64_t)va, (int64_t)baseva);
331 
332 			dvma_pg = IOMMU_BTOP(mp->dmai_mapping) + index;
333 			pci_fdvma_remap(mp, va, dvma_pg, IOMMU_BTOP(length),
334 			    index, pfn);
335 		}
336 	}
337 
338 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
339 	    DDI_SUCCESS)
340 		return (EIO);
341 
342 	return (0);
343 }
344 
345 void
346 pci_fdvma_unregister_callbacks(pci_t *pci_p, fdvma_t *fdvma_p,
347 	ddi_dma_impl_t *mp, uint_t index)
348 {
349 	size_t npgs = fdvma_p->pagecnt[index];
350 	caddr_t kva = fdvma_p->kvbase[index];
351 	int i;
352 
353 	ASSERT(index + npgs <= mp->dmai_ndvmapages);
354 	ASSERT(kva != NULL);
355 
356 	for (i = 0; i < npgs && pci_dvma_remap_enabled;
357 	    i++, kva += IOMMU_PAGE_SIZE)
358 		hat_delete_callback(kva, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK,
359 		    fdvma_p->cbcookie[index + i]);
360 }
361 
362 static int
363 pci_common_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
364 {
365 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
366 	ddi_dma_handle_t h = (ddi_dma_handle_t)mpvoid;
367 	dev_info_t *rdip = mp->dmai_rdip;
368 	int ret;
369 
370 	DEBUG0(DBG_RELOC, rdip, "prerelocator called\n");
371 
372 	if (flags == HAT_PRESUSPEND) {
373 		if (!ddi_prop_exists(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
374 		    "dvma-remap-supported"))
375 			return (ENOTSUP);
376 		if (!PCI_DMA_ISMAPPED(mp))
377 			return (EAGAIN);
378 
379 		if (mp->dmai_rflags & DMP_BYPASSNEXUS) {
380 			ret = pci_fdvma_prerelocator(va, len, flags, mpvoid);
381 			if (ret != 0)
382 				return (ret);
383 		} else if (!PCI_DMA_ISDVMA(mp))
384 			return (EINVAL);
385 
386 		/*
387 		 * Acquire the exclusive right to relocate a PCI DMA page,
388 		 * since we later have to pause CPUs which could otherwise
389 		 * lead to all sorts of synchronization headaches.
390 		 */
391 		mutex_enter(&pci_reloc_mutex);
392 		if (pci_reloc_thread != curthread) {
393 			while (pci_reloc_thread != NULL) {
394 				cv_wait(&pci_reloc_cv, &pci_reloc_mutex);
395 			}
396 			pci_reloc_thread = curthread;
397 			ASSERT(pci_reloc_suspend == 0);
398 		}
399 		mutex_exit(&pci_reloc_mutex);
400 
401 		ASSERT(pci_reloc_thread == curthread);
402 		pci_reloc_presuspend++;
403 
404 		return (0);
405 	}
406 
407 	ASSERT(flags == HAT_SUSPEND);
408 	ASSERT(PCI_DMA_CANRELOC(mp));
409 	ASSERT(pci_reloc_thread == curthread);
410 	pci_reloc_suspend++;
411 
412 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_QUIESCE, NULL, NULL) !=
413 	    DDI_SUCCESS)
414 		return (EIO);
415 	if (ddi_dma_sync(h, 0, 0, DDI_DMA_SYNC_FORKERNEL) != DDI_SUCCESS)
416 		return (EIO);
417 
418 	return (0);
419 }
420 
421 /*
422  * Register two callback types: one for normal DVMA and the
423  * other for fast DVMA, since each method has a different way
424  * of tracking the PFNs behind a handle.
425  */
426 void
427 pci_reloc_init(void)
428 {
429 	int key = pci_reloc_getkey();
430 
431 	mutex_init(&pci_reloc_mutex, NULL, MUTEX_DEFAULT, NULL);
432 	cv_init(&pci_reloc_cv, NULL, CV_DEFAULT, NULL);
433 	pci_dvma_cbid = hat_register_callback(
434 		key + ('D'<<24 | 'V'<<16 | 'M'<<8 | 'A'),
435 		pci_common_prerelocator, pci_dvma_postrelocator,
436 		pci_dma_relocerr, 1);
437 	pci_fast_dvma_cbid = hat_register_callback(
438 		key + ('F'<<24 | 'D'<<16 | 'M'<<8 | 'A'),
439 		pci_common_prerelocator,
440 		pci_fdvma_postrelocator, pci_dma_relocerr, 1);
441 }
442 
443 void
444 pci_reloc_fini(void)
445 {
446 	cv_destroy(&pci_reloc_cv);
447 	mutex_destroy(&pci_reloc_mutex);
448 }
449