xref: /freebsd/sys/dev/ioat/ioat.c (revision 7afbb263)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/bus.h>
33 #include <sys/conf.h>
34 #include <sys/ioccom.h>
35 #include <sys/kernel.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/mutex.h>
40 #include <sys/rman.h>
41 #include <sys/sysctl.h>
42 #include <sys/time.h>
43 #include <dev/pci/pcireg.h>
44 #include <dev/pci/pcivar.h>
45 #include <machine/bus.h>
46 #include <machine/resource.h>
47 #include <machine/stdarg.h>
48 
49 #include "ioat.h"
50 #include "ioat_hw.h"
51 #include "ioat_internal.h"
52 
53 static int ioat_probe(device_t device);
54 static int ioat_attach(device_t device);
55 static int ioat_detach(device_t device);
56 static int ioat_setup_intr(struct ioat_softc *ioat);
57 static int ioat_teardown_intr(struct ioat_softc *ioat);
58 static int ioat3_attach(device_t device);
59 static int ioat3_selftest(struct ioat_softc *ioat);
60 static int ioat_map_pci_bar(struct ioat_softc *ioat);
61 static void ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg,
62     int error);
63 static void ioat_interrupt_handler(void *arg);
64 static boolean_t ioat_model_resets_msix(struct ioat_softc *ioat);
65 static void ioat_process_events(struct ioat_softc *ioat);
66 static inline uint32_t ioat_get_active(struct ioat_softc *ioat);
67 static inline uint32_t ioat_get_ring_space(struct ioat_softc *ioat);
68 static void ioat_free_ring_entry(struct ioat_softc *ioat,
69     struct ioat_descriptor *desc);
70 static struct ioat_descriptor * ioat_alloc_ring_entry(struct ioat_softc *ioat);
71 static int ioat_reserve_space_and_lock(struct ioat_softc *ioat, int num_descs);
72 static struct ioat_descriptor * ioat_get_ring_entry(struct ioat_softc *ioat,
73     uint32_t index);
74 static boolean_t resize_ring(struct ioat_softc *ioat, int order);
75 static void ioat_timer_callback(void *arg);
76 static void dump_descriptor(void *hw_desc);
77 static void ioat_submit_single(struct ioat_softc *ioat);
78 static void ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg,
79     int error);
80 static int ioat_reset_hw(struct ioat_softc *ioat);
81 static void ioat_setup_sysctl(device_t device);
82 
83 MALLOC_DEFINE(M_IOAT, "ioat", "ioat driver memory allocations");
84 SYSCTL_NODE(_hw, OID_AUTO, ioat, CTLFLAG_RD, 0, "I/OAT DMA engine driver");
85 
86 static int g_force_legacy_interrupts;
87 SYSCTL_INT(_hw_ioat, OID_AUTO, force_legacy_interrupts, CTLFLAG_RDTUN,
88     &g_force_legacy_interrupts, 0, "Set to non-zero to force legacy (INTx) interrupts");
89 
90 static int g_ioat_debug_level = 0;
91 SYSCTL_INT(_hw_ioat, OID_AUTO, debug_level, CTLFLAG_RWTUN, &g_ioat_debug_level,
92     0, "Set log level (0-3) for ioat(4). Higher is more verbose.");
93 
94 /*
95  * OS <-> Driver interface structures
96  */
97 static device_method_t ioat_pci_methods[] = {
98 	/* Device interface */
99 	DEVMETHOD(device_probe,     ioat_probe),
100 	DEVMETHOD(device_attach,    ioat_attach),
101 	DEVMETHOD(device_detach,    ioat_detach),
102 	{ 0, 0 }
103 };
104 
105 static driver_t ioat_pci_driver = {
106 	"ioat",
107 	ioat_pci_methods,
108 	sizeof(struct ioat_softc),
109 };
110 
111 static devclass_t ioat_devclass;
112 DRIVER_MODULE(ioat, pci, ioat_pci_driver, ioat_devclass, 0, 0);
113 
114 /*
115  * Private data structures
116  */
117 static struct ioat_softc *ioat_channel[IOAT_MAX_CHANNELS];
118 static int ioat_channel_index = 0;
119 SYSCTL_INT(_hw_ioat, OID_AUTO, channels, CTLFLAG_RD, &ioat_channel_index, 0,
120     "Number of IOAT channels attached");
121 
122 static struct _pcsid
123 {
124 	u_int32_t   type;
125 	const char  *desc;
126 } pci_ids[] = {
127 	{ 0x34308086, "TBG IOAT Ch0" },
128 	{ 0x34318086, "TBG IOAT Ch1" },
129 	{ 0x34328086, "TBG IOAT Ch2" },
130 	{ 0x34338086, "TBG IOAT Ch3" },
131 	{ 0x34298086, "TBG IOAT Ch4" },
132 	{ 0x342a8086, "TBG IOAT Ch5" },
133 	{ 0x342b8086, "TBG IOAT Ch6" },
134 	{ 0x342c8086, "TBG IOAT Ch7" },
135 
136 	{ 0x37108086, "JSF IOAT Ch0" },
137 	{ 0x37118086, "JSF IOAT Ch1" },
138 	{ 0x37128086, "JSF IOAT Ch2" },
139 	{ 0x37138086, "JSF IOAT Ch3" },
140 	{ 0x37148086, "JSF IOAT Ch4" },
141 	{ 0x37158086, "JSF IOAT Ch5" },
142 	{ 0x37168086, "JSF IOAT Ch6" },
143 	{ 0x37178086, "JSF IOAT Ch7" },
144 	{ 0x37188086, "JSF IOAT Ch0 (RAID)" },
145 	{ 0x37198086, "JSF IOAT Ch1 (RAID)" },
146 
147 	{ 0x3c208086, "SNB IOAT Ch0" },
148 	{ 0x3c218086, "SNB IOAT Ch1" },
149 	{ 0x3c228086, "SNB IOAT Ch2" },
150 	{ 0x3c238086, "SNB IOAT Ch3" },
151 	{ 0x3c248086, "SNB IOAT Ch4" },
152 	{ 0x3c258086, "SNB IOAT Ch5" },
153 	{ 0x3c268086, "SNB IOAT Ch6" },
154 	{ 0x3c278086, "SNB IOAT Ch7" },
155 	{ 0x3c2e8086, "SNB IOAT Ch0 (RAID)" },
156 	{ 0x3c2f8086, "SNB IOAT Ch1 (RAID)" },
157 
158 	{ 0x0e208086, "IVB IOAT Ch0" },
159 	{ 0x0e218086, "IVB IOAT Ch1" },
160 	{ 0x0e228086, "IVB IOAT Ch2" },
161 	{ 0x0e238086, "IVB IOAT Ch3" },
162 	{ 0x0e248086, "IVB IOAT Ch4" },
163 	{ 0x0e258086, "IVB IOAT Ch5" },
164 	{ 0x0e268086, "IVB IOAT Ch6" },
165 	{ 0x0e278086, "IVB IOAT Ch7" },
166 	{ 0x0e2e8086, "IVB IOAT Ch0 (RAID)" },
167 	{ 0x0e2f8086, "IVB IOAT Ch1 (RAID)" },
168 
169 	{ 0x2f208086, "HSW IOAT Ch0" },
170 	{ 0x2f218086, "HSW IOAT Ch1" },
171 	{ 0x2f228086, "HSW IOAT Ch2" },
172 	{ 0x2f238086, "HSW IOAT Ch3" },
173 	{ 0x2f248086, "HSW IOAT Ch4" },
174 	{ 0x2f258086, "HSW IOAT Ch5" },
175 	{ 0x2f268086, "HSW IOAT Ch6" },
176 	{ 0x2f278086, "HSW IOAT Ch7" },
177 	{ 0x2f2e8086, "HSW IOAT Ch0 (RAID)" },
178 	{ 0x2f2f8086, "HSW IOAT Ch1 (RAID)" },
179 
180 	{ 0x0c508086, "BWD IOAT Ch0" },
181 	{ 0x0c518086, "BWD IOAT Ch1" },
182 	{ 0x0c528086, "BWD IOAT Ch2" },
183 	{ 0x0c538086, "BWD IOAT Ch3" },
184 
185 	{ 0x6f508086, "BDXDE IOAT Ch0" },
186 	{ 0x6f518086, "BDXDE IOAT Ch1" },
187 	{ 0x6f528086, "BDXDE IOAT Ch2" },
188 	{ 0x6f538086, "BDXDE IOAT Ch3" },
189 
190 	{ 0x00000000, NULL           }
191 };
192 
193 /*
194  * OS <-> Driver linkage functions
195  */
196 static int
197 ioat_probe(device_t device)
198 {
199 	struct _pcsid *ep;
200 	u_int32_t type;
201 
202 	type = pci_get_devid(device);
203 	for (ep = pci_ids; ep->type; ep++) {
204 		if (ep->type == type) {
205 			device_set_desc(device, ep->desc);
206 			return (0);
207 		}
208 	}
209 	return (ENXIO);
210 }
211 
212 static int
213 ioat_attach(device_t device)
214 {
215 	struct ioat_softc *ioat;
216 	int error;
217 
218 	ioat = DEVICE2SOFTC(device);
219 	ioat->device = device;
220 
221 	error = ioat_map_pci_bar(ioat);
222 	if (error != 0)
223 		goto err;
224 
225 	ioat->version = ioat_read_cbver(ioat);
226 	if (ioat->version < IOAT_VER_3_0) {
227 		error = ENODEV;
228 		goto err;
229 	}
230 
231 	error = ioat_setup_intr(ioat);
232 	if (error != 0)
233 		return (error);
234 
235 	error = ioat3_attach(device);
236 	if (error != 0)
237 		goto err;
238 
239 	error = pci_enable_busmaster(device);
240 	if (error != 0)
241 		goto err;
242 
243 	error = ioat3_selftest(ioat);
244 	if (error != 0)
245 		goto err;
246 
247 	ioat_process_events(ioat);
248 	ioat_setup_sysctl(device);
249 
250 	ioat_channel[ioat_channel_index++] = ioat;
251 	ioat_test_attach();
252 
253 err:
254 	if (error != 0)
255 		ioat_detach(device);
256 	return (error);
257 }
258 
259 static int
260 ioat_detach(device_t device)
261 {
262 	struct ioat_softc *ioat;
263 	uint32_t i;
264 
265 	ioat = DEVICE2SOFTC(device);
266 
267 	ioat_test_detach();
268 	callout_drain(&ioat->timer);
269 
270 	pci_disable_busmaster(device);
271 
272 	if (ioat->pci_resource != NULL)
273 		bus_release_resource(device, SYS_RES_MEMORY,
274 		    ioat->pci_resource_id, ioat->pci_resource);
275 
276 	if (ioat->ring != NULL) {
277 		for (i = 0; i < (1 << ioat->ring_size_order); i++)
278 			ioat_free_ring_entry(ioat, ioat->ring[i]);
279 		free(ioat->ring, M_IOAT);
280 	}
281 
282 	if (ioat->comp_update != NULL) {
283 		bus_dmamap_unload(ioat->comp_update_tag, ioat->comp_update_map);
284 		bus_dmamem_free(ioat->comp_update_tag, ioat->comp_update,
285 		    ioat->comp_update_map);
286 		bus_dma_tag_destroy(ioat->comp_update_tag);
287 	}
288 
289 	bus_dma_tag_destroy(ioat->hw_desc_tag);
290 
291 	ioat_teardown_intr(ioat);
292 
293 	return (0);
294 }
295 
296 static int
297 ioat_teardown_intr(struct ioat_softc *ioat)
298 {
299 
300 	if (ioat->tag != NULL)
301 		bus_teardown_intr(ioat->device, ioat->res, ioat->tag);
302 
303 	if (ioat->res != NULL)
304 		bus_release_resource(ioat->device, SYS_RES_IRQ,
305 		    rman_get_rid(ioat->res), ioat->res);
306 
307 	pci_release_msi(ioat->device);
308 	return (0);
309 }
310 
311 static int
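/*
 * Self-test used at attach time: queue a single NULL descriptor and poll
 * (for up to 100 us) for the channel to report idle, verifying that the
 * engine can execute descriptors at all.
 */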
312 ioat3_selftest(struct ioat_softc *ioat)
313 {
314 	uint64_t status;
315 	uint32_t chanerr;
316 	int i;
317 
318 	ioat_acquire(&ioat->dmaengine);
319 	ioat_null(&ioat->dmaengine, NULL, NULL, 0);
320 	ioat_release(&ioat->dmaengine);
321 
322 	for (i = 0; i < 100; i++) {
323 		DELAY(1);
324 		status = ioat_get_chansts(ioat);
325 		if (is_ioat_idle(status))
326 			return (0);
327 	}
328 
329 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
330 	ioat_log_message(0, "could not start channel: "
331 	    "status = %#jx error = %x\n", (uintmax_t)status, chanerr);
332 	return (ENXIO);
333 }
334 
335 /*
336  * Initialize Hardware
337  */
338 static int
339 ioat3_attach(device_t device)
340 {
341 	struct ioat_softc *ioat;
342 	struct ioat_descriptor **ring;
343 	struct ioat_descriptor *next;
344 	struct ioat_dma_hw_descriptor *dma_hw_desc;
345 	uint32_t capabilities;
346 	int i, num_descriptors;
347 	int error;
348 	uint8_t xfercap;
349 
350 	error = 0;
351 	ioat = DEVICE2SOFTC(device);
352 	capabilities = ioat_read_dmacapability(ioat);
353 
354 	xfercap = ioat_read_xfercap(ioat);
355 	ioat->max_xfer_size = 1 << xfercap;
356 
357 	/* TODO: need to check DCA here if we ever do XOR/PQ */
358 
359 	mtx_init(&ioat->submit_lock, "ioat_submit", NULL, MTX_DEF);
360 	mtx_init(&ioat->cleanup_lock, "ioat_process_events", NULL, MTX_DEF);
361 	callout_init(&ioat->timer, 1);
362 
363 	ioat->is_resize_pending = FALSE;
364 	ioat->is_completion_pending = FALSE;
365 	ioat->is_reset_pending = FALSE;
366 	ioat->is_channel_running = FALSE;
367 	ioat->is_waiting_for_ack = FALSE;
368 
369 	bus_dma_tag_create(bus_get_dma_tag(ioat->device), sizeof(uint64_t), 0x0,
370 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
371 	    sizeof(uint64_t), 1, sizeof(uint64_t), 0, NULL, NULL,
372 	    &ioat->comp_update_tag);
373 
374 	error = bus_dmamem_alloc(ioat->comp_update_tag,
375 	    (void **)&ioat->comp_update, BUS_DMA_ZERO, &ioat->comp_update_map);
376 	if (ioat->comp_update == NULL)
377 		return (ENOMEM);
378 
379 	error = bus_dmamap_load(ioat->comp_update_tag, ioat->comp_update_map,
380 	    ioat->comp_update, sizeof(uint64_t), ioat_comp_update_map, ioat,
381 	    0);
382 	if (error != 0)
383 		return (error);
384 
385 	ioat->ring_size_order = IOAT_MIN_ORDER;
386 
387 	num_descriptors = 1 << ioat->ring_size_order;
388 
389 	bus_dma_tag_create(bus_get_dma_tag(ioat->device), 0x40, 0x0,
390 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
391 	    sizeof(struct ioat_dma_hw_descriptor), 1,
392 	    sizeof(struct ioat_dma_hw_descriptor), 0, NULL, NULL,
393 	    &ioat->hw_desc_tag);
394 
395 	ioat->ring = malloc(num_descriptors * sizeof(*ring), M_IOAT,
396 	    M_ZERO | M_NOWAIT);
397 	if (ioat->ring == NULL)
398 		return (ENOMEM);
399 
400 	ring = ioat->ring;
401 	for (i = 0; i < num_descriptors; i++) {
402 		ring[i] = ioat_alloc_ring_entry(ioat);
403 		if (ring[i] == NULL)
404 			return (ENOMEM);
405 
406 		ring[i]->id = i;
407 	}
408 
409 	for (i = 0; i < num_descriptors - 1; i++) {
410 		next = ring[i + 1];
411 		dma_hw_desc = ring[i]->u.dma;
412 
413 		dma_hw_desc->next = next->hw_desc_bus_addr;
414 	}
415 
416 	ring[i]->u.dma->next = ring[0]->hw_desc_bus_addr;
417 
418 	ioat->head = 0;
419 	ioat->tail = 0;
420 	ioat->last_seen = 0;
421 
422 	error = ioat_reset_hw(ioat);
423 	if (error != 0)
424 		return (error);
425 
426 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
427 	ioat_write_chancmp(ioat, ioat->comp_update_bus_addr);
428 	ioat_write_chainaddr(ioat, ring[0]->hw_desc_bus_addr);
429 	return (0);
430 }
431 
432 static int
433 ioat_map_pci_bar(struct ioat_softc *ioat)
434 {
435 
436 	ioat->pci_resource_id = PCIR_BAR(0);
437 	ioat->pci_resource = bus_alloc_resource(ioat->device, SYS_RES_MEMORY,
438 	    &ioat->pci_resource_id, 0, ~0, 1, RF_ACTIVE);
439 
440 	if (ioat->pci_resource == NULL) {
441 		ioat_log_message(0, "unable to allocate pci resource\n");
442 		return (ENODEV);
443 	}
444 
445 	ioat->pci_bus_tag = rman_get_bustag(ioat->pci_resource);
446 	ioat->pci_bus_handle = rman_get_bushandle(ioat->pci_resource);
447 	return (0);
448 }
449 
450 static void
451 ioat_comp_update_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
452 {
453 	struct ioat_softc *ioat = arg;
454 
455 	ioat->comp_update_bus_addr = seg[0].ds_addr;
456 }
457 
458 static void
459 ioat_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
460 {
461 	bus_addr_t *baddr;
462 
463 	baddr = arg;
464 	*baddr = segs->ds_addr;
465 }
466 
467 /*
468  * Interrupt setup and handlers
469  */
470 static int
471 ioat_setup_intr(struct ioat_softc *ioat)
472 {
473 	uint32_t num_vectors;
474 	int error;
475 	boolean_t use_msix;
476 	boolean_t force_legacy_interrupts;
477 
478 	use_msix = FALSE;
479 	force_legacy_interrupts = FALSE;
480 
481 	if (!g_force_legacy_interrupts && pci_msix_count(ioat->device) >= 1) {
482 		num_vectors = 1;
483 		pci_alloc_msix(ioat->device, &num_vectors);
484 		if (num_vectors == 1)
485 			use_msix = TRUE;
486 	}
487 
488 	if (use_msix) {
489 		ioat->rid = 1;
490 		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
491 		    &ioat->rid, RF_ACTIVE);
492 	} else {
493 		ioat->rid = 0;
494 		ioat->res = bus_alloc_resource_any(ioat->device, SYS_RES_IRQ,
495 		    &ioat->rid, RF_SHAREABLE | RF_ACTIVE);
496 	}
497 	if (ioat->res == NULL) {
498 		ioat_log_message(0, "bus_alloc_resource failed\n");
499 		return (ENOMEM);
500 	}
501 
502 	ioat->tag = NULL;
503 	error = bus_setup_intr(ioat->device, ioat->res, INTR_MPSAFE |
504 	    INTR_TYPE_MISC, NULL, ioat_interrupt_handler, ioat, &ioat->tag);
505 	if (error != 0) {
506 		ioat_log_message(0, "bus_setup_intr failed\n");
507 		return (error);
508 	}
509 
510 	ioat_write_intrctrl(ioat, IOAT_INTRCTRL_MASTER_INT_EN);
511 	return (0);
512 }
513 
514 static boolean_t
515 ioat_model_resets_msix(struct ioat_softc *ioat)
516 {
517 	u_int32_t pciid;
518 
519 	pciid = pci_get_devid(ioat->device);
520 	switch (pciid) {
521 		/* BWD: */
522 	case 0x0c508086:
523 	case 0x0c518086:
524 	case 0x0c528086:
525 	case 0x0c538086:
526 		/* BDXDE: */
527 	case 0x6f508086:
528 	case 0x6f518086:
529 	case 0x6f528086:
530 	case 0x6f538086:
531 		return (TRUE);
532 	}
533 
534 	return (FALSE);
535 }
536 
537 static void
538 ioat_interrupt_handler(void *arg)
539 {
540 	struct ioat_softc *ioat = arg;
541 
542 	ioat_process_events(ioat);
543 }
544 
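/*
 * Completion processing: the hardware DMAs the bus address of the last
 * completed descriptor into *ioat->comp_update.  Walk the ring from the
 * software tail, invoking each descriptor's callback, until the descriptor
 * whose hw_desc_bus_addr matches that completion address has been retired.
 */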
545 static void
546 ioat_process_events(struct ioat_softc *ioat)
547 {
548 	struct ioat_descriptor *desc;
549 	struct bus_dmadesc *dmadesc;
550 	uint64_t comp_update, status;
552 
553 	mtx_lock(&ioat->cleanup_lock);
554 
556 	comp_update = *ioat->comp_update;
557 	status = comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK;
558 
559 	ioat_log_message(3, "%s\n", __func__);
560 
561 	if (status == ioat->last_seen) {
562 		mtx_unlock(&ioat->cleanup_lock);
563 		return;
564 	}
565 
566 	while (1) {
567 		desc = ioat_get_ring_entry(ioat, ioat->tail);
568 		dmadesc = &desc->bus_dmadesc;
569 		ioat_log_message(3, "completing desc %d\n", ioat->tail);
570 
571 		if (dmadesc->callback_fn)
572 			(*dmadesc->callback_fn)(dmadesc->callback_arg);
573 
574 		ioat->tail++;
575 		if (desc->hw_desc_bus_addr == status)
576 			break;
577 	}
578 
579 	ioat->last_seen = desc->hw_desc_bus_addr;
580 
581 	if (ioat->head == ioat->tail) {
582 		ioat->is_completion_pending = FALSE;
583 		callout_reset(&ioat->timer, 5 * hz, ioat_timer_callback, ioat);
584 	}
585 
586 	ioat_write_chanctrl(ioat, IOAT_CHANCTRL_RUN);
587 	mtx_unlock(&ioat->cleanup_lock);
588 }
589 
590 /*
591  * User API functions
592  */
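/*
 * Typical consumer usage, as a minimal illustrative sketch.  The callback,
 * its argument, and the dst/src bus addresses are hypothetical names, not
 * part of this driver:
 *
 *	static void my_copy_done(void *arg);	// hypothetical completion hook
 *
 *	bus_dmaengine_t eng = ioat_get_dmaengine(0);
 *	if (eng != NULL) {
 *		ioat_acquire(eng);
 *		ioat_copy(eng, dst_busaddr, src_busaddr, len, my_copy_done,
 *		    arg, DMA_INT_EN);
 *		ioat_release(eng);	// doorbell written here; hardware starts
 *	}
 *
 * Several operations may be queued between one acquire/release pair; the
 * DMACOUNT doorbell is only written in ioat_release().
 */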
593 bus_dmaengine_t
594 ioat_get_dmaengine(uint32_t index)
595 {
596 
597 	if (index < ioat_channel_index)
598 		return (&ioat_channel[index]->dmaengine);
599 	return (NULL);
600 }
601 
602 void
603 ioat_acquire(bus_dmaengine_t dmaengine)
604 {
605 	struct ioat_softc *ioat;
606 
607 	ioat = to_ioat_softc(dmaengine);
608 	mtx_lock(&ioat->submit_lock);
609 	ioat_log_message(3, "%s\n", __func__);
610 }
611 
612 void
613 ioat_release(bus_dmaengine_t dmaengine)
614 {
615 	struct ioat_softc *ioat;
616 
617 	ioat_log_message(3, "%s\n", __func__);
618 	ioat = to_ioat_softc(dmaengine);
619 	ioat_write_2(ioat, IOAT_DMACOUNT_OFFSET, (uint16_t)ioat->head);
620 	mtx_unlock(&ioat->submit_lock);
621 }
622 
623 struct bus_dmadesc *
624 ioat_null(bus_dmaengine_t dmaengine, bus_dmaengine_callback_t callback_fn,
625     void *callback_arg, uint32_t flags)
626 {
627 	struct ioat_softc *ioat;
628 	struct ioat_descriptor *desc;
629 	struct ioat_dma_hw_descriptor *hw_desc;
630 
631 	KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x",
632 		flags & ~DMA_ALL_FLAGS));
633 
634 	ioat = to_ioat_softc(dmaengine);
635 
636 	if (ioat_reserve_space_and_lock(ioat, 1) != 0)
637 		return (NULL);
638 
639 	ioat_log_message(3, "%s\n", __func__);
640 
641 	desc = ioat_get_ring_entry(ioat, ioat->head);
642 	hw_desc = desc->u.dma;
643 
644 	hw_desc->u.control_raw = 0;
645 	hw_desc->u.control.null = 1;
646 	hw_desc->u.control.completion_update = 1;
647 
648 	if ((flags & DMA_INT_EN) != 0)
649 		hw_desc->u.control.int_enable = 1;
650 
651 	hw_desc->size = 8;
652 	hw_desc->src_addr = 0;
653 	hw_desc->dest_addr = 0;
654 
655 	desc->bus_dmadesc.callback_fn = callback_fn;
656 	desc->bus_dmadesc.callback_arg = callback_arg;
657 
658 	ioat_submit_single(ioat);
659 	return (&desc->bus_dmadesc);
660 }
661 
662 struct bus_dmadesc *
663 ioat_copy(bus_dmaengine_t dmaengine, bus_addr_t dst,
664     bus_addr_t src, bus_size_t len, bus_dmaengine_callback_t callback_fn,
665     void *callback_arg, uint32_t flags)
666 {
667 	struct ioat_descriptor *desc;
668 	struct ioat_dma_hw_descriptor *hw_desc;
669 	struct ioat_softc *ioat;
670 
671 	KASSERT((flags & ~DMA_ALL_FLAGS) == 0, ("Unrecognized flag(s): %#x",
672 		flags & ~DMA_ALL_FLAGS));
673 
674 	ioat = to_ioat_softc(dmaengine);
675 
676 	if (len > ioat->max_xfer_size) {
677 		ioat_log_message(0, "%s: max_xfer_size = %u, requested = %ju\n",
678 		    __func__, ioat->max_xfer_size, (uintmax_t)len);
679 		return (NULL);
680 	}
681 
682 	if (ioat_reserve_space_and_lock(ioat, 1) != 0)
683 		return (NULL);
684 
685 	ioat_log_message(3, "%s\n", __func__);
686 
687 	desc = ioat_get_ring_entry(ioat, ioat->head);
688 	hw_desc = desc->u.dma;
689 
690 	hw_desc->u.control_raw = 0;
691 	hw_desc->u.control.completion_update = 1;
692 
693 	if ((flags & DMA_INT_EN) != 0)
694 		hw_desc->u.control.int_enable = 1;
695 
696 	hw_desc->size = len;
697 	hw_desc->src_addr = src;
698 	hw_desc->dest_addr = dst;
699 
700 	if (g_ioat_debug_level >= 3)
701 		dump_descriptor(hw_desc);
702 
703 	desc->bus_dmadesc.callback_fn = callback_fn;
704 	desc->bus_dmadesc.callback_arg = callback_arg;
705 
706 	ioat_submit_single(ioat);
707 	return (&desc->bus_dmadesc);
708 }
709 
710 /*
711  * Ring Management
712  */
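/*
 * head and tail are free-running counters; masking with
 * ((1 << ring_size_order) - 1) converts them to ring indices.  One slot is
 * always kept unused (ioat_get_ring_space() subtracts one) so that a full
 * ring can be told apart from an empty one.  For example, with
 * ring_size_order == 2 (4 entries), head == 5 and tail == 3 give an active
 * count of 2 and a remaining space of 1.
 */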
713 static inline uint32_t
714 ioat_get_active(struct ioat_softc *ioat)
715 {
716 
717 	return ((ioat->head - ioat->tail) & ((1 << ioat->ring_size_order) - 1));
718 }
719 
720 static inline uint32_t
721 ioat_get_ring_space(struct ioat_softc *ioat)
722 {
723 
724 	return ((1 << ioat->ring_size_order) - ioat_get_active(ioat) - 1);
725 }
726 
727 static struct ioat_descriptor *
728 ioat_alloc_ring_entry(struct ioat_softc *ioat)
729 {
730 	struct ioat_dma_hw_descriptor *hw_desc;
731 	struct ioat_descriptor *desc;
732 
733 	desc = malloc(sizeof(struct ioat_descriptor), M_IOAT, M_NOWAIT);
734 	if (desc == NULL)
735 		return (NULL);
736 
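	/*
	 * XXX: every allocation below reuses the single ioat->hw_desc_map
	 * handle, so only the map from the most recent bus_dmamem_alloc() is
	 * retained, and ioat_free_ring_entry() later passes that same handle
	 * for each descriptor it frees.
	 */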
737 	bus_dmamem_alloc(ioat->hw_desc_tag, (void **)&hw_desc, BUS_DMA_ZERO,
738 	    &ioat->hw_desc_map);
739 	if (hw_desc == NULL) {
740 		free(desc, M_IOAT);
741 		return (NULL);
742 	}
743 
744 	bus_dmamap_load(ioat->hw_desc_tag, ioat->hw_desc_map, hw_desc,
745 	    sizeof(*hw_desc), ioat_dmamap_cb, &desc->hw_desc_bus_addr, 0);
746 
747 	desc->u.dma = hw_desc;
748 	return (desc);
749 }
750 
751 static void
752 ioat_free_ring_entry(struct ioat_softc *ioat, struct ioat_descriptor *desc)
753 {
754 
755 	if (desc == NULL)
756 		return;
757 
758 	if (desc->u.dma)
759 		bus_dmamem_free(ioat->hw_desc_tag, desc->u.dma,
760 		    ioat->hw_desc_map);
761 	free(desc, M_IOAT);
762 }
763 
764 static int
765 ioat_reserve_space_and_lock(struct ioat_softc *ioat, int num_descs)
766 {
767 	boolean_t retry;
768 
769 	while (1) {
770 		if (ioat_get_ring_space(ioat) >= num_descs)
771 			return (0);
772 
773 		mtx_lock(&ioat->cleanup_lock);
774 		retry = resize_ring(ioat, ioat->ring_size_order + 1);
775 		mtx_unlock(&ioat->cleanup_lock);
776 
777 		if (!retry)
778 			return (ENOMEM);
779 	}
780 }
781 
782 static struct ioat_descriptor *
783 ioat_get_ring_entry(struct ioat_softc *ioat, uint32_t index)
784 {
785 
786 	return (ioat->ring[index % (1 << ioat->ring_size_order)]);
787 }
788 
789 static boolean_t
790 resize_ring(struct ioat_softc *ioat, int order)
791 {
792 	struct ioat_descriptor **ring;
793 	struct ioat_descriptor *next;
794 	struct ioat_dma_hw_descriptor *hw;
795 	struct ioat_descriptor *ent;
796 	uint32_t current_size, active, new_size, i, new_idx, current_idx;
797 	uint32_t new_idx2;
798 
799 	current_size = 1 << ioat->ring_size_order;
800 	active = (ioat->head - ioat->tail) & (current_size - 1);
801 	new_size = 1 << order;
802 
803 	if (order > IOAT_MAX_ORDER)
804 		return (FALSE);
805 
806 	/*
807 	 * when shrinking, verify that we can hold the current active
808 	 * set in the new ring
809 	 */
810 	if (active >= new_size)
811 		return (FALSE);
812 
813 	/* allocate the array to hold the software ring */
814 	ring = malloc(new_size * sizeof(*ring), M_IOAT, M_ZERO | M_NOWAIT);
815 	if (ring == NULL)
816 		return (FALSE);
817 
818 	ioat_log_message(2, "ring resize: new: %d old: %d\n",
819 	    new_size, current_size);
820 
821 	/* allocate/trim descriptors as needed */
822 	if (new_size > current_size) {
823 		/* copy current descriptors to the new ring */
824 		for (i = 0; i < current_size; i++) {
825 			current_idx = (ioat->tail + i) & (current_size - 1);
826 			new_idx = (ioat->tail + i) & (new_size - 1);
827 
828 			ring[new_idx] = ioat->ring[current_idx];
829 			ring[new_idx]->id = new_idx;
830 		}
831 
832 		/* add new descriptors to the ring */
833 		for (i = current_size; i < new_size; i++) {
834 			new_idx = (ioat->tail + i) & (new_size - 1);
835 
836 			ring[new_idx] = ioat_alloc_ring_entry(ioat);
837 			if (ring[new_idx] == NULL) {
838 				while (i--) {
839 					new_idx2 = (ioat->tail + i) &
840 					    (new_size - 1);
841 
842 					ioat_free_ring_entry(ioat,
843 					    ring[new_idx2]);
844 				}
845 				free(ring, M_IOAT);
846 				return (FALSE);
847 			}
848 			ring[new_idx]->id = new_idx;
849 		}
850 
851 		for (i = current_size - 1; i < new_size; i++) {
852 			new_idx = (ioat->tail + i) & (new_size - 1);
853 			next = ring[(new_idx + 1) & (new_size - 1)];
854 			hw = ring[new_idx]->u.dma;
855 
856 			hw->next = next->hw_desc_bus_addr;
857 		}
858 	} else {
859 		/*
860 		 * copy current descriptors to the new ring, dropping the
861 		 * removed descriptors
862 		 */
863 		for (i = 0; i < new_size; i++) {
864 			current_idx = (ioat->tail + i) & (current_size - 1);
865 			new_idx = (ioat->tail + i) & (new_size - 1);
866 
867 			ring[new_idx] = ioat->ring[current_idx];
868 			ring[new_idx]->id = new_idx;
869 		}
870 
871 		/* free deleted descriptors */
872 		for (i = new_size; i < current_size; i++) {
873 			ent = ioat_get_ring_entry(ioat, ioat->tail + i);
874 			ioat_free_ring_entry(ioat, ent);
875 		}
876 
877 		/* fix up hardware ring */
878 		hw = ring[(ioat->tail + new_size - 1) & (new_size - 1)]->u.dma;
879 		next = ring[(ioat->tail + new_size) & (new_size - 1)];
880 		hw->next = next->hw_desc_bus_addr;
881 	}
882 
883 	free(ioat->ring, M_IOAT);
884 	ioat->ring = ring;
885 	ioat->ring_size_order = order;
886 
887 	return (TRUE);
888 }
889 
890 static void
891 ioat_timer_callback(void *arg)
892 {
893 	struct ioat_descriptor *desc;
894 	struct ioat_softc *ioat;
895 	uint64_t status;
896 	uint32_t chanerr;
897 
898 	ioat = arg;
899 	ioat_log_message(2, "%s\n", __func__);
900 
901 	if (ioat->is_completion_pending) {
902 		status = ioat_get_chansts(ioat);
903 
904 		/*
905 		 * When halted due to errors, check for channel programming
906 		 * errors before advancing the completion state.
907 		 */
908 		if (is_ioat_halted(status)) {
909 			chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
910 			ioat_log_message(0, "Channel halted (%x)\n", chanerr);
911 
912 			desc = ioat_get_ring_entry(ioat, ioat->tail + 0);
913 			dump_descriptor(desc->u.raw);
914 
915 			desc = ioat_get_ring_entry(ioat, ioat->tail + 1);
916 			dump_descriptor(desc->u.raw);
917 		}
918 		ioat_process_events(ioat);
919 	} else {
920 		mtx_lock(&ioat->submit_lock);
921 		mtx_lock(&ioat->cleanup_lock);
922 
923 		if (ioat_get_active(ioat) == 0 &&
924 		    ioat->ring_size_order > IOAT_MIN_ORDER)
925 			resize_ring(ioat, ioat->ring_size_order - 1);
926 
927 		mtx_unlock(&ioat->cleanup_lock);
928 		mtx_unlock(&ioat->submit_lock);
929 
930 		if (ioat->ring_size_order > IOAT_MIN_ORDER)
931 			callout_reset(&ioat->timer, 5 * hz,
932 			    ioat_timer_callback, ioat);
933 	}
934 }
935 
936 /*
937  * Support Functions
938  */
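/*
 * ioat_submit_single() only advances the software head (and arms the watchdog
 * callout for the first pending descriptor); the DMACOUNT doorbell that
 * actually hands new descriptors to the hardware is written later, in
 * ioat_release(), so all descriptors queued under one acquire/release pair
 * are submitted as a batch.
 */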
939 static void
940 ioat_submit_single(struct ioat_softc *ioat)
941 {
942 
943 	atomic_add_rel_int(&ioat->head, 1);
944 
945 	if (!ioat->is_completion_pending) {
946 		ioat->is_completion_pending = TRUE;
947 		callout_reset(&ioat->timer, 10 * hz, ioat_timer_callback,
948 		    ioat);
949 	}
950 }
951 
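/*
 * Full channel reset: suspend the channel and wait (up to 20 ms) for it to
 * quiesce, acknowledge any pending CHANERR bits, apply the IOAT v3
 * CHANERRMSK_INT workaround, then issue the reset itself.  On BWD/BDXDE
 * parts the PCI config state (including MSI-X) is saved before the reset and
 * restored afterwards, since those models clobber it.
 */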
952 static int
953 ioat_reset_hw(struct ioat_softc *ioat)
954 {
955 	uint64_t status;
956 	uint32_t chanerr;
957 	int timeout;
958 
959 	status = ioat_get_chansts(ioat);
960 	if (is_ioat_active(status) || is_ioat_idle(status))
961 		ioat_suspend(ioat);
962 
963 	/* Wait at most 20 ms */
964 	for (timeout = 0; (is_ioat_active(status) || is_ioat_idle(status)) &&
965 	    timeout < 20; timeout++) {
966 		DELAY(1000);
967 		status = ioat_get_chansts(ioat);
968 	}
969 	if (timeout == 20)
970 		return (ETIMEDOUT);
971 
972 	chanerr = ioat_read_4(ioat, IOAT_CHANERR_OFFSET);
973 	ioat_write_4(ioat, IOAT_CHANERR_OFFSET, chanerr);
974 
975 	/*
976 	 * IOAT v3 workaround - write CHANERRMSK_INT with 3E07h to mask out
977 	 * errors that can cause stability issues for IOAT v3.
978 	 */
979 	pci_write_config(ioat->device, IOAT_CFG_CHANERRMASK_INT_OFFSET, 0x3e07,
980 	    4);
981 	chanerr = pci_read_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, 4);
982 	pci_write_config(ioat->device, IOAT_CFG_CHANERR_INT_OFFSET, chanerr, 4);
983 
984 	/*
985 	 * BDXDE and BWD models reset MSI-X registers on device reset.
986 	 * Save/restore their contents manually.
987 	 */
988 	if (ioat_model_resets_msix(ioat))
989 		pci_save_state(ioat->device);
990 
991 	ioat_reset(ioat);
992 
993 	/* Wait at most 20 ms */
994 	for (timeout = 0; ioat_reset_pending(ioat) && timeout < 20; timeout++)
995 		DELAY(1000);
996 	if (timeout == 20)
997 		return (ETIMEDOUT);
998 
999 	if (ioat_model_resets_msix(ioat))
1000 		pci_restore_state(ioat->device);
1001 
1002 	return (0);
1003 }
1004 
1005 static void
1006 dump_descriptor(void *hw_desc)
1007 {
1008 	int i, j;
1009 
1010 	for (i = 0; i < 2; i++) {
1011 		for (j = 0; j < 8; j++)
1012 			printf("%08x ", ((uint32_t *)hw_desc)[i * 8 + j]);
1013 		printf("\n");
1014 	}
1015 }
1016 
1017 static void
1018 ioat_setup_sysctl(device_t device)
1019 {
1020 	struct sysctl_ctx_list *sysctl_ctx;
1021 	struct sysctl_oid *sysctl_tree;
1022 	struct ioat_softc *ioat;
1023 
1024 	ioat = DEVICE2SOFTC(device);
1025 	sysctl_ctx = device_get_sysctl_ctx(device);
1026 	sysctl_tree = device_get_sysctl_tree(device);
1027 
1028 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
1029 	    "ring_size_order", CTLFLAG_RD, &ioat->ring_size_order,
1030 	    0, "HW descriptor ring size order");
1031 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
1032 	    "head", CTLFLAG_RD, &ioat->head,
1033 	    0, "HW descriptor head pointer index");
1034 	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
1035 	    "tail", CTLFLAG_RD, &ioat->tail,
1036 	    0, "HW descriptor tail pointer index");
1037 }
1038 
1039 void
1040 ioat_log_message(int verbosity, char *fmt, ...)
1041 {
1042 	va_list argp;
1043 	char buffer[512];
1044 	struct timeval tv;
1045 
1046 	if (verbosity > g_ioat_debug_level)
1047 		return;
1048 
1049 	va_start(argp, fmt);
1050 	vsnprintf(buffer, sizeof(buffer) - 1, fmt, argp);
1051 	va_end(argp);
1052 	microuptime(&tv);
1053 
1054 	printf("[%d:%06d] ioat: %s", (int)tv.tv_sec, (int)tv.tv_usec, buffer);
1055 }
1056