1 /*-
2 * Copyright (C) 2012 Intel Corporation
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/bus.h>
30 #include <sys/conf.h>
31 #include <sys/ioccom.h>
32 #include <sys/kernel.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/mutex.h>
37 #include <sys/rman.h>
38 #include <sys/sysctl.h>
39 #include <dev/pci/pcireg.h>
40 #include <dev/pci/pcivar.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <machine/stdarg.h>
44 #include <vm/vm.h>
45 #include <vm/vm_param.h>
46 #include <vm/pmap.h>
47
48 #include "ioat.h"
49 #include "ioat_hw.h"
50 #include "ioat_internal.h"
51 #include "ioat_test.h"
52
/*
 * time_after(a, b) is true when tick count a is strictly later than b.
 *
 * The tick values compared in this file are int-sized, so the difference is
 * taken in unsigned int arithmetic and then reinterpreted as a signed int.
 * Widening the operands to long before subtracting would defeat the
 * wraparound handling on LP64 platforms: a deadline that wrapped past
 * INT_MAX would compare as already expired.
 */
#ifndef time_after
#define	time_after(a, b)						\
	((int)((unsigned int)(b) - (unsigned int)(a)) < 0)
#endif
56
57 MALLOC_DEFINE(M_IOAT_TEST, "ioat_test", "ioat test allocations");
58
59 #define IOAT_MAX_BUFS 256
60
61 struct test_transaction {
62 void *buf[IOAT_MAX_BUFS];
63 uint32_t length;
64 uint32_t depth;
65 uint32_t crc[IOAT_MAX_BUFS];
66 struct ioat_test *test;
67 TAILQ_ENTRY(test_transaction) entry;
68 };
69
70 #define IT_LOCK() mtx_lock(&ioat_test_lk)
71 #define IT_UNLOCK() mtx_unlock(&ioat_test_lk)
72 #define IT_ASSERT() mtx_assert(&ioat_test_lk, MA_OWNED)
73 static struct mtx ioat_test_lk;
74 MTX_SYSINIT(ioat_test_lk, &ioat_test_lk, "test coordination mtx", MTX_DEF);
75
76 static int g_thread_index = 1;
77 static struct cdev *g_ioat_cdev = NULL;
78
79 #define ioat_test_log(v, ...) _ioat_test_log((v), "ioat_test: " __VA_ARGS__)
80 static void _ioat_test_log(int verbosity, const char *fmt, ...);
81
82 static void
ioat_test_transaction_destroy(struct test_transaction * tx)83 ioat_test_transaction_destroy(struct test_transaction *tx)
84 {
85 struct ioat_test *test;
86 int i;
87
88 test = tx->test;
89
90 for (i = 0; i < IOAT_MAX_BUFS; i++) {
91 if (tx->buf[i] != NULL) {
92 if (test->testkind == IOAT_TEST_DMA_8K)
93 free(tx->buf[i], M_IOAT_TEST);
94 else
95 contigfree(tx->buf[i], tx->length, M_IOAT_TEST);
96 tx->buf[i] = NULL;
97 }
98 }
99
100 free(tx, M_IOAT_TEST);
101 }
102
103 static struct
ioat_test_transaction_create(struct ioat_test * test,unsigned num_buffers)104 test_transaction *ioat_test_transaction_create(struct ioat_test *test,
105 unsigned num_buffers)
106 {
107 struct test_transaction *tx;
108 unsigned i;
109
110 tx = malloc(sizeof(*tx), M_IOAT_TEST, M_NOWAIT | M_ZERO);
111 if (tx == NULL)
112 return (NULL);
113
114 tx->length = test->buffer_size;
115
116 for (i = 0; i < num_buffers; i++) {
117 if (test->testkind == IOAT_TEST_DMA_8K)
118 tx->buf[i] = malloc(test->buffer_size, M_IOAT_TEST,
119 M_NOWAIT);
120 else
121 tx->buf[i] = contigmalloc(test->buffer_size,
122 M_IOAT_TEST, M_NOWAIT, 0, BUS_SPACE_MAXADDR,
123 PAGE_SIZE, 0);
124
125 if (tx->buf[i] == NULL) {
126 ioat_test_transaction_destroy(tx);
127 return (NULL);
128 }
129 }
130 return (tx);
131 }
132
/*
 * Print 'chunks' rows of eight 32-bit words each, starting at p, as
 * zero-padded hexadecimal.
 */
static void
dump_hex(void *p, size_t chunks)
{
	const uint32_t *word = p;
	size_t row, col;

	for (row = 0; row < chunks; row++, word += 8) {
		for (col = 0; col < 8; col++)
			printf("%08x ", word[col]);
		printf("\n");
	}
}
144
145 static bool
ioat_compare_ok(struct test_transaction * tx)146 ioat_compare_ok(struct test_transaction *tx)
147 {
148 struct ioat_test *test;
149 char *dst, *src;
150 uint32_t i, j;
151
152 test = tx->test;
153
154 for (i = 0; i < tx->depth; i++) {
155 dst = tx->buf[2 * i + 1];
156 src = tx->buf[2 * i];
157
158 if (test->testkind == IOAT_TEST_FILL) {
159 for (j = 0; j < tx->length; j += sizeof(uint64_t)) {
160 if (memcmp(src, &dst[j],
161 MIN(sizeof(uint64_t), tx->length - j))
162 != 0)
163 return (false);
164 }
165 } else if (test->testkind == IOAT_TEST_DMA) {
166 if (memcmp(src, dst, tx->length) != 0)
167 return (false);
168 } else if (test->testkind == IOAT_TEST_RAW_DMA) {
169 if (test->raw_write)
170 dst = test->raw_vtarget;
171 dump_hex(dst, tx->length / 32);
172 }
173 }
174 return (true);
175 }
176
177 static void
ioat_dma_test_callback(void * arg,int error)178 ioat_dma_test_callback(void *arg, int error)
179 {
180 struct test_transaction *tx;
181 struct ioat_test *test;
182
183 if (error != 0)
184 ioat_test_log(0, "%s: Got error: %d\n", __func__, error);
185
186 tx = arg;
187 test = tx->test;
188
189 if (test->verify && !ioat_compare_ok(tx)) {
190 ioat_test_log(0, "miscompare found\n");
191 atomic_add_32(&test->status[IOAT_TEST_MISCOMPARE], tx->depth);
192 } else if (!test->too_late)
193 atomic_add_32(&test->status[IOAT_TEST_OK], tx->depth);
194
195 IT_LOCK();
196 TAILQ_REMOVE(&test->pend_q, tx, entry);
197 TAILQ_INSERT_TAIL(&test->free_q, tx, entry);
198 wakeup(&test->free_q);
199 IT_UNLOCK();
200 }
201
202 static int
ioat_test_prealloc_memory(struct ioat_test * test,int index)203 ioat_test_prealloc_memory(struct ioat_test *test, int index)
204 {
205 uint32_t i, j, k;
206 struct test_transaction *tx;
207
208 for (i = 0; i < test->transactions; i++) {
209 tx = ioat_test_transaction_create(test, test->chain_depth * 2);
210 if (tx == NULL) {
211 ioat_test_log(0, "tx == NULL - memory exhausted\n");
212 test->status[IOAT_TEST_NO_MEMORY]++;
213 return (ENOMEM);
214 }
215
216 TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
217
218 tx->test = test;
219 tx->depth = test->chain_depth;
220
221 /* fill in source buffers */
222 for (j = 0; j < (tx->length / sizeof(uint32_t)); j++) {
223 uint32_t val = j + (index << 28);
224
225 for (k = 0; k < test->chain_depth; k++) {
226 ((uint32_t *)tx->buf[2*k])[j] = ~val;
227 ((uint32_t *)tx->buf[2*k+1])[j] = val;
228 }
229 }
230 }
231 return (0);
232 }
233
234 static void
ioat_test_release_memory(struct ioat_test * test)235 ioat_test_release_memory(struct ioat_test *test)
236 {
237 struct test_transaction *tx, *s;
238
239 TAILQ_FOREACH_SAFE(tx, &test->free_q, entry, s)
240 ioat_test_transaction_destroy(tx);
241 TAILQ_INIT(&test->free_q);
242
243 TAILQ_FOREACH_SAFE(tx, &test->pend_q, entry, s)
244 ioat_test_transaction_destroy(tx);
245 TAILQ_INIT(&test->pend_q);
246 }
247
248 static void
ioat_test_submit_1_tx(struct ioat_test * test,bus_dmaengine_t dma)249 ioat_test_submit_1_tx(struct ioat_test *test, bus_dmaengine_t dma)
250 {
251 struct test_transaction *tx;
252 struct bus_dmadesc *desc;
253 bus_dmaengine_callback_t cb;
254 bus_addr_t src, dest;
255 uint64_t fillpattern;
256 uint32_t i, flags;
257
258 desc = NULL;
259
260 IT_LOCK();
261 while (TAILQ_EMPTY(&test->free_q))
262 msleep(&test->free_q, &ioat_test_lk, 0, "test_submit", 0);
263
264 tx = TAILQ_FIRST(&test->free_q);
265 TAILQ_REMOVE(&test->free_q, tx, entry);
266 TAILQ_INSERT_HEAD(&test->pend_q, tx, entry);
267 IT_UNLOCK();
268
269 if (test->testkind != IOAT_TEST_MEMCPY)
270 ioat_acquire(dma);
271 for (i = 0; i < tx->depth; i++) {
272 if (test->testkind == IOAT_TEST_MEMCPY) {
273 memcpy(tx->buf[2 * i + 1], tx->buf[2 * i], tx->length);
274 if (i == tx->depth - 1)
275 ioat_dma_test_callback(tx, 0);
276 continue;
277 }
278
279 src = vtophys((vm_offset_t)tx->buf[2*i]);
280 dest = vtophys((vm_offset_t)tx->buf[2*i+1]);
281
282 if (test->testkind == IOAT_TEST_RAW_DMA) {
283 if (test->raw_write)
284 dest = test->raw_target;
285 else
286 src = test->raw_target;
287 }
288
289 if (i == tx->depth - 1) {
290 cb = ioat_dma_test_callback;
291 flags = DMA_INT_EN;
292 } else {
293 cb = NULL;
294 flags = 0;
295 }
296
297 if (test->testkind == IOAT_TEST_DMA ||
298 test->testkind == IOAT_TEST_RAW_DMA)
299 desc = ioat_copy(dma, dest, src, tx->length, cb, tx,
300 flags);
301 else if (test->testkind == IOAT_TEST_FILL) {
302 fillpattern = *(uint64_t *)tx->buf[2*i];
303 desc = ioat_blockfill(dma, dest, fillpattern,
304 tx->length, cb, tx, flags);
305 } else if (test->testkind == IOAT_TEST_DMA_8K) {
306 bus_addr_t src2, dst2;
307
308 src2 = vtophys((vm_offset_t)tx->buf[2*i] + PAGE_SIZE);
309 dst2 = vtophys((vm_offset_t)tx->buf[2*i+1] + PAGE_SIZE);
310
311 desc = ioat_copy_8k_aligned(dma, dest, dst2, src, src2,
312 cb, tx, flags);
313 } else if (test->testkind == IOAT_TEST_DMA_8K_PB) {
314 bus_addr_t src2, dst2;
315
316 src2 = vtophys((vm_offset_t)tx->buf[2*i+1] + PAGE_SIZE);
317 dst2 = vtophys((vm_offset_t)tx->buf[2*i] + PAGE_SIZE);
318
319 desc = ioat_copy_8k_aligned(dma, dest, dst2, src, src2,
320 cb, tx, flags);
321 } else if (test->testkind == IOAT_TEST_DMA_CRC) {
322 bus_addr_t crc;
323
324 tx->crc[i] = 0;
325 crc = vtophys((vm_offset_t)&tx->crc[i]);
326 desc = ioat_crc(dma, src, tx->length,
327 NULL, crc, cb, tx, flags | DMA_CRC_STORE);
328 } else if (test->testkind == IOAT_TEST_DMA_CRC_COPY) {
329 bus_addr_t crc;
330
331 tx->crc[i] = 0;
332 crc = vtophys((vm_offset_t)&tx->crc[i]);
333 desc = ioat_copy_crc(dma, dest, src, tx->length,
334 NULL, crc, cb, tx, flags | DMA_CRC_STORE);
335 }
336 if (desc == NULL)
337 break;
338 }
339 if (test->testkind == IOAT_TEST_MEMCPY)
340 return;
341 ioat_release(dma);
342
343 /*
344 * We couldn't issue an IO -- either the device is being detached or
345 * the HW reset. Essentially spin until the device comes back up or
346 * our timer expires.
347 */
348 if (desc == NULL && tx->depth > 0) {
349 atomic_add_32(&test->status[IOAT_TEST_NO_DMA_ENGINE], tx->depth);
350 IT_LOCK();
351 TAILQ_REMOVE(&test->pend_q, tx, entry);
352 TAILQ_INSERT_HEAD(&test->free_q, tx, entry);
353 IT_UNLOCK();
354 }
355 }
356
357 static void
ioat_dma_test(void * arg)358 ioat_dma_test(void *arg)
359 {
360 struct ioat_softc *ioat;
361 struct ioat_test *test;
362 bus_dmaengine_t dmaengine;
363 uint32_t loops;
364 int index, rc, start, end, error;
365
366 test = arg;
367 memset(__DEVOLATILE(void *, test->status), 0, sizeof(test->status));
368
369 if ((test->testkind == IOAT_TEST_DMA_8K ||
370 test->testkind == IOAT_TEST_DMA_8K_PB) &&
371 test->buffer_size != 2 * PAGE_SIZE) {
372 ioat_test_log(0, "Asked for 8k test and buffer size isn't 8k\n");
373 test->status[IOAT_TEST_INVALID_INPUT]++;
374 return;
375 }
376
377 if (test->buffer_size > 1024 * 1024) {
378 ioat_test_log(0, "Buffer size too large >1MB\n");
379 test->status[IOAT_TEST_NO_MEMORY]++;
380 return;
381 }
382
383 if (test->chain_depth * 2 > IOAT_MAX_BUFS) {
384 ioat_test_log(0, "Depth too large (> %u)\n",
385 (unsigned)IOAT_MAX_BUFS / 2);
386 test->status[IOAT_TEST_NO_MEMORY]++;
387 return;
388 }
389
390 if (btoc((uint64_t)test->buffer_size * test->chain_depth *
391 test->transactions) > (physmem / 4)) {
392 ioat_test_log(0, "Sanity check failed -- test would "
393 "use more than 1/4 of phys mem.\n");
394 test->status[IOAT_TEST_NO_MEMORY]++;
395 return;
396 }
397
398 if ((uint64_t)test->transactions * test->chain_depth > (1<<16)) {
399 ioat_test_log(0, "Sanity check failed -- test would "
400 "use more than available IOAT ring space.\n");
401 test->status[IOAT_TEST_NO_MEMORY]++;
402 return;
403 }
404
405 if (test->testkind >= IOAT_NUM_TESTKINDS) {
406 ioat_test_log(0, "Invalid kind %u\n",
407 (unsigned)test->testkind);
408 test->status[IOAT_TEST_INVALID_INPUT]++;
409 return;
410 }
411
412 dmaengine = ioat_get_dmaengine(test->channel_index, M_NOWAIT);
413 if (dmaengine == NULL) {
414 ioat_test_log(0, "Couldn't acquire dmaengine\n");
415 test->status[IOAT_TEST_NO_DMA_ENGINE]++;
416 return;
417 }
418 ioat = to_ioat_softc(dmaengine);
419
420 if (test->testkind == IOAT_TEST_FILL &&
421 (ioat->capabilities & IOAT_DMACAP_BFILL) == 0)
422 {
423 ioat_test_log(0,
424 "Hardware doesn't support block fill, aborting test\n");
425 test->status[IOAT_TEST_INVALID_INPUT]++;
426 goto out;
427 }
428
429 if (test->coalesce_period > ioat->intrdelay_max) {
430 ioat_test_log(0,
431 "Hardware doesn't support intrdelay of %u us.\n",
432 (unsigned)test->coalesce_period);
433 test->status[IOAT_TEST_INVALID_INPUT]++;
434 goto out;
435 }
436 error = ioat_set_interrupt_coalesce(dmaengine, test->coalesce_period);
437 if (error == ENODEV && test->coalesce_period == 0)
438 error = 0;
439 if (error != 0) {
440 ioat_test_log(0, "ioat_set_interrupt_coalesce: %d\n", error);
441 test->status[IOAT_TEST_INVALID_INPUT]++;
442 goto out;
443 }
444
445 if (test->zero_stats)
446 memset(&ioat->stats, 0, sizeof(ioat->stats));
447
448 if (test->testkind == IOAT_TEST_RAW_DMA) {
449 if (test->raw_is_virtual) {
450 test->raw_vtarget = (void *)test->raw_target;
451 test->raw_target = vtophys(test->raw_vtarget);
452 } else {
453 test->raw_vtarget = pmap_mapdev(test->raw_target,
454 test->buffer_size);
455 }
456 }
457
458 index = g_thread_index++;
459 TAILQ_INIT(&test->free_q);
460 TAILQ_INIT(&test->pend_q);
461
462 if (test->duration == 0)
463 ioat_test_log(1, "Thread %d: num_loops remaining: 0x%08x\n",
464 index, test->transactions);
465 else
466 ioat_test_log(1, "Thread %d: starting\n", index);
467
468 rc = ioat_test_prealloc_memory(test, index);
469 if (rc != 0) {
470 ioat_test_log(0, "prealloc_memory: %d\n", rc);
471 goto out;
472 }
473 wmb();
474
475 test->too_late = false;
476 start = ticks;
477 end = start + (((sbintime_t)test->duration * hz) / 1000);
478
479 for (loops = 0;; loops++) {
480 if (test->duration == 0 && loops >= test->transactions)
481 break;
482 else if (test->duration != 0 && time_after(ticks, end)) {
483 test->too_late = true;
484 break;
485 }
486
487 ioat_test_submit_1_tx(test, dmaengine);
488 }
489
490 ioat_test_log(1, "Test Elapsed: %d ticks (overrun %d), %d sec.\n",
491 ticks - start, ticks - end, (ticks - start) / hz);
492
493 IT_LOCK();
494 while (!TAILQ_EMPTY(&test->pend_q))
495 msleep(&test->free_q, &ioat_test_lk, 0, "ioattestcompl", hz);
496 IT_UNLOCK();
497
498 ioat_test_log(1, "Test Elapsed2: %d ticks (overrun %d), %d sec.\n",
499 ticks - start, ticks - end, (ticks - start) / hz);
500
501 ioat_test_release_memory(test);
502 out:
503 if (test->testkind == IOAT_TEST_RAW_DMA && !test->raw_is_virtual)
504 pmap_unmapdev(test->raw_vtarget, test->buffer_size);
505 ioat_put_dmaengine(dmaengine);
506 }
507
/* Open handler for the test device: nothing to set up, always succeeds. */
static int
ioat_test_open(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	(void)dev;
	(void)flags;
	(void)fmt;
	(void)td;
	return (0);
}
514
/* Close handler for the test device: nothing to tear down, always succeeds. */
static int
ioat_test_close(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	(void)dev;
	(void)flags;
	(void)fmt;
	(void)td;
	return (0);
}
521
522 static int
ioat_test_ioctl(struct cdev * dev,unsigned long cmd,caddr_t arg,int flag,struct thread * td)523 ioat_test_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, int flag,
524 struct thread *td)
525 {
526
527 switch (cmd) {
528 case IOAT_DMATEST:
529 ioat_dma_test(arg);
530 break;
531 default:
532 return (EINVAL);
533 }
534 return (0);
535 }
536
537 static struct cdevsw ioat_cdevsw = {
538 .d_version = D_VERSION,
539 .d_flags = 0,
540 .d_open = ioat_test_open,
541 .d_close = ioat_test_close,
542 .d_ioctl = ioat_test_ioctl,
543 .d_name = "ioat_test",
544 };
545
546 static int
enable_ioat_test(bool enable)547 enable_ioat_test(bool enable)
548 {
549 struct make_dev_args devargs;
550 int error = 0;
551
552 if (enable && g_ioat_cdev == NULL) {
553 make_dev_args_init(&devargs);
554 devargs.mda_devsw = &ioat_cdevsw;
555 devargs.mda_uid = UID_ROOT;
556 devargs.mda_gid = GID_WHEEL;
557 devargs.mda_mode = 0600;
558 error = make_dev_s(&devargs, &g_ioat_cdev, "ioat_test");
559 } else if (!enable && g_ioat_cdev != NULL) {
560 destroy_dev(g_ioat_cdev);
561 g_ioat_cdev = NULL;
562 }
563 return (error);
564 }
565
566 static int
sysctl_enable_ioat_test(SYSCTL_HANDLER_ARGS)567 sysctl_enable_ioat_test(SYSCTL_HANDLER_ARGS)
568 {
569 int error, enabled;
570
571 enabled = (g_ioat_cdev != NULL);
572 error = sysctl_handle_int(oidp, &enabled, 0, req);
573 if (error != 0 || req->newptr == NULL)
574 return (error);
575
576 return (enable_ioat_test(enabled));
577 }
578 SYSCTL_PROC(_hw_ioat, OID_AUTO, enable_ioat_test,
579 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
580 sysctl_enable_ioat_test, "I",
581 "Non-zero: Enable the /dev/ioat_test device");
582
/*
 * Create the test device at attach time when the kernel environment
 * variable hw.ioat.enable_ioat_test is set to anything other than "0".
 */
void
ioat_test_attach(void)
{
	char *setting;
	bool wanted;

	setting = kern_getenv("hw.ioat.enable_ioat_test");
	wanted = (setting != NULL && strcmp(setting, "0") != 0);
	if (wanted)
		enable_ioat_test(true);
	freeenv(setting);
}
593
/* Remove the test device, if it exists, at detach time. */
void
ioat_test_detach(void)
{

	(void)enable_ioat_test(false);
}
600
601 static void
_ioat_test_log(int verbosity,const char * fmt,...)602 _ioat_test_log(int verbosity, const char *fmt, ...)
603 {
604 va_list argp;
605
606 if (verbosity > g_ioat_debug_level)
607 return;
608
609 va_start(argp, fmt);
610 vprintf(fmt, argp);
611 va_end(argp);
612 }
613