xref: /freebsd/share/man/man4/ioat.4 (revision 61e21613)
1.\" Copyright (c) 2015 EMC / Isilon Storage Division
2.\" All rights reserved.
3.\"
4.\" Redistribution and use in source and binary forms, with or without
5.\" modification, are permitted provided that the following conditions
6.\" are met:
7.\" 1. Redistributions of source code must retain the above copyright
8.\"    notice, this list of conditions and the following disclaimer.
9.\" 2. Redistributions in binary form must reproduce the above copyright
10.\"    notice, this list of conditions and the following disclaimer in the
11.\"    documentation and/or other materials provided with the distribution.
12.\"
13.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
14.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
17.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23.\" SUCH DAMAGE.
24.\"
25.Dd May 3, 2016
26.Dt IOAT 4
27.Os
28.Sh NAME
29.Nm I/OAT
30.Nd Intel I/O Acceleration Technology
31.Sh SYNOPSIS
32To compile this driver into your kernel,
33place the following line in your kernel configuration file:
34.Bd -ragged -offset indent
35.Cd "device ioat"
36.Ed
37.Pp
38Or, to load the driver as a module at boot, place the following line in
39.Xr loader.conf 5 :
40.Bd -literal -offset indent
41ioat_load="YES"
42.Ed
43.Pp
44In
45.Xr loader.conf 5 :
46.Pp
47.Cd hw.ioat.force_legacy_interrupts=0
48.Pp
49In
50.Xr loader.conf 5 or
51.Xr sysctl.conf 5 :
52.Pp
53.Cd hw.ioat.enable_ioat_test=0
54.Cd hw.ioat.debug_level=0
55(only critical errors; maximum of 3)
56.Pp
57.Ft typedef void
58.Fn (*bus_dmaengine_callback_t) "void *arg" "int error"
59.Pp
60.Ft bus_dmaengine_t
61.Fn ioat_get_dmaengine "uint32_t channel_index"
62.Ft void
63.Fn ioat_put_dmaengine "bus_dmaengine_t dmaengine"
64.Ft int
65.Fn ioat_get_hwversion "bus_dmaengine_t dmaengine"
66.Ft size_t
67.Fn ioat_get_max_io_size "bus_dmaengine_t dmaengine"
68.Ft int
69.Fn ioat_set_interrupt_coalesce "bus_dmaengine_t dmaengine" "uint16_t delay"
70.Ft uint16_t
71.Fn ioat_get_max_coalesce_period "bus_dmaengine_t dmaengine"
72.Ft void
73.Fn ioat_acquire "bus_dmaengine_t dmaengine"
74.Ft int
75.Fn ioat_acquire_reserve "bus_dmaengine_t dmaengine" "uint32_t n" "int mflags"
76.Ft void
77.Fn ioat_release "bus_dmaengine_t dmaengine"
78.Ft struct bus_dmadesc *
79.Fo ioat_copy
80.Fa "bus_dmaengine_t dmaengine"
81.Fa "bus_addr_t dst"
82.Fa "bus_addr_t src"
83.Fa "bus_size_t len"
84.Fa "bus_dmaengine_callback_t callback_fn"
85.Fa "void *callback_arg"
86.Fa "uint32_t flags"
87.Fc
88.Ft struct bus_dmadesc *
89.Fo ioat_copy_8k_aligned
90.Fa "bus_dmaengine_t dmaengine"
91.Fa "bus_addr_t dst1"
92.Fa "bus_addr_t dst2"
93.Fa "bus_addr_t src1"
94.Fa "bus_addr_t src2"
95.Fa "bus_dmaengine_callback_t callback_fn"
96.Fa "void *callback_arg"
97.Fa "uint32_t flags"
98.Fc
99.Ft struct bus_dmadesc *
100.Fo ioat_copy_crc
101.Fa "bus_dmaengine_t dmaengine"
102.Fa "bus_addr_t dst"
103.Fa "bus_addr_t src"
104.Fa "bus_size_t len"
105.Fa "uint32_t *initialseed"
106.Fa "bus_addr_t crcptr"
107.Fa "bus_dmaengine_callback_t callback_fn"
108.Fa "void *callback_arg"
109.Fa "uint32_t flags"
110.Fc
111.Ft struct bus_dmadesc *
112.Fo ioat_crc
113.Fa "bus_dmaengine_t dmaengine"
114.Fa "bus_addr_t src"
115.Fa "bus_size_t len"
116.Fa "uint32_t *initialseed"
117.Fa "bus_addr_t crcptr"
118.Fa "bus_dmaengine_callback_t callback_fn"
119.Fa "void *callback_arg"
120.Fa "uint32_t flags"
121.Fc
122.Ft struct bus_dmadesc *
123.Fo ioat_blockfill
124.Fa "bus_dmaengine_t dmaengine"
125.Fa "bus_addr_t dst"
126.Fa "uint64_t fillpattern"
127.Fa "bus_size_t len"
128.Fa "bus_dmaengine_callback_t callback_fn"
129.Fa "void *callback_arg"
130.Fa "uint32_t flags"
131.Fc
132.Ft struct bus_dmadesc *
133.Fo ioat_null
134.Fa "bus_dmaengine_t dmaengine"
135.Fa "bus_dmaengine_callback_t callback_fn"
136.Fa "void *callback_arg"
137.Fa "uint32_t flags"
138.Fc
139.Sh DESCRIPTION
140The
141.Nm
142driver provides a kernel API to a variety of DMA engines on some Intel server
143platforms.
144.Pp
145There are a number of DMA channels per CPU package.
146(Typically 4 or 8.)
147Each may be used independently.
148Operations on a single channel proceed sequentially.
149.Pp
150Blockfill operations can be used to write a 64-bit pattern to memory.
151.Pp
152Copy operations can be used to offload memory copies to the DMA engines.
153.Pp
154Null operations do nothing, but may be used to test the interrupt and callback
155mechanism.
156.Pp
157All operations can optionally trigger an interrupt at completion with the
158.Ar DMA_INT_EN
159flag.
160For example, a user might submit multiple operations to the same channel and
161only enable an interrupt and callback for the last operation.
162.Pp
163The hardware can delay and coalesce interrupts on a given channel for a
164configurable period of time, in microseconds.
165This may be desired to reduce the processing and interrupt overhead per
166descriptor, especially for workflows consisting of many small operations.
167Software can control this on a per-channel basis with the
168.Fn ioat_set_interrupt_coalesce
169API.
170The
171.Fn ioat_get_max_coalesce_period
172API can be used to determine the maximum coalescing period supported by the
173hardware, in microseconds.
174Current platforms support up to a 16.383 millisecond coalescing period.
175Optimal configuration will vary by workflow and desired operation latency.
176.Pp
177All operations are safe to use in a non-blocking context with the
178.Ar DMA_NO_WAIT
179flag.
180(Of course, allocations may fail and operations requested with
181.Ar DMA_NO_WAIT
182may return NULL.)
183.Pp
184Operations that depend on the result of prior operations should use
185.Ar DMA_FENCE .
186For example, such a scenario can happen when two related DMA operations are
187queued.
188First, a DMA copy to one location (A), followed directly by a DMA copy
189from A to B.
190In this scenario, some classes of I/OAT hardware may prefetch A for the second
191operation before it is written by the first operation.
192To avoid reading a stale value in sequences of dependent operations, use
193.Ar DMA_FENCE .
194.Pp
195All operations, as well as
196.Fn ioat_get_dmaengine ,
197can return NULL in special circumstances.
198This can happen, for example, if the
199.Nm
200driver is being unloaded, or the administrator has induced a hardware reset, or
201a usage error has resulted in a hardware error state that needs to be recovered
202from.
203.Pp
204It is invalid to attempt to submit new DMA operations in a
205.Fa bus_dmaengine_callback_t
206context.
207.Pp
208The CRC operations have three distinct modes.
209The default mode is to accumulate.
210By accumulating over multiple descriptors, a user may gather a CRC over several
211chunks of memory and only write out the result once.
212.Pp
213The
214.Ar DMA_CRC_STORE
215flag causes the operation to emit the CRC32C result.
216If
217.Ar DMA_CRC_INLINE
218is set, the result is written inline with the destination data (or source in
219.Fn ioat_crc
220mode).
221If
222.Ar DMA_CRC_INLINE
223is not set, the result is written to the provided
224.Fa crcptr .
225.Pp
226Similarly, the
227.Ar DMA_CRC_TEST
228flag causes the operation to compare the CRC32C result to an existing checksum.
229If
230.Ar DMA_CRC_INLINE
231is set, the result is compared against the inline four bytes trailing the
232source data.
233If it is not set, the result is compared against the value pointed to by
234.Fa crcptr .
235.Pp
236.Fn ioat_copy_crc
237calculates a CRC32C while copying data.
238.Fn ioat_crc
239only computes a CRC32C of some data.
240If the
241.Fa initialseed
242argument to either routine is non-NULL, the CRC32C engine is initialized with
243the value it points to.
244.Sh USAGE
245A typical user will look up the DMA engine object for a given channel with
246.Fn ioat_get_dmaengine .
247When the user wants to offload a copy, they will first
248.Fn ioat_acquire
249the
250.Ar bus_dmaengine_t
251object for exclusive access to enqueue operations on that channel.
252Optionally, the user can reserve space by using
253.Fn ioat_acquire_reserve
254instead.
255If
256.Fn ioat_acquire_reserve
257succeeds, there is guaranteed to be room for
258.Fa n
259new operations in the internal ring buffer.
260.Pp
261Then, they will submit one or more operations using
262.Fn ioat_blockfill ,
263.Fn ioat_copy ,
264.Fn ioat_copy_8k_aligned ,
265.Fn ioat_copy_crc ,
266.Fn ioat_crc ,
267or
268.Fn ioat_null .
269After queuing one or more individual DMA operations, they will
270.Fn ioat_release
271the
272.Ar bus_dmaengine_t
273to drop their exclusive access to the channel.
274The routine they provided for the
275.Fa callback_fn
276argument will be invoked with the provided
277.Fa callback_arg
278when the operation is complete.
279When they are finished with the
280.Ar bus_dmaengine_t ,
281the user should
282.Fn ioat_put_dmaengine .
283.Pp
284Users MUST NOT block between
285.Fn ioat_acquire
286and
287.Fn ioat_release .
288Users SHOULD NOT hold
289.Ar bus_dmaengine_t
290references for a very long time, so that fault recovery and kernel module
291unload remain possible.
292.Pp
293For an example of usage, see
294.Pa src/sys/dev/ioat/ioat_test.c .
295.Sh FILES
296.Bl -tag
297.It Pa /dev/ioat_test
298test device for
299.Xr ioatcontrol 8
300.El
301.Sh SEE ALSO
302.Xr ioatcontrol 8
303.Sh HISTORY
304The
305.Nm
306driver first appeared in
307.Fx 11.0 .
308.Sh AUTHORS
309The
310.Nm
311driver was developed by
312.An \&Jim Harris Aq Mt jimharris@FreeBSD.org ,
313.An \&Carl Delsey Aq Mt carl.r.delsey@intel.com ,
314and
315.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
316This manual page was written by
317.An \&Conrad Meyer Aq Mt cem@FreeBSD.org .
318.Sh CAVEATS
319Copy operations take bus addresses as parameters, not virtual addresses.
320.Pp
321Buffers for individual copy operations must be physically contiguous.
322.Pp
323Copies larger than the maximum transfer size (1MB, but may vary by hardware) are
324not supported.
325Future versions will likely support this by breaking up the transfer into
326smaller sizes.
327.Sh BUGS
328The
329.Nm
330driver only supports blockfill, copy, CRC, and null operations at this time.
331The driver does not yet support advanced DMA modes, such as XOR, that some
332I/OAT devices support.
333