1 #ifndef _IPXE_TCP_H
2 #define _IPXE_TCP_H
3
4 /** @file
5 *
6 * TCP protocol
7 *
8 * This file defines the iPXE TCP API.
9 *
10 */
11
12 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
13
14 #include <ipxe/tcpip.h>
15
/**
 * A TCP header
 *
 * This is the fixed 20-byte part of the header; TCP options (see the
 * option structures in this file) are not part of this structure.
 */
struct tcp_header {
	/** Source port */
	uint16_t src;
	/** Destination port */
	uint16_t dest;
	/** Sequence number */
	uint32_t seq;
	/** Acknowledgement number */
	uint32_t ack;
	/** Header length (high 4 bits, see TCP_MASK_HLEN), reserved (low 4 bits) */
	uint8_t hlen;
	/** Flags (bitwise OR of TCP_CWR ... TCP_FIN) */
	uint8_t flags;
	/** Advertised window */
	uint16_t win;
	/** Checksum */
	uint16_t csum;
	/** Urgent pointer */
	uint16_t urg;
};
30
31 /** @defgroup tcpopts TCP options
32 * @{
33 */
34
/** End of TCP options list */
#define TCP_OPTION_END 0

/** TCP option pad (single-byte filler, used by the padded options) */
#define TCP_OPTION_NOP 1

/**
 * Generic TCP option
 *
 * Two-byte prefix with which every other option structure in this
 * file begins.
 */
struct tcp_option {
	/** Option kind (a TCP_OPTION_xxx code) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
} __attribute__ (( packed ));
46
/** TCP MSS option */
struct tcp_mss_option {
	/** Option kind (TCP_OPTION_MSS) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
	/** Maximum segment size */
	uint16_t mss;
} __attribute__ (( packed ));

/** Code for the TCP MSS option */
#define TCP_OPTION_MSS 2
56
/** TCP window scale option */
struct tcp_window_scale_option {
	/** Option kind (TCP_OPTION_WS) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
	/** Window scale (shift count, see TCP_RX_WINDOW_SCALE) */
	uint8_t scale;
} __attribute__ (( packed ));

/**
 * Padded TCP window scale option (used for sending)
 *
 * One leading pad byte brings the three-byte option up to four bytes.
 */
struct tcp_window_scale_padded_option {
	/** Padding (TCP_OPTION_NOP) */
	uint8_t nop;
	/** Window scale option */
	struct tcp_window_scale_option wsopt;
} __attribute__ (( packed ));

/** Code for the TCP window scale option */
#define TCP_OPTION_WS 3
72
/** Advertised TCP window scale
 *
 * Using a scale factor of 2**9 provides for a maximum window of 32MB
 * (a 16-bit window field shifted left by 9 bits), which is sufficient
 * to allow Gigabit-speed transfers with a 200ms RTT.  The minimum
 * advertised window is 512 bytes, which is still less than a single
 * packet.
 */
#define TCP_RX_WINDOW_SCALE 9
81
/** TCP selective acknowledgement permitted option */
struct tcp_sack_permitted_option {
	/** Option kind (TCP_OPTION_SACK_PERMITTED) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
} __attribute__ (( packed ));

/**
 * Padded TCP selective acknowledgement permitted option (used for sending)
 *
 * Two leading pad bytes bring the two-byte option up to four bytes.
 */
struct tcp_sack_permitted_padded_option {
	/** Padding (TCP_OPTION_NOP) */
	uint8_t nop[2];
	/** SACK permitted option */
	struct tcp_sack_permitted_option spopt;
} __attribute__ (( packed ));

/** Code for the TCP selective acknowledgement permitted option */
#define TCP_OPTION_SACK_PERMITTED 4
96
/**
 * TCP selective acknowledgement option
 *
 * Only the option header is described here.
 * NOTE(review): presumably followed on the wire by up to TCP_SACK_MAX
 * struct tcp_sack_block entries -- confirm against the sender.
 */
struct tcp_sack_option {
	/** Option kind (TCP_OPTION_SACK) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
} __attribute__ (( packed ));

/** TCP selective acknowledgement block */
struct tcp_sack_block {
	/** Left edge of block (sequence number) */
	uint32_t left;
	/** Right edge of block (sequence number) */
	uint32_t right;
} __attribute__ (( packed ));

/** Maximum number of selective acknowledgement blocks
 *
 * This allows for the presence of the TCP timestamp option: a padded
 * SACK header (4 bytes) plus three blocks (24 bytes) plus a padded
 * timestamp option (12 bytes) comes to 40 bytes, which is the TCP
 * option space limit given by RFC 9293.
 */
#define TCP_SACK_MAX 3

/**
 * Padded TCP selective acknowledgement option (used for sending)
 *
 * Two leading pad bytes bring the two-byte option header up to four
 * bytes.
 */
struct tcp_sack_padded_option {
	/** Padding (TCP_OPTION_NOP) */
	uint8_t nop[2];
	/** SACK option header */
	struct tcp_sack_option sackopt;
} __attribute__ (( packed ));

/** Code for the TCP selective acknowledgement option */
#define TCP_OPTION_SACK 5
123
/** TCP timestamp option */
struct tcp_timestamp_option {
	/** Option kind (TCP_OPTION_TS) */
	uint8_t kind;
	/** Option length */
	uint8_t length;
	/** Timestamp value */
	uint32_t tsval;
	/** Timestamp echo reply */
	uint32_t tsecr;
} __attribute__ (( packed ));

/**
 * Padded TCP timestamp option (used for sending)
 *
 * Two leading pad bytes bring the ten-byte option up to twelve bytes.
 */
struct tcp_timestamp_padded_option {
	/** Padding (TCP_OPTION_NOP) */
	uint8_t nop[2];
	/** Timestamp option */
	struct tcp_timestamp_option tsopt;
} __attribute__ (( packed ));

/** Code for the TCP timestamp option */
#define TCP_OPTION_TS 8
140
/**
 * Parsed TCP options
 *
 * Each member points at the corresponding option when it is present.
 * NOTE(review): an absent option is presumably represented by a NULL
 * pointer -- confirm against the parser.
 */
struct tcp_options {
	/** Window scale option, if present */
	const struct tcp_window_scale_option *wsopt;
	/** SACK permitted option, if present */
	const struct tcp_sack_permitted_option *spopt;
	/** Timestamp option, if present */
	const struct tcp_timestamp_option *tsopt;
};
150
151 /** @} */
152
/*
 * TCP flags
 *
 * Bit values for the flags byte of struct tcp_header.  Each flag is a
 * distinct bit, and together they occupy all eight bits.
 */
/** Congestion window reduced */
#define TCP_CWR 0x80
/** ECN echo */
#define TCP_ECE 0x40
/** Urgent pointer is significant */
#define TCP_URG 0x20
/** Acknowledgement number is significant */
#define TCP_ACK 0x10
/** Push */
#define TCP_PSH 0x08
/** Reset the connection */
#define TCP_RST 0x04
/** Synchronise sequence numbers */
#define TCP_SYN 0x02
/** No more data from sender */
#define TCP_FIN 0x01
164
165 /**
166 * @defgroup tcpstates TCP states
167 *
168 * The TCP state is defined by a combination of the flags that have
169 * been sent to the peer, the flags that have been acknowledged by the
170 * peer, and the flags that have been received from the peer.
171 *
172 * @{
173 */
174
/** TCP flags that have been sent in outgoing packets
 *
 * Stored in bits 0-7 of the state value.
 */
#define TCP_STATE_SENT(flags) ( (flags) << 0 )

/** Extract the flags that have been sent from a state value */
#define TCP_FLAGS_SENT(state) ( ( (state) >> 0 ) & 0xff )

/** TCP flags that have been acknowledged by the peer
 *
 * Stored in bits 8-15 of the state value.
 *
 * Note that this applies only to SYN and FIN.
 */
#define TCP_STATE_ACKED(flags) ( (flags) << 8 )

/** Extract the flags that have been acknowledged from a state value */
#define TCP_FLAGS_ACKED(state) ( ( (state) >> 8 ) & 0xff )

/** TCP flags that have been received from the peer
 *
 * Stored in bits 16-23 of the state value.
 *
 * Note that this applies only to SYN and FIN, and that once SYN has
 * been received, we should always be sending ACK.
 */
#define TCP_STATE_RCVD(flags) ( (flags) << 16 )

/** Extract the flags that have been received from a state value */
#define TCP_FLAGS_RCVD(state) ( ( (state) >> 16 ) & 0xff )

/** TCP flags that are currently being sent in outgoing packets
 *
 * These are the flags that have been sent but not (yet) acknowledged.
 */
#define TCP_FLAGS_SENDING(state) \
	( TCP_FLAGS_SENT ( state ) & ~TCP_FLAGS_ACKED ( state ) )
197
/** CLOSED
 *
 * The connection has not yet been used for anything.
 */
#define TCP_CLOSED TCP_RST

/** LISTEN
 *
 * Not currently used as a state; we have no support for listening
 * connections.  Given a unique value to avoid compiler warnings.
 */
#define TCP_LISTEN 0

/** SYN_SENT
 *
 * SYN has been sent, nothing has yet been received or acknowledged.
 */
#define TCP_SYN_SENT ( TCP_STATE_SENT ( TCP_SYN ) )

/** SYN_RCVD
 *
 * SYN has been sent but not acknowledged, SYN has been received.
 */
#define TCP_SYN_RCVD ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
		       TCP_STATE_RCVD ( TCP_SYN ) )

/** ESTABLISHED
 *
 * SYN has been sent and acknowledged, SYN has been received.
 */
#define TCP_ESTABLISHED ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
			  TCP_STATE_ACKED ( TCP_SYN ) | \
			  TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_1
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has not been received.
 *
 * RFC 793 shows that we can enter FIN_WAIT_1 without having had SYN
 * acknowledged, i.e. if the application closes the connection after
 * sending and receiving SYN, but before having had SYN acknowledged.
 * However, we have to *pretend* that SYN has been acknowledged
 * anyway, otherwise we end up sending SYN and FIN in the same
 * sequence number slot.  Therefore, when we transition from SYN_RCVD
 * to FIN_WAIT_1, we have to remember to set TCP_STATE_ACKED(TCP_SYN)
 * and increment our sequence number.
 */
#define TCP_FIN_WAIT_1 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
			 TCP_STATE_ACKED ( TCP_SYN ) | \
			 TCP_STATE_RCVD ( TCP_SYN ) )

/** FIN_WAIT_2
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has not been received.
 */
#define TCP_FIN_WAIT_2 ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
			 TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
			 TCP_STATE_RCVD ( TCP_SYN ) )

/** CLOSING / LAST_ACK
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent but not acknowledged, FIN has been received.
 *
 * This state actually encompasses both CLOSING and LAST_ACK; they are
 * identical with the definition of state that we use.  I don't
 * *believe* that they need to be distinguished.
 */
#define TCP_CLOSING_OR_LAST_ACK \
	( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
	  TCP_STATE_ACKED ( TCP_SYN ) | \
	  TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** TIME_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been sent and acknowledged, FIN has been received.
 */
#define TCP_TIME_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK | TCP_FIN ) | \
			TCP_STATE_ACKED ( TCP_SYN | TCP_FIN ) | \
			TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )

/** CLOSE_WAIT
 *
 * SYN has been sent and acknowledged, SYN has been received, FIN has
 * been received.
 */
#define TCP_CLOSE_WAIT ( TCP_STATE_SENT ( TCP_SYN | TCP_ACK ) | \
			 TCP_STATE_ACKED ( TCP_SYN ) | \
			 TCP_STATE_RCVD ( TCP_SYN | TCP_FIN ) )
290
/** Can send data in current state
 *
 * We can send data if and only if we have had our SYN acked and we
 * have not yet sent our FIN.
 *
 * @v state		TCP state
 * @ret can_send	Non-zero if data can be sent
 */
#define TCP_CAN_SEND_DATA(state) \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
			TCP_STATE_SENT ( TCP_FIN ) ) ) \
	  == TCP_STATE_ACKED ( TCP_SYN ) )

/** Have ever been fully established
 *
 * We have been fully established if we have both received a SYN and
 * had our own SYN acked.
 *
 * @v state		TCP state
 * @ret established	Non-zero if connection has been established
 */
#define TCP_HAS_BEEN_ESTABLISHED(state) \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_SYN ) | \
			TCP_STATE_RCVD ( TCP_SYN ) ) ) \
	  == ( TCP_STATE_ACKED ( TCP_SYN ) | TCP_STATE_RCVD ( TCP_SYN ) ) )

/** Have closed gracefully
 *
 * We have closed gracefully if we have both received a FIN and had
 * our own FIN acked.
 *
 * @v state		TCP state
 * @ret closed		Non-zero if connection has closed gracefully
 */
#define TCP_CLOSED_GRACEFULLY(state) \
	( ( (state) & ( TCP_STATE_ACKED ( TCP_FIN ) | \
			TCP_STATE_RCVD ( TCP_FIN ) ) ) \
	  == ( TCP_STATE_ACKED ( TCP_FIN ) | TCP_STATE_RCVD ( TCP_FIN ) ) )
320
321 /** @} */
322
/** Mask for TCP header length field
 *
 * Selects the high four bits of the hlen byte of struct tcp_header.
 */
#define TCP_MASK_HLEN 0xf0

/** Smallest port number on which a TCP connection can listen */
#define TCP_MIN_PORT 1
328
/**
 * Maximum advertised TCP window size
 *
 * The maximum bandwidth on any link is limited by
 *
 *    max_bandwidth * round_trip_time = tcp_window
 *
 * Some rough expectations for achievable bandwidths over various
 * links are:
 *
 *    a) Gigabit LAN: expected bandwidth 125MB/s, typical RTT 0.5ms,
 *       minimum required window 64kB
 *
 *    b) Home Internet connection: expected bandwidth 10MB/s, typical
 *       RTT 25ms, minimum required window 256kB
 *
 *    c) WAN: expected bandwidth 2MB/s, typical RTT 100ms, minimum
 *       required window 200kB.
 *
 * The maximum possible value for the TCP window size is 1GB (using
 * the maximum window scale of 2**14).  However, it is advisable to
 * keep the window size as small as possible (without limiting
 * bandwidth), since in the event of a lost packet the window size
 * represents the maximum amount that will need to be retransmitted.
 *
 * We therefore choose a maximum window size of 256kB.
 */
#define TCP_MAX_WINDOW_SIZE ( 256 * 1024 )
357
/**
 * Path MTU
 *
 * IPv6 requires all data link layers to support a datagram size of
 * 1280 bytes.  We choose to use this as our maximum transmitted
 * datagram size, on the assumption that any practical link layer we
 * encounter will allow this size.  This is a very conservative
 * assumption in practice, but the impact of making such a
 * conservative assumption is insignificant since the amount of data
 * that we transmit (rather than receive) is negligible.
 *
 * We allow space within this 1280 bytes for an IPv6 header, a TCP
 * header, and a (padded) TCP timestamp option.  The resulting value
 * (1208) is therefore the space left over for TCP payload rather
 * than the full datagram size.
 */
#define TCP_PATH_MTU							\
	( 1280 - 40 /* IPv6 */ - 20 /* TCP */ - 12 /* TCP timestamp */ )
374
/** TCP maximum segment lifetime
 *
 * Currently set to 2 minutes, as per RFC 793.  Expressed in timer
 * ticks.
 */
#define TCP_MSL ( 2 * 60 * TICKS_PER_SEC )

/**
 * TCP keepalive period
 *
 * We send keepalive ACKs after this period of inactivity (15 seconds,
 * expressed in timer ticks) has elapsed on an established connection.
 */
#define TCP_KEEPALIVE_DELAY ( 15 * TICKS_PER_SEC )
388
/**
 * TCP maximum header length
 *
 * Sum of MAX_LL_NET_HEADER_LEN, the fixed TCP header, and the MSS,
 * padded window scale and padded timestamp options.
 *
 * NOTE(review): MAX_LL_NET_HEADER_LEN is presumably the worst-case
 * link-layer plus network-layer header size -- confirm.  The SACK
 * options are not included in this sum; confirm that this is
 * intentional.
 */
#define TCP_MAX_HEADER_LEN						\
	( MAX_LL_NET_HEADER_LEN +					\
	  sizeof ( struct tcp_header ) +				\
	  sizeof ( struct tcp_mss_option ) +				\
	  sizeof ( struct tcp_window_scale_padded_option ) +		\
	  sizeof ( struct tcp_timestamp_padded_option ) )
399
/**
 * Compare TCP sequence numbers
 *
 * @v seq1		Sequence number 1
 * @v seq2		Sequence number 2
 * @ret diff		Sequence difference
 *
 * Analogous to memcmp(), returns an integer less than, equal to, or
 * greater than zero if @c seq1 is found, respectively, to be before,
 * equal to, or after @c seq2.
 *
 * The subtraction is performed modulo 2**32 and the result is then
 * reinterpreted as signed, so the comparison remains correct when the
 * sequence space wraps around.
 */
static inline __attribute__ (( always_inline )) int32_t
tcp_cmp ( uint32_t seq1, uint32_t seq2 ) {
	return ( ( int32_t ) ( seq1 - seq2 ) );
}
415
/**
 * Check if TCP sequence number lies within window
 *
 * @v seq		Sequence number
 * @v start		Start of window
 * @v len		Length of window
 * @ret in_window	Sequence number is within window
 *
 * The window covers @c len sequence numbers beginning at @c start.
 * The offset from @c start is computed modulo 2**32, so a window
 * that straddles the wraparound point is handled correctly; a
 * zero-length window contains nothing.
 */
static inline int tcp_in_window ( uint32_t seq, uint32_t start,
				  uint32_t len ) {
	return ( ( seq - start ) < len );
}
428
/** TCP finish wait time
 *
 * Currently set to one second (expressed in timer ticks), since we
 * should not allow a slowly responding server to substantially delay
 * a call to shutdown().
 */
#define TCP_FINISH_TIMEOUT ( 1 * TICKS_PER_SEC )
435
436 extern struct tcpip_protocol tcp_protocol __tcpip_protocol;
437
438 #endif /* _IPXE_TCP_H */
439