1 /**
2  * @file
3  * Transmission Control Protocol, outgoing traffic
4  *
5  * The output functions of TCP.
6  *
7  * There are two distinct ways for TCP segments to get sent:
8  * - queued data: these are segments transferring data or segments containing
9  *   SYN or FIN (which both count as one sequence number). They are created as
10  *   struct @ref pbuf together with a struct tcp_seg and enqueued on the
11  *   unsent list of the pcb. They are sent by tcp_output:
12  *   - @ref tcp_write : creates data segments
13  *   - @ref tcp_split_unsent_seg : splits a data segment
14  *   - @ref tcp_enqueue_flags : creates SYN-only or FIN-only segments
15  *   - @ref tcp_output / tcp_output_segment : finalize the tcp header
16  *      (e.g. sequence numbers, options, checksum) and output to IP
17  *   - the various tcp_rexmit functions shuffle around segments between the
18  *     unsent and unacked lists to retransmit them
19  *   - tcp_create_segment and tcp_pbuf_prealloc allocate pbuf and
20  *     segment for these functions
21  * - direct send: these segments don't contain data but control the connection
22  *   behaviour. They are created as pbuf only and sent directly without
23  *   enqueueing them:
24  *   - @ref tcp_send_empty_ack sends an ACK-only segment
25  *   - @ref tcp_rst sends a RST segment
26  *   - @ref tcp_keepalive sends a keepalive segment
27  *   - @ref tcp_zero_window_probe sends a window probe segment
28  *   - tcp_output_alloc_header allocates a header-only pbuf for these functions
29  */
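/* Illustrative sketch (not part of this file): how the "queued data" path above
 * is typically driven from an application using the raw API. The helper name
 * send_reply() is hypothetical; tcp_write() only enqueues the data on the pcb's
 * unsent list, and tcp_output() pushes whatever the send window allows out to IP.
 *
 *   static err_t send_reply(struct tcp_pcb *pcb, const void *data, u16_t len)
 *   {
 *     err_t err = tcp_write(pcb, data, len, TCP_WRITE_FLAG_COPY);
 *     if (err == ERR_OK) {
 *       err = tcp_output(pcb);  // ask the stack to transmit now instead of waiting
 *     }
 *     return err;
 *   }
 */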
30 
31 /*
32  * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
33  * All rights reserved.
34  *
35  * Redistribution and use in source and binary forms, with or without modification,
36  * are permitted provided that the following conditions are met:
37  *
38  * 1. Redistributions of source code must retain the above copyright notice,
39  *    this list of conditions and the following disclaimer.
40  * 2. Redistributions in binary form must reproduce the above copyright notice,
41  *    this list of conditions and the following disclaimer in the documentation
42  *    and/or other materials provided with the distribution.
43  * 3. The name of the author may not be used to endorse or promote products
44  *    derived from this software without specific prior written permission.
45  *
46  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
47  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
48  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
49  * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
50  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
51  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
54  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
55  * OF SUCH DAMAGE.
56  *
57  * This file is part of the lwIP TCP/IP stack.
58  *
59  * Author: Adam Dunkels <adam@sics.se>
60  *
61  */
62 
63 #include "lwip/opt.h"
64 
65 #if LWIP_TCP /* don't build if not configured for use in lwipopts.h */
66 
67 #include "lwip/priv/tcp_priv.h"
68 #include "lwip/def.h"
69 #include "lwip/mem.h"
70 #include "lwip/memp.h"
71 #include "lwip/ip_addr.h"
72 #include "lwip/netif.h"
73 #include "lwip/inet_chksum.h"
74 #include "lwip/stats.h"
75 #include "lwip/ip6.h"
76 #include "lwip/ip6_addr.h"
77 #if LWIP_TCP_TIMESTAMPS
78 #include "lwip/sys.h"
79 #endif
80 
81 #include <string.h>
82 
83 #ifdef LWIP_HOOK_FILENAME
84 #include LWIP_HOOK_FILENAME
85 #endif
86 
87 /* Allow adding custom TCP header options by defining this hook */
88 #ifdef LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH
89 #define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH(pcb, LWIP_TCP_OPT_LENGTH(flags))
90 #else
91 #define LWIP_TCP_OPT_LENGTH_SEGMENT(flags, pcb) LWIP_TCP_OPT_LENGTH(flags)
92 #endif
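/* Sketch of how the hook above might be wired up in lwipopts.h (the helper
 * my_tcpopt_length() is hypothetical). The hook receives the pcb and the option
 * length computed internally and must return the total number of option bytes
 * to reserve in outgoing segments, so a custom-option implementation typically
 * adds its own (4-byte aligned, <= 40 bytes total) length on top:
 *
 *   #define LWIP_HOOK_TCP_OUT_TCPOPT_LENGTH(pcb, internal_len) \
 *           my_tcpopt_length(pcb, internal_len)
 *
 *   u8_t my_tcpopt_length(const struct tcp_pcb *pcb, u8_t internal_len)
 *   {
 *     return (u8_t)(internal_len + 4);  // reserve room for one extra 4-byte option
 *   }
 */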
93 
94 /* Define some copy-macros for checksum-on-copy so that the code looks
95    nicer by preventing too many ifdef's. */
96 #if TCP_CHECKSUM_ON_COPY
97 #define TCP_DATA_COPY(dst, src, len, seg) do { \
98   tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), \
99                      len, &seg->chksum, &seg->chksum_swapped); \
100   seg->flags |= TF_SEG_DATA_CHECKSUMMED; } while(0)
101 #define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped)  \
102   tcp_seg_add_chksum(LWIP_CHKSUM_COPY(dst, src, len), len, chksum, chksum_swapped);
103 #else /* TCP_CHECKSUM_ON_COPY*/
104 #define TCP_DATA_COPY(dst, src, len, seg)                     MEMCPY(dst, src, len)
105 #define TCP_DATA_COPY2(dst, src, len, chksum, chksum_swapped) MEMCPY(dst, src, len)
106 #endif /* TCP_CHECKSUM_ON_COPY*/
107 
108 /** Define this to 1 for an extra check that the output checksum is valid
109  * (useful when the checksum is generated by the application, not the stack) */
110 #ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK
111 #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK   0
112 #endif
113 /* Allow overriding the sanity-check failure handling, e.g. from a warning to a hard failure */
114 #if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
115 #ifndef TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL
116 #define TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(msg) LWIP_DEBUGF(TCP_DEBUG | LWIP_DBG_LEVEL_WARNING, msg)
117 #endif
118 #endif
119 
120 #if TCP_OVERSIZE
121 /** The size of segment pbufs created when TCP_OVERSIZE is enabled */
122 #ifndef TCP_OVERSIZE_CALC_LENGTH
123 #define TCP_OVERSIZE_CALC_LENGTH(length) ((length) + TCP_OVERSIZE)
124 #endif
125 #endif
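/* For illustration only (not an lwIP default): the calculation can be overridden
 * from lwipopts.h, e.g. to round the preallocation up to a multiple of 4 bytes
 * instead of simply adding TCP_OVERSIZE:
 *
 *   #define TCP_OVERSIZE_CALC_LENGTH(length) (((length) + TCP_OVERSIZE + 3) & ~3)
 */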
126 
127 /* Forward declarations.*/
128 static err_t tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif);
129 static err_t tcp_output_control_segment_netif(const struct tcp_pcb *pcb, struct pbuf *p,
130                                               const ip_addr_t *src, const ip_addr_t *dst,
131                                               struct netif *netif);
132 
133 /* tcp_route: common code that returns a fixed bound netif or calls ip_route */
134 static struct netif *
135 tcp_route(const struct tcp_pcb *pcb, const ip_addr_t *src, const ip_addr_t *dst)
136 {
137   LWIP_UNUSED_ARG(src); /* in case IPv4-only and source-based routing is disabled */
138 
139   if ((pcb != NULL) && (pcb->netif_idx != NETIF_NO_INDEX)) {
140     return netif_get_by_index(pcb->netif_idx);
141   } else {
142     return ip_route(src, dst);
143   }
144 }
145 
146 /**
147  * Create a TCP segment with prefilled header.
148  *
149  * Called by @ref tcp_write, @ref tcp_enqueue_flags and @ref tcp_split_unsent_seg
150  *
151  * @param pcb Protocol control block for the TCP connection.
152  * @param p pbuf that is used to hold the TCP header.
153  * @param hdrflags TCP flags for header.
154  * @param seqno TCP sequence number of this packet
155  * @param optflags options to include in TCP header
156  * @return a new tcp_seg pointing to p, or NULL.
157  * The TCP header is filled in except ackno and wnd.
158  * p is freed on failure.
159  */
160 static struct tcp_seg *
161 tcp_create_segment(const struct tcp_pcb *pcb, struct pbuf *p, u8_t hdrflags, u32_t seqno, u8_t optflags)
162 {
163   struct tcp_seg *seg;
164   u8_t optlen;
165 
166   LWIP_ASSERT("tcp_create_segment: invalid pcb", pcb != NULL);
167   LWIP_ASSERT("tcp_create_segment: invalid pbuf", p != NULL);
168 
169   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
170 
171   if ((seg = (struct tcp_seg *)memp_malloc(MEMP_TCP_SEG)) == NULL) {
172     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no memory.\n"));
173     pbuf_free(p);
174     return NULL;
175   }
176   seg->flags = optflags;
177   seg->next = NULL;
178   seg->p = p;
179   LWIP_ASSERT("p->tot_len >= optlen", p->tot_len >= optlen);
180   seg->len = p->tot_len - optlen;
181 #if TCP_OVERSIZE_DBGCHECK
182   seg->oversize_left = 0;
183 #endif /* TCP_OVERSIZE_DBGCHECK */
184 #if TCP_CHECKSUM_ON_COPY
185   seg->chksum = 0;
186   seg->chksum_swapped = 0;
187   /* check optflags */
188   LWIP_ASSERT("invalid optflags passed: TF_SEG_DATA_CHECKSUMMED",
189               (optflags & TF_SEG_DATA_CHECKSUMMED) == 0);
190 #endif /* TCP_CHECKSUM_ON_COPY */
191 
192   /* build TCP header */
193   if (pbuf_add_header(p, TCP_HLEN)) {
194     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_create_segment: no room for TCP header in pbuf.\n"));
195     TCP_STATS_INC(tcp.err);
196     tcp_seg_free(seg);
197     return NULL;
198   }
199   seg->tcphdr = (struct tcp_hdr *)seg->p->payload;
200   seg->tcphdr->src = lwip_htons(pcb->local_port);
201   seg->tcphdr->dest = lwip_htons(pcb->remote_port);
202   seg->tcphdr->seqno = lwip_htonl(seqno);
203   /* ackno is set in tcp_output */
204   TCPH_HDRLEN_FLAGS_SET(seg->tcphdr, (5 + optlen / 4), hdrflags);
205   /* wnd and chksum are set in tcp_output */
206   seg->tcphdr->urgp = 0;
207   return seg;
208 }
209 
210 /**
211  * Allocate a PBUF_RAM pbuf, perhaps with extra space at the end.
212  *
213  * This function is like pbuf_alloc(layer, length, PBUF_RAM) except
214  * there may be extra bytes available at the end.
215  *
216  * Called by @ref tcp_write
217  *
218  * @param layer flag to define header size.
219  * @param length size of the pbuf's payload.
220  * @param max_length maximum usable size of payload+oversize.
221  * @param oversize pointer to a u16_t that will receive the number of usable tail bytes.
222  * @param pcb The TCP connection that will enqueue the pbuf.
223  * @param apiflags API flags given to tcp_write.
224  * @param first_seg true when this pbuf will be used in the first enqueued segment.
225  */
226 #if TCP_OVERSIZE
227 static struct pbuf *
228 tcp_pbuf_prealloc(pbuf_layer layer, u16_t length, u16_t max_length,
229                   u16_t *oversize, const struct tcp_pcb *pcb, u8_t apiflags,
230                   u8_t first_seg)
231 {
232   struct pbuf *p;
233   u16_t alloc = length;
234 
235   LWIP_ASSERT("tcp_pbuf_prealloc: invalid oversize", oversize != NULL);
236   LWIP_ASSERT("tcp_pbuf_prealloc: invalid pcb", pcb != NULL);
237 
238 #if LWIP_NETIF_TX_SINGLE_PBUF
239   LWIP_UNUSED_ARG(max_length);
240   LWIP_UNUSED_ARG(pcb);
241   LWIP_UNUSED_ARG(apiflags);
242   LWIP_UNUSED_ARG(first_seg);
243   alloc = max_length;
244 #else /* LWIP_NETIF_TX_SINGLE_PBUF */
245   if (length < max_length) {
246     /* Should we allocate an oversized pbuf, or just the minimum
247      * length required? If tcp_write is going to be called again
248      * before this segment is transmitted, we want the oversized
249      * buffer. If the segment will be transmitted immediately, we can
250      * save memory by allocating only length. We use a simple
251      * heuristic based on the following information:
252      *
253      * Did the user set TCP_WRITE_FLAG_MORE?
254      *
255      * Will the Nagle algorithm defer transmission of this segment?
256      */
257     if ((apiflags & TCP_WRITE_FLAG_MORE) ||
258         (!(pcb->flags & TF_NODELAY) &&
259          (!first_seg ||
260           pcb->unsent != NULL ||
261           pcb->unacked != NULL))) {
262       alloc = LWIP_MIN(max_length, LWIP_MEM_ALIGN_SIZE(TCP_OVERSIZE_CALC_LENGTH(length)));
263     }
264   }
265 #endif /* LWIP_NETIF_TX_SINGLE_PBUF */
266   p = pbuf_alloc(layer, alloc, PBUF_RAM);
267   if (p == NULL) {
268     return NULL;
269   }
270   LWIP_ASSERT("need unchained pbuf", p->next == NULL);
271   *oversize = p->len - length;
272   /* trim p->len to the currently used size */
273   p->len = p->tot_len = length;
274   return p;
275 }
276 #else /* TCP_OVERSIZE */
277 #define tcp_pbuf_prealloc(layer, length, mx, os, pcb, api, fst) pbuf_alloc((layer), (length), PBUF_RAM)
278 #endif /* TCP_OVERSIZE */
279 
280 #if TCP_CHECKSUM_ON_COPY
281 /** Add a checksum of newly added data to the segment.
282  *
283  * Called by tcp_write and tcp_split_unsent_seg.
284  */
285 static void
286 tcp_seg_add_chksum(u16_t chksum, u16_t len, u16_t *seg_chksum,
287                    u8_t *seg_chksum_swapped)
288 {
289   u32_t helper;
290   /* add chksum to old chksum and fold to u16_t */
291   helper = chksum + *seg_chksum;
292   chksum = FOLD_U32T(helper);
293   if ((len & 1) != 0) {
294     *seg_chksum_swapped = 1 - *seg_chksum_swapped;
295     chksum = SWAP_BYTES_IN_WORD(chksum);
296   }
297   *seg_chksum = chksum;
298 }
299 #endif /* TCP_CHECKSUM_ON_COPY */
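/* Worked example (illustrative values): with *seg_chksum == 0xF0F0, adding a chksum
 * of 0x2010 computed over 3 bytes gives helper = 0xF0F0 + 0x2010 = 0x11100, which
 * folds to 0x1100 + 0x0001 = 0x1101. Because the added length is odd, the result is
 * byte-swapped to 0x0111 and *seg_chksum_swapped toggles, so the next chunk of data
 * is accumulated at the correct (odd) byte offset of the one's complement sum. */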
300 
301 /** Checks if tcp_write is allowed or not (checks state, snd_buf and snd_queuelen).
302  *
303  * @param pcb the tcp pcb to check for
304  * @param len length of data to send (checked against snd_buf)
305  * @return ERR_OK if tcp_write is allowed to proceed, another err_t otherwise
306  */
307 static err_t
308 tcp_write_checks(struct tcp_pcb *pcb, u16_t len)
309 {
310   LWIP_ASSERT("tcp_write_checks: invalid pcb", pcb != NULL);
311 
312   /* connection is in invalid state for data transmission? */
313   if ((pcb->state != ESTABLISHED) &&
314       (pcb->state != CLOSE_WAIT) &&
315       (pcb->state != SYN_SENT) &&
316       (pcb->state != SYN_RCVD)) {
317     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_STATE | LWIP_DBG_LEVEL_SEVERE, ("tcp_write() called in invalid state\n"));
318     return ERR_CONN;
319   } else if (len == 0) {
320     return ERR_OK;
321   }
322 
323   /* fail on too much data */
324   if (len > pcb->snd_buf) {
325     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE, ("tcp_write: too much data (len=%"U16_F" > snd_buf=%"TCPWNDSIZE_F")\n",
326                 len, pcb->snd_buf));
327     tcp_set_flags(pcb, TF_NAGLEMEMERR);
328     return ERR_MEM;
329   }
330 
331   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: queuelen: %"TCPWNDSIZE_F"\n", (tcpwnd_size_t)pcb->snd_queuelen));
332 
333   /* If total number of pbufs on the unsent/unacked queues exceeds the
334    * configured maximum, return an error */
335   /* check for configured max queuelen and possible overflow */
336   if (pcb->snd_queuelen >= LWIP_MIN(TCP_SND_QUEUELEN, (TCP_SNDQUEUELEN_OVERFLOW + 1))) {
337     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SEVERE, ("tcp_write: too long queue %"U16_F" (max %"U16_F")\n",
338                 pcb->snd_queuelen, (u16_t)TCP_SND_QUEUELEN));
339     TCP_STATS_INC(tcp.memerr);
340     tcp_set_flags(pcb, TF_NAGLEMEMERR);
341     return ERR_MEM;
342   }
343   if (pcb->snd_queuelen != 0) {
344     LWIP_ASSERT("tcp_write: pbufs on queue => at least one queue non-empty",
345                 pcb->unacked != NULL || pcb->unsent != NULL);
346   } else {
347     LWIP_ASSERT("tcp_write: no pbufs on queue => both queues empty",
348                 pcb->unacked == NULL && pcb->unsent == NULL);
349   }
350   return ERR_OK;
351 }
352 
353 /**
354  * @ingroup tcp_raw
355  * Write data for sending (but does not send it immediately).
356  *
357  * The stack waits in the expectation of more data being sent soon (as
358  * it can send the data more efficiently by combining it into larger segments).
359  * To prompt the system to send data now, call tcp_output() after
360  * calling tcp_write().
361  *
362  * This function enqueues the data pointed to by the argument dataptr. The length of
363  * the data is passed as the len parameter. The apiflags can be one or more of:
364  * - TCP_WRITE_FLAG_COPY: indicates that new memory should be allocated
365  *   for the data to be copied into. If this flag is not given, no new memory
366  *   is allocated and the data is only referenced by pointer. This
367  *   also means that the memory behind dataptr must not change until the data is
368  *   ACKed by the remote host
369  * - TCP_WRITE_FLAG_MORE: indicates that more data follows. If this is omitted,
370  *   the PSH flag is set in the last segment created by this call to tcp_write.
371  *   If this flag is given, the PSH flag is not set.
372  *
373  * The tcp_write() function will fail and return ERR_MEM if the length
374  * of the data exceeds the current send buffer size or if the length of
375  * the queue of outgoing segments is larger than the upper limit defined
376  * in lwipopts.h. The number of bytes available in the output queue can
377  * be retrieved with the tcp_sndbuf() function.
378  *
379  * The proper way to use this function is to call the function with at
380  * most tcp_sndbuf() bytes of data. If the function returns ERR_MEM,
381  * the application should wait until some of the currently enqueued
382  * data has been successfully received by the other host and try again.
383  *
384  * @param pcb Protocol control block for the TCP connection to enqueue data for.
385  * @param arg Pointer to the data to be enqueued for sending.
386  * @param len Data length in bytes
387  * @param apiflags combination of following flags :
388  * - TCP_WRITE_FLAG_COPY (0x01) data will be copied into memory belonging to the stack
389  * - TCP_WRITE_FLAG_MORE (0x02) the PSH flag will not be set on the last segment sent
390  * @return ERR_OK if enqueued, another err_t on error
391  */
392 err_t
393 tcp_write(struct tcp_pcb *pcb, const void *arg, u16_t len, u8_t apiflags)
394 {
395   struct pbuf *concat_p = NULL;
396   struct tcp_seg *last_unsent = NULL, *seg = NULL, *prev_seg = NULL, *queue = NULL;
397   u16_t pos = 0; /* position in 'arg' data */
398   u16_t queuelen;
399   u8_t optlen;
400   u8_t optflags = 0;
401 #if TCP_OVERSIZE
402   u16_t oversize = 0;
403   u16_t oversize_used = 0;
404 #if TCP_OVERSIZE_DBGCHECK
405   u16_t oversize_add = 0;
406 #endif /* TCP_OVERSIZE_DBGCHECK*/
407 #endif /* TCP_OVERSIZE */
408   u16_t extendlen = 0;
409 #if TCP_CHECKSUM_ON_COPY
410   u16_t concat_chksum = 0;
411   u8_t concat_chksum_swapped = 0;
412   u16_t concat_chksummed = 0;
413 #endif /* TCP_CHECKSUM_ON_COPY */
414   err_t err;
415   u16_t mss_local;
416 
417   LWIP_ERROR("tcp_write: invalid pcb", pcb != NULL, return ERR_ARG);
418 
419   /* don't allocate segments bigger than half the maximum window we ever received */
420   mss_local = LWIP_MIN(pcb->mss, TCPWND_MIN16(pcb->snd_wnd_max / 2));
421   mss_local = mss_local ? mss_local : pcb->mss;
422 
423   LWIP_ASSERT_CORE_LOCKED();
424 
425 #if LWIP_NETIF_TX_SINGLE_PBUF
426   /* Always copy to try to create single pbufs for TX */
427   apiflags |= TCP_WRITE_FLAG_COPY;
428 #endif /* LWIP_NETIF_TX_SINGLE_PBUF */
429 
430   LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_write(pcb=%p, data=%p, len=%"U16_F", apiflags=%"U16_F")\n",
431                                  (void *)pcb, arg, len, (u16_t)apiflags));
432   LWIP_ERROR("tcp_write: arg == NULL (programmer violates API)",
433              arg != NULL, return ERR_ARG;);
434 
435   err = tcp_write_checks(pcb, len);
436   if (err != ERR_OK) {
437     return err;
438   }
439   queuelen = pcb->snd_queuelen;
440 
441 #if LWIP_TCP_TIMESTAMPS
442   if ((pcb->flags & TF_TIMESTAMP)) {
443     /* Make sure the timestamp option is only included in data segments if we
444        agreed about it with the remote host. */
445     optflags = TF_SEG_OPTS_TS;
446     optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(TF_SEG_OPTS_TS, pcb);
447     /* ensure that segments can hold at least one data byte... */
448     mss_local = LWIP_MAX(mss_local, LWIP_TCP_OPT_LEN_TS + 1);
449   } else
450 #endif /* LWIP_TCP_TIMESTAMPS */
451   {
452     optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
453   }
454 
455 
456   /*
457    * TCP segmentation is done in three phases with increasing complexity:
458    *
459    * 1. Copy data directly into an oversized pbuf.
460    * 2. Chain a new pbuf to the end of pcb->unsent.
461    * 3. Create new segments.
462    *
463    * We may run out of memory at any point. In that case we must
464    * return ERR_MEM and not change anything in pcb. Therefore, all
465    * changes are recorded in local variables and committed at the end
466    * of the function. Some pcb fields are maintained in local copies:
467    *
468    * queuelen = pcb->snd_queuelen
469    * oversize = pcb->unsent_oversize
470    *
471    * These variables are set consistently by the phases:
472    *
473    * seg points to the last segment tampered with.
474    *
475    * pos records progress as data is segmented.
476    */
477 
478   /* Find the tail of the unsent queue. */
479   if (pcb->unsent != NULL) {
480     u16_t space;
481     u16_t unsent_optlen;
482 
483     /* @todo: this could be sped up by keeping last_unsent in the pcb */
484     for (last_unsent = pcb->unsent; last_unsent->next != NULL;
485          last_unsent = last_unsent->next);
486 
487     /* Usable space at the end of the last unsent segment */
488     unsent_optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(last_unsent->flags, pcb);
489     LWIP_ASSERT("mss_local is too small", mss_local >= last_unsent->len + unsent_optlen);
490     space = mss_local - (last_unsent->len + unsent_optlen);
491 
492     /*
493      * Phase 1: Copy data directly into an oversized pbuf.
494      *
495      * The number of bytes copied is recorded in the oversize_used
496      * variable. The actual copying is done at the bottom of the
497      * function.
498      */
499 #if TCP_OVERSIZE
500 #if TCP_OVERSIZE_DBGCHECK
501     /* check that pcb->unsent_oversize matches last_unsent->oversize_left */
502     LWIP_ASSERT("unsent_oversize mismatch (pcb vs. last_unsent)",
503                 pcb->unsent_oversize == last_unsent->oversize_left);
504 #endif /* TCP_OVERSIZE_DBGCHECK */
505     oversize = pcb->unsent_oversize;
506     if (oversize > 0) {
507       LWIP_ASSERT("inconsistent oversize vs. space", oversize <= space);
508       seg = last_unsent;
509       oversize_used = LWIP_MIN(space, LWIP_MIN(oversize, len));
510       pos += oversize_used;
511       oversize -= oversize_used;
512       space -= oversize_used;
513     }
514     /* now we are either finished or oversize is zero */
515     LWIP_ASSERT("inconsistent oversize vs. len", (oversize == 0) || (pos == len));
516 #endif /* TCP_OVERSIZE */
517 
518 #if !LWIP_NETIF_TX_SINGLE_PBUF
519     /*
520      * Phase 2: Chain a new pbuf to the end of pcb->unsent.
521      *
522      * As an exception when NOT copying the data, if the given data buffer
523      * directly follows the last unsent data buffer in memory, extend the last
524      * ROM pbuf reference to the buffer, thus saving a ROM pbuf allocation.
525      *
526      * We don't extend segments containing SYN/FIN flags or options
527      * (len==0). The new pbuf is kept in concat_p and pbuf_cat'ed at
528      * the end.
529      *
530      * This phase is skipped for LWIP_NETIF_TX_SINGLE_PBUF as we could only execute
531      * it after rexmit puts a segment from unacked to unsent and at this point,
532      * oversize info is lost.
533      */
534     if ((pos < len) && (space > 0) && (last_unsent->len > 0)) {
535       u16_t seglen = LWIP_MIN(space, len - pos);
536       seg = last_unsent;
537 
538       /* Create a pbuf with a copy or reference to seglen bytes. We
539        * can use PBUF_RAW here since the data appears in the middle of
540        * a segment. A header will never be prepended. */
541       if (apiflags & TCP_WRITE_FLAG_COPY) {
542         /* Data is copied */
543         if ((concat_p = tcp_pbuf_prealloc(PBUF_RAW, seglen, space, &oversize, pcb, apiflags, 1)) == NULL) {
544           LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
545                       ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n",
546                        seglen));
547           goto memerr;
548         }
549 #if TCP_OVERSIZE_DBGCHECK
550         oversize_add = oversize;
551 #endif /* TCP_OVERSIZE_DBGCHECK */
552         TCP_DATA_COPY2(concat_p->payload, (const u8_t *)arg + pos, seglen, &concat_chksum, &concat_chksum_swapped);
553 #if TCP_CHECKSUM_ON_COPY
554         concat_chksummed += seglen;
555 #endif /* TCP_CHECKSUM_ON_COPY */
556         queuelen += pbuf_clen(concat_p);
557       } else {
558         /* Data is not copied */
559         /* If the last unsent pbuf is of type PBUF_ROM, try to extend it. */
560         struct pbuf *p;
561         for (p = last_unsent->p; p->next != NULL; p = p->next);
562         if (((p->type_internal & (PBUF_TYPE_FLAG_STRUCT_DATA_CONTIGUOUS | PBUF_TYPE_FLAG_DATA_VOLATILE)) == 0) &&
563             (const u8_t *)p->payload + p->len == (const u8_t *)arg) {
564           LWIP_ASSERT("tcp_write: ROM pbufs cannot be oversized", pos == 0);
565           extendlen = seglen;
566         } else {
567           if ((concat_p = pbuf_alloc(PBUF_RAW, seglen, PBUF_ROM)) == NULL) {
568             LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
569                         ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
570             goto memerr;
571           }
572           /* reference the non-volatile payload data */
573           ((struct pbuf_rom *)concat_p)->payload = (const u8_t *)arg + pos;
574           queuelen += pbuf_clen(concat_p);
575         }
576 #if TCP_CHECKSUM_ON_COPY
577         /* calculate the checksum of nocopy-data */
578         tcp_seg_add_chksum(~inet_chksum((const u8_t *)arg + pos, seglen), seglen,
579                            &concat_chksum, &concat_chksum_swapped);
580         concat_chksummed += seglen;
581 #endif /* TCP_CHECKSUM_ON_COPY */
582       }
583 
584       pos += seglen;
585     }
586 #endif /* !LWIP_NETIF_TX_SINGLE_PBUF */
587   } else {
588 #if TCP_OVERSIZE
589     LWIP_ASSERT("unsent_oversize mismatch (pcb->unsent is NULL)",
590                 pcb->unsent_oversize == 0);
591 #endif /* TCP_OVERSIZE */
592   }
593 
594   /*
595    * Phase 3: Create new segments.
596    *
597    * The new segments are chained together in the local 'queue'
598    * variable, ready to be appended to pcb->unsent.
599    */
600   while (pos < len) {
601     struct pbuf *p;
602     u16_t left = len - pos;
603     u16_t max_len = mss_local - optlen;
604     u16_t seglen = LWIP_MIN(left, max_len);
605 #if TCP_CHECKSUM_ON_COPY
606     u16_t chksum = 0;
607     u8_t chksum_swapped = 0;
608 #endif /* TCP_CHECKSUM_ON_COPY */
609 
610     if (apiflags & TCP_WRITE_FLAG_COPY) {
611       /* If copy is set, memory should be allocated and data copied
612        * into pbuf */
613       if ((p = tcp_pbuf_prealloc(PBUF_TRANSPORT, seglen + optlen, mss_local, &oversize, pcb, apiflags, queue == NULL)) == NULL) {
614         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write : could not allocate memory for pbuf copy size %"U16_F"\n", seglen));
615         goto memerr;
616       }
617       LWIP_ASSERT("tcp_write: check that first pbuf can hold the complete seglen",
618                   (p->len >= seglen));
619       TCP_DATA_COPY2((char *)p->payload + optlen, (const u8_t *)arg + pos, seglen, &chksum, &chksum_swapped);
620     } else {
621       /* Copy is not set: First allocate a pbuf for holding the data.
622        * Since the referenced data is available at least until it is
623        * sent out on the link (as it has to be ACKed by the remote
624        * party) we can safely use PBUF_ROM instead of PBUF_REF here.
625        */
626       struct pbuf *p2;
627 #if TCP_OVERSIZE
628       LWIP_ASSERT("oversize == 0", oversize == 0);
629 #endif /* TCP_OVERSIZE */
630       if ((p2 = pbuf_alloc(PBUF_TRANSPORT, seglen, PBUF_ROM)) == NULL) {
631         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for zero-copy pbuf\n"));
632         goto memerr;
633       }
634 #if TCP_CHECKSUM_ON_COPY
635       /* calculate the checksum of nocopy-data */
636       chksum = ~inet_chksum((const u8_t *)arg + pos, seglen);
637       if (seglen & 1) {
638         chksum_swapped = 1;
639         chksum = SWAP_BYTES_IN_WORD(chksum);
640       }
641 #endif /* TCP_CHECKSUM_ON_COPY */
642       /* reference the non-volatile payload data */
643       ((struct pbuf_rom *)p2)->payload = (const u8_t *)arg + pos;
644 
645       /* Second, allocate a pbuf for the headers. */
646       if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
647         /* If allocation fails, we have to deallocate the data pbuf as
648          * well. */
649         pbuf_free(p2);
650         LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: could not allocate memory for header pbuf\n"));
651         goto memerr;
652       }
653       /* Concatenate the headers and data pbufs together. */
654       pbuf_cat(p/*header*/, p2/*data*/);
655     }
656 
657     queuelen += pbuf_clen(p);
658 
659     /* Now that there are more segments queued, we check again if the
660      * length of the queue exceeds the configured maximum or
661      * overflows. */
662     if (queuelen > LWIP_MIN(TCP_SND_QUEUELEN, TCP_SNDQUEUELEN_OVERFLOW)) {
663       LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_write: queue too long %"U16_F" (%d)\n",
664                   queuelen, (int)TCP_SND_QUEUELEN));
665       pbuf_free(p);
666       goto memerr;
667     }
668 
669     if ((seg = tcp_create_segment(pcb, p, 0, pcb->snd_lbb + pos, optflags)) == NULL) {
670       goto memerr;
671     }
672 #if TCP_OVERSIZE_DBGCHECK
673     seg->oversize_left = oversize;
674 #endif /* TCP_OVERSIZE_DBGCHECK */
675 #if TCP_CHECKSUM_ON_COPY
676     seg->chksum = chksum;
677     seg->chksum_swapped = chksum_swapped;
678     seg->flags |= TF_SEG_DATA_CHECKSUMMED;
679 #endif /* TCP_CHECKSUM_ON_COPY */
680 
681     /* first segment of to-be-queued data? */
682     if (queue == NULL) {
683       queue = seg;
684     } else {
685       /* Attach the segment to the end of the queued segments */
686       LWIP_ASSERT("prev_seg != NULL", prev_seg != NULL);
687       prev_seg->next = seg;
688     }
689     /* remember last segment of to-be-queued data for next iteration */
690     prev_seg = seg;
691 
692     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE, ("tcp_write: queueing %"U32_F":%"U32_F"\n",
693                 lwip_ntohl(seg->tcphdr->seqno),
694                 lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg)));
695 
696     pos += seglen;
697   }
698 
699   /*
700    * All three segmentation phases were successful. We can commit the
701    * transaction.
702    */
703 #if TCP_OVERSIZE_DBGCHECK
704   if ((last_unsent != NULL) && (oversize_add != 0)) {
705     last_unsent->oversize_left += oversize_add;
706   }
707 #endif /* TCP_OVERSIZE_DBGCHECK */
708 
709   /*
710    * Phase 1: If data has been added to the preallocated tail of
711    * last_unsent, we update the length fields of the pbuf chain.
712    */
713 #if TCP_OVERSIZE
714   if (oversize_used > 0) {
715     struct pbuf *p;
716     /* Bump tot_len of whole chain, len of tail */
717     for (p = last_unsent->p; p; p = p->next) {
718       p->tot_len += oversize_used;
719       if (p->next == NULL) {
720         TCP_DATA_COPY((char *)p->payload + p->len, arg, oversize_used, last_unsent);
721         p->len += oversize_used;
722       }
723     }
724     last_unsent->len += oversize_used;
725 #if TCP_OVERSIZE_DBGCHECK
726     LWIP_ASSERT("last_unsent->oversize_left >= oversize_used",
727                 last_unsent->oversize_left >= oversize_used);
728     last_unsent->oversize_left -= oversize_used;
729 #endif /* TCP_OVERSIZE_DBGCHECK */
730   }
731   pcb->unsent_oversize = oversize;
732 #endif /* TCP_OVERSIZE */
733 
734   /*
735    * Phase 2: concat_p can be concatenated onto last_unsent->p, unless we
736    * determined that the last ROM pbuf can be extended to include the new data.
737    */
738   if (concat_p != NULL) {
739     LWIP_ASSERT("tcp_write: cannot concatenate when pcb->unsent is empty",
740                 (last_unsent != NULL));
741     pbuf_cat(last_unsent->p, concat_p);
742     last_unsent->len += concat_p->tot_len;
743   } else if (extendlen > 0) {
744     struct pbuf *p;
745     LWIP_ASSERT("tcp_write: extension of reference requires reference",
746                 last_unsent != NULL && last_unsent->p != NULL);
747     for (p = last_unsent->p; p->next != NULL; p = p->next) {
748       p->tot_len += extendlen;
749     }
750     p->tot_len += extendlen;
751     p->len += extendlen;
752     last_unsent->len += extendlen;
753   }
754 
755 #if TCP_CHECKSUM_ON_COPY
756   if (concat_chksummed) {
757     LWIP_ASSERT("tcp_write: concat checksum needs concatenated data",
758                 concat_p != NULL || extendlen > 0);
759     /* if the concat checksum was swapped, swap it back */
760     if (concat_chksum_swapped) {
761       concat_chksum = SWAP_BYTES_IN_WORD(concat_chksum);
762     }
763     tcp_seg_add_chksum(concat_chksum, concat_chksummed, &last_unsent->chksum,
764                        &last_unsent->chksum_swapped);
765     last_unsent->flags |= TF_SEG_DATA_CHECKSUMMED;
766   }
767 #endif /* TCP_CHECKSUM_ON_COPY */
768 
769   /*
770    * Phase 3: Append queue to pcb->unsent. Queue may be NULL, but that
771    * is harmless
772    */
773   if (last_unsent == NULL) {
774     pcb->unsent = queue;
775   } else {
776     last_unsent->next = queue;
777   }
778 
779   /*
780    * Finally update the pcb state.
781    */
782   pcb->snd_lbb += len;
783   pcb->snd_buf -= len;
784   pcb->snd_queuelen = queuelen;
785 
786   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_write: %"S16_F" (after enqueued)\n",
787                                pcb->snd_queuelen));
788   if (pcb->snd_queuelen != 0) {
789     LWIP_ASSERT("tcp_write: valid queue length",
790                 pcb->unacked != NULL || pcb->unsent != NULL);
791   }
792 
793   /* Set the PSH flag in the last segment that we enqueued. */
794   if (seg != NULL && seg->tcphdr != NULL && ((apiflags & TCP_WRITE_FLAG_MORE) == 0)) {
795     TCPH_SET_FLAG(seg->tcphdr, TCP_PSH);
796   }
797 
798   return ERR_OK;
799 memerr:
800   tcp_set_flags(pcb, TF_NAGLEMEMERR);
801   TCP_STATS_INC(tcp.memerr);
802 
803   if (concat_p != NULL) {
804     pbuf_free(concat_p);
805   }
806   if (queue != NULL) {
807     tcp_segs_free(queue);
808   }
809   if (pcb->snd_queuelen != 0) {
810     LWIP_ASSERT("tcp_write: valid queue length", pcb->unacked != NULL ||
811                 pcb->unsent != NULL);
812   }
813   LWIP_DEBUGF(TCP_QLEN_DEBUG | LWIP_DBG_STATE, ("tcp_write: %"S16_F" (with mem err)\n", pcb->snd_queuelen));
814   return ERR_MEM;
815 }
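/* Illustrative sketch (not part of this file): the back-pressure pattern described
 * in the tcp_write() documentation above. The names app_state, try_send() and
 * sent_cb() are hypothetical; the idea is to never pass more than tcp_sndbuf()
 * bytes at once and, on a full queue, to retry from the sent callback once the
 * remote host has ACKed (and thereby freed) some of the queued data.
 *
 *   static void try_send(struct tcp_pcb *pcb, struct app_state *st)
 *   {
 *     while (st->left > 0) {
 *       u16_t chunk = (u16_t)LWIP_MIN(st->left, tcp_sndbuf(pcb));
 *       if ((chunk == 0) ||
 *           (tcp_write(pcb, st->data, chunk, TCP_WRITE_FLAG_COPY) != ERR_OK)) {
 *         break;  // send buffer/queue full: wait for the sent callback
 *       }
 *       st->data += chunk;
 *       st->left -= chunk;
 *     }
 *     tcp_output(pcb);
 *   }
 *
 *   static err_t sent_cb(void *arg, struct tcp_pcb *pcb, u16_t len)
 *   {
 *     LWIP_UNUSED_ARG(len);
 *     try_send(pcb, (struct app_state *)arg);  // some data was ACKed, send more
 *     return ERR_OK;
 *   }
 */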
816 
817 /**
818  * Split the segment at the head of the unsent queue.  If the return value is not
819  * ERR_OK, the existing head remains intact.
820  *
821  * The split is accomplished by creating a new TCP segment and pbuf
822  * which holds the remainder payload after the split.  The original
823  * pbuf is trimmed to the new length.  This allows splitting of read-only
824  * pbufs.
825  *
826  * @param pcb the tcp_pcb for which to split the unsent head
827  * @param split the amount of payload to remain in the head
828  */
829 err_t
830 tcp_split_unsent_seg(struct tcp_pcb *pcb, u16_t split)
831 {
832   struct tcp_seg *seg = NULL, *useg = NULL;
833   struct pbuf *p = NULL;
834   u8_t optlen;
835   u8_t optflags;
836   u8_t split_flags;
837   u8_t remainder_flags;
838   u16_t remainder;
839   u16_t offset;
840 #if TCP_CHECKSUM_ON_COPY
841   u16_t chksum = 0;
842   u8_t chksum_swapped = 0;
843   struct pbuf *q;
844 #endif /* TCP_CHECKSUM_ON_COPY */
845 
846   LWIP_ASSERT("tcp_split_unsent_seg: invalid pcb", pcb != NULL);
847 
848   useg = pcb->unsent;
849   if (useg == NULL) {
850     return ERR_MEM;
851   }
852 
853   if (split == 0) {
854     LWIP_ASSERT("Can't split segment into length 0", 0);
855     return ERR_VAL;
856   }
857 
858   if (useg->len <= split) {
859     return ERR_OK;
860   }
861 
862   LWIP_ASSERT("split <= mss", split <= pcb->mss);
863   LWIP_ASSERT("useg->len > 0", useg->len > 0);
864 
865   /* We should check that we don't exceed TCP_SND_QUEUELEN but we need
866    * to split this packet so we may actually exceed the max value by
867    * one!
868    */
869   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue: split_unsent_seg: %u\n", (unsigned int)pcb->snd_queuelen));
870 
871   optflags = useg->flags;
872 #if TCP_CHECKSUM_ON_COPY
873   /* Remove since checksum is not stored until after tcp_create_segment() */
874   optflags &= ~TF_SEG_DATA_CHECKSUMMED;
875 #endif /* TCP_CHECKSUM_ON_COPY */
876   optlen = LWIP_TCP_OPT_LENGTH(optflags);
877   remainder = useg->len - split;
878 
879   /* Create new pbuf for the remainder of the split */
880   p = pbuf_alloc(PBUF_TRANSPORT, remainder + optlen, PBUF_RAM);
881   if (p == NULL) {
882     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
883                 ("tcp_split_unsent_seg: could not allocate memory for pbuf remainder %u\n", remainder));
884     goto memerr;
885   }
886 
887   /* Offset into the original pbuf is past TCP/IP headers, options, and split amount */
888   offset = useg->p->tot_len - useg->len + split;
889   /* Copy remainder into new pbuf, headers and options will not be filled out */
890   if (pbuf_copy_partial(useg->p, (u8_t *)p->payload + optlen, remainder, offset ) != remainder) {
891     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
892                 ("tcp_split_unsent_seg: could not copy pbuf remainder %u\n", remainder));
893     goto memerr;
894   }
895 #if TCP_CHECKSUM_ON_COPY
896   /* calculate the checksum on remainder data */
897   tcp_seg_add_chksum(~inet_chksum((const u8_t *)p->payload + optlen, remainder), remainder,
898                      &chksum, &chksum_swapped);
899 #endif /* TCP_CHECKSUM_ON_COPY */
900 
901   /* Options are created when calling tcp_output() */
902 
903   /* Migrate flags from original segment */
904   split_flags = TCPH_FLAGS(useg->tcphdr);
905   remainder_flags = 0; /* ACK added in tcp_output() */
906 
907   if (split_flags & TCP_PSH) {
908     split_flags &= ~TCP_PSH;
909     remainder_flags |= TCP_PSH;
910   }
911   if (split_flags & TCP_FIN) {
912     split_flags &= ~TCP_FIN;
913     remainder_flags |= TCP_FIN;
914   }
915   /* SYN should be left on split, RST should not be present with data */
916 
917   seg = tcp_create_segment(pcb, p, remainder_flags, lwip_ntohl(useg->tcphdr->seqno) + split, optflags);
918   if (seg == NULL) {
919     p = NULL; /* Freed by tcp_create_segment */
920     LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_LEVEL_SERIOUS,
921                 ("tcp_split_unsent_seg: could not create new TCP segment\n"));
922     goto memerr;
923   }
924 
925 #if TCP_CHECKSUM_ON_COPY
926   seg->chksum = chksum;
927   seg->chksum_swapped = chksum_swapped;
928   seg->flags |= TF_SEG_DATA_CHECKSUMMED;
929 #endif /* TCP_CHECKSUM_ON_COPY */
930 
931   /* Remove this segment from the queue since trimming it may free pbufs */
932   pcb->snd_queuelen -= pbuf_clen(useg->p);
933 
934   /* Trim the original pbuf into our split size.  At this point our remainder segment must be set up
935   successfully because we are modifying the original segment */
936   pbuf_realloc(useg->p, useg->p->tot_len - remainder);
937   useg->len -= remainder;
938   TCPH_SET_FLAG(useg->tcphdr, split_flags);
939 #if TCP_OVERSIZE_DBGCHECK
940   /* By trimming, realloc may have actually shrunk the pbuf, so clear oversize_left */
941   useg->oversize_left = 0;
942 #endif /* TCP_OVERSIZE_DBGCHECK */
943 
944   /* Add back to the queue with new trimmed pbuf */
945   pcb->snd_queuelen += pbuf_clen(useg->p);
946 
947 #if TCP_CHECKSUM_ON_COPY
948   /* The checksum on the split segment is now incorrect. We need to re-run it over the split */
949   useg->chksum = 0;
950   useg->chksum_swapped = 0;
951   q = useg->p;
952   offset = q->tot_len - useg->len; /* Offset due to exposed headers */
953 
954   /* Advance to the pbuf where the offset ends */
955   while (q != NULL && offset > q->len) {
956     offset -= q->len;
957     q = q->next;
958   }
959   LWIP_ASSERT("Found start of payload pbuf", q != NULL);
960   /* Checksum the first payload pbuf accounting for offset, then other pbufs are all payload */
961   for (; q != NULL; offset = 0, q = q->next) {
962     tcp_seg_add_chksum(~inet_chksum((const u8_t *)q->payload + offset, q->len - offset), q->len - offset,
963                        &useg->chksum, &useg->chksum_swapped);
964   }
965 #endif /* TCP_CHECKSUM_ON_COPY */
966 
967   /* Update number of segments on the queues. Note that length now may
968    * exceed TCP_SND_QUEUELEN! We don't have to touch pcb->snd_buf
969    * because the total amount of data is constant when packet is split */
970   pcb->snd_queuelen += pbuf_clen(seg->p);
971 
972   /* Finally insert remainder into queue after split (which stays head) */
973   seg->next = useg->next;
974   useg->next = seg;
975 
976 #if TCP_OVERSIZE
977   /* If remainder is last segment on the unsent, ensure we clear the oversize amount
978    * because the remainder is always sized to the exact remaining amount */
979   if (seg->next == NULL) {
980     pcb->unsent_oversize = 0;
981   }
982 #endif /* TCP_OVERSIZE */
983 
984   return ERR_OK;
985 memerr:
986   TCP_STATS_INC(tcp.memerr);
987 
988   LWIP_ASSERT("seg == NULL", seg == NULL);
989   if (p != NULL) {
990     pbuf_free(p);
991   }
992 
993   return ERR_MEM;
994 }
995 
996 /**
997  * Called by tcp_close() to send a segment including the FIN flag but no data.
998  * This FIN may be added to an existing segment or a new, otherwise empty
999  * segment is enqueued.
1000  *
1001  * @param pcb the tcp_pcb over which to send a segment
1002  * @return ERR_OK if sent, another err_t otherwise
1003  */
1004 err_t
1005 tcp_send_fin(struct tcp_pcb *pcb)
1006 {
1007   LWIP_ASSERT("tcp_send_fin: invalid pcb", pcb != NULL);
1008 
1009   /* first, try to add the fin to the last unsent segment */
1010   if (pcb->unsent != NULL) {
1011     struct tcp_seg *last_unsent;
1012     for (last_unsent = pcb->unsent; last_unsent->next != NULL;
1013          last_unsent = last_unsent->next);
1014 
1015     if ((TCPH_FLAGS(last_unsent->tcphdr) & (TCP_SYN | TCP_FIN | TCP_RST)) == 0) {
1016       /* no SYN/FIN/RST flag in the header, we can add the FIN flag */
1017       TCPH_SET_FLAG(last_unsent->tcphdr, TCP_FIN);
1018       tcp_set_flags(pcb, TF_FIN);
1019       return ERR_OK;
1020     }
1021   }
1022   /* no data, no length, flags, copy=1, no optdata */
1023   return tcp_enqueue_flags(pcb, TCP_FIN);
1024 }
1025 
1026 /**
1027  * Enqueue SYN or FIN for transmission.
1028  *
1029  * Called by @ref tcp_connect, tcp_listen_input, and @ref tcp_close
1030  * (via @ref tcp_send_fin)
1031  *
1032  * @param pcb Protocol control block for the TCP connection.
1033  * @param flags TCP header flags to set in the outgoing segment.
1034  */
1035 err_t
1036 tcp_enqueue_flags(struct tcp_pcb *pcb, u8_t flags)
1037 {
1038   struct pbuf *p;
1039   struct tcp_seg *seg;
1040   u8_t optflags = 0;
1041   u8_t optlen = 0;
1042 
1043   LWIP_ASSERT("tcp_enqueue_flags: need either TCP_SYN or TCP_FIN in flags (programmer violates API)",
1044               (flags & (TCP_SYN | TCP_FIN)) != 0);
1045   LWIP_ASSERT("tcp_enqueue_flags: invalid pcb", pcb != NULL);
1046 
1047   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: queuelen: %"U16_F"\n", (u16_t)pcb->snd_queuelen));
1048 
1049   /* No need to check pcb->snd_queuelen if only SYN or FIN are allowed! */
1050 
1051   /* Get options for this segment. This is a special case since this is the
1052      only place where a SYN can be sent. */
1053   if (flags & TCP_SYN) {
1054     optflags = TF_SEG_OPTS_MSS;
1055 #if LWIP_WND_SCALE
1056     if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_WND_SCALE)) {
1057       /* In a <SYN,ACK> (sent in state SYN_RCVD), the window scale option may only
1058          be sent if we received a window scale option from the remote host. */
1059       optflags |= TF_SEG_OPTS_WND_SCALE;
1060     }
1061 #endif /* LWIP_WND_SCALE */
1062 #if LWIP_TCP_SACK_OUT
1063     if ((pcb->state != SYN_RCVD) || (pcb->flags & TF_SACK)) {
1064       /* In a <SYN,ACK> (sent in state SYN_RCVD), the SACK_PERM option may only
1065          be sent if we received a SACK_PERM option from the remote host. */
1066       optflags |= TF_SEG_OPTS_SACK_PERM;
1067     }
1068 #endif /* LWIP_TCP_SACK_OUT */
1069   }
1070 #if LWIP_TCP_TIMESTAMPS
1071   if ((pcb->flags & TF_TIMESTAMP) || ((flags & TCP_SYN) && (pcb->state != SYN_RCVD))) {
1072     /* Make sure the timestamp option is only included in data segments if we
1073        agreed about it with the remote host (and in active open SYN segments). */
1074     optflags |= TF_SEG_OPTS_TS;
1075   }
1076 #endif /* LWIP_TCP_TIMESTAMPS */
1077   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
1078 
1079   /* Allocate pbuf with room for TCP header + options */
1080   if ((p = pbuf_alloc(PBUF_TRANSPORT, optlen, PBUF_RAM)) == NULL) {
1081     tcp_set_flags(pcb, TF_NAGLEMEMERR);
1082     TCP_STATS_INC(tcp.memerr);
1083     return ERR_MEM;
1084   }
1085   LWIP_ASSERT("tcp_enqueue_flags: check that first pbuf can hold optlen",
1086               (p->len >= optlen));
1087 
1088   /* Allocate memory for tcp_seg, and fill in fields. */
1089   if ((seg = tcp_create_segment(pcb, p, flags, pcb->snd_lbb, optflags)) == NULL) {
1090     tcp_set_flags(pcb, TF_NAGLEMEMERR);
1091     TCP_STATS_INC(tcp.memerr);
1092     return ERR_MEM;
1093   }
1094   LWIP_ASSERT("seg->tcphdr not aligned", ((mem_ptr_t)seg->tcphdr % LWIP_MIN(MEM_ALIGNMENT, 4)) == 0);
1095   LWIP_ASSERT("tcp_enqueue_flags: invalid segment length", seg->len == 0);
1096 
1097   LWIP_DEBUGF(TCP_OUTPUT_DEBUG | LWIP_DBG_TRACE,
1098               ("tcp_enqueue_flags: queueing %"U32_F":%"U32_F" (0x%"X16_F")\n",
1099                lwip_ntohl(seg->tcphdr->seqno),
1100                lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg),
1101                (u16_t)flags));
1102 
1103   /* Now append seg to pcb->unsent queue */
1104   if (pcb->unsent == NULL) {
1105     pcb->unsent = seg;
1106   } else {
1107     struct tcp_seg *useg;
1108     for (useg = pcb->unsent; useg->next != NULL; useg = useg->next);
1109     useg->next = seg;
1110   }
1111 #if TCP_OVERSIZE
1112   /* The new unsent tail has no space */
1113   pcb->unsent_oversize = 0;
1114 #endif /* TCP_OVERSIZE */
1115 
1116   /* SYN and FIN bump the sequence number */
1117   if ((flags & TCP_SYN) || (flags & TCP_FIN)) {
1118     pcb->snd_lbb++;
1119     /* optlen does not influence snd_buf */
1120   }
1121   if (flags & TCP_FIN) {
1122     tcp_set_flags(pcb, TF_FIN);
1123   }
1124 
1125   /* update number of segments on the queues */
1126   pcb->snd_queuelen += pbuf_clen(seg->p);
1127   LWIP_DEBUGF(TCP_QLEN_DEBUG, ("tcp_enqueue_flags: %"S16_F" (after enqueued)\n", pcb->snd_queuelen));
1128   if (pcb->snd_queuelen != 0) {
1129     LWIP_ASSERT("tcp_enqueue_flags: invalid queue length",
1130                 pcb->unacked != NULL || pcb->unsent != NULL);
1131   }
1132 
1133   return ERR_OK;
1134 }
1135 
1136 #if LWIP_TCP_TIMESTAMPS
1137 /* Build a timestamp option (12 bytes long) at the specified options pointer.
1138  *
1139  * @param pcb tcp_pcb
1140  * @param opts option pointer where to store the timestamp option
1141  */
1142 static void
1143 tcp_build_timestamp_option(const struct tcp_pcb *pcb, u32_t *opts)
1144 {
1145   LWIP_ASSERT("tcp_build_timestamp_option: invalid pcb", pcb != NULL);
1146 
1147   /* Pad with two NOP options to make everything nicely aligned */
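  /* 0x0101080A = 0x01 (NOP), 0x01 (NOP), 0x08 (kind: timestamps), 0x0A (length: 10) */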
1148   opts[0] = PP_HTONL(0x0101080A);
1149   opts[1] = lwip_htonl(sys_now());
1150   opts[2] = lwip_htonl(pcb->ts_recent);
1151 }
1152 #endif
1153 
1154 #if LWIP_TCP_SACK_OUT
1155 /**
1156  * Calculates the number of SACK entries that should be generated.
1157  * It takes into account whether TF_SACK flag is set,
1158  * the number of SACK entries in tcp_pcb that are valid,
1159  * as well as the available options size.
1160  *
1161  * @param pcb tcp_pcb
1162  * @param optlen the length of other TCP options (in bytes)
1163  * @return the number of SACK ranges that can be used
1164  */
1165 static u8_t
1166 tcp_get_num_sacks(const struct tcp_pcb *pcb, u8_t optlen)
1167 {
1168   u8_t num_sacks = 0;
1169 
1170   LWIP_ASSERT("tcp_get_num_sacks: invalid pcb", pcb != NULL);
1171 
1172   if (pcb->flags & TF_SACK) {
1173     u8_t i;
1174 
1175     /* The first SACK takes up 12 bytes (it includes SACK header and two NOP options),
1176        each additional one - 8 bytes. */
1177     optlen += 12;
1178 
1179     /* Max options size = 40, number of SACK array entries = LWIP_TCP_MAX_SACK_NUM */
1180     for (i = 0; (i < LWIP_TCP_MAX_SACK_NUM) && (optlen <= TCP_MAX_OPTION_BYTES) &&
1181          LWIP_TCP_SACK_VALID(pcb, i); ++i) {
1182       ++num_sacks;
1183       optlen += 8;
1184     }
1185   }
1186 
1187   return num_sacks;
1188 }
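/* Worked example (assuming LWIP_TCP_MAX_SACK_NUM and the number of valid ranges
 * allow it): with no other options (optlen == 0), up to 4 SACK ranges fit
 * (12 + 3 * 8 = 36 <= 40 option bytes); if the 12-byte timestamp option is also
 * present, at most 3 ranges fit. */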
1189 
1190 /** Build a SACK option (12 or more bytes long) at the specified options pointer.
1191  *
1192  * @param pcb tcp_pcb
1193  * @param opts option pointer where to store the SACK option
1194  * @param num_sacks the number of SACKs to store
1195  */
1196 static void
1197 tcp_build_sack_option(const struct tcp_pcb *pcb, u32_t *opts, u8_t num_sacks)
1198 {
1199   u8_t i;
1200 
1201   LWIP_ASSERT("tcp_build_sack_option: invalid pcb", pcb != NULL);
1202   LWIP_ASSERT("tcp_build_sack_option: invalid opts", opts != NULL);
1203 
1204   /* Pad with two NOP options to make everything nicely aligned.
1205      We add the length (of just the SACK option, not the NOPs in front of it),
1206      which is 2B of header, plus 8B for each SACK. */
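  /* Byte layout of the word below: 0x01 (NOP), 0x01 (NOP), 0x05 (kind: SACK),
     last byte = option length = 2 + 8 * num_sacks. */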
1207   *(opts++) = PP_HTONL(0x01010500 + 2 + num_sacks * 8);
1208 
1209   for (i = 0; i < num_sacks; ++i) {
1210     *(opts++) = lwip_htonl(pcb->rcv_sacks[i].left);
1211     *(opts++) = lwip_htonl(pcb->rcv_sacks[i].right);
1212   }
1213 }
1214 
1215 #endif
1216 
1217 #if LWIP_WND_SCALE
1218 /** Build a window scale option (3 bytes long) at the specified options pointer.
1219  *
1220  * @param opts option pointer where to store the window scale option
1221  */
1222 static void
1223 tcp_build_wnd_scale_option(u32_t *opts)
1224 {
1225   LWIP_ASSERT("tcp_build_wnd_scale_option: invalid opts", opts != NULL);
1226 
1227   /* Pad with one NOP option to make everything nicely aligned */
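  /* 0x01 (NOP), 0x03 (kind: window scale), 0x03 (length), last byte = shift count (TCP_RCV_SCALE) */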
1228   opts[0] = PP_HTONL(0x01030300 | TCP_RCV_SCALE);
1229 }
1230 #endif
1231 
1232 /**
1233  * @ingroup tcp_raw
1234  * Find out what we can send and send it
1235  *
1236  * @param pcb Protocol control block for the TCP connection to send data
1237  * @return ERR_OK if data has been sent or nothing to send
1238  *         another err_t on error
1239  */
1240 err_t
1241 tcp_output(struct tcp_pcb *pcb)
1242 {
1243   struct tcp_seg *seg, *useg;
1244   u32_t wnd, snd_nxt;
1245   err_t err;
1246   struct netif *netif;
1247 #if TCP_CWND_DEBUG
1248   s16_t i = 0;
1249 #endif /* TCP_CWND_DEBUG */
1250 
1251   LWIP_ASSERT_CORE_LOCKED();
1252 
1253   LWIP_ASSERT("tcp_output: invalid pcb", pcb != NULL);
1254   /* pcb->state LISTEN not allowed here */
1255   LWIP_ASSERT("don't call tcp_output for listen-pcbs",
1256               pcb->state != LISTEN);
1257 
1258   /* First, check if we are invoked by the TCP input processing
1259      code. If so, we do not output anything. Instead, we rely on the
1260      input processing code to call us when input processing is done
1261      with. */
1262   if (tcp_input_pcb == pcb) {
1263     return ERR_OK;
1264   }
1265 
1266   wnd = LWIP_MIN(pcb->snd_wnd, pcb->cwnd);
1267 
1268   seg = pcb->unsent;
1269 
1270   if (seg == NULL) {
1271     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: nothing to send (%p)\n",
1272                                    (void *)pcb->unsent));
1273     LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F
1274                                  ", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
1275                                  ", seg == NULL, ack %"U32_F"\n",
1276                                  pcb->snd_wnd, pcb->cwnd, wnd, pcb->lastack));
1277 
1278     /* If the TF_ACK_NOW flag is set and the ->unsent queue is empty, construct
1279      * an empty ACK segment and send it. */
1280     if (pcb->flags & TF_ACK_NOW) {
1281       return tcp_send_empty_ack(pcb);
1282     }
1283     /* nothing to send: shortcut out of here */
1284     goto output_done;
1285   } else {
1286     LWIP_DEBUGF(TCP_CWND_DEBUG,
1287                 ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F
1288                  ", effwnd %"U32_F", seq %"U32_F", ack %"U32_F"\n",
1289                  pcb->snd_wnd, pcb->cwnd, wnd,
1290                  lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len,
1291                  lwip_ntohl(seg->tcphdr->seqno), pcb->lastack));
1292   }
1293 
1294   netif = tcp_route(pcb, &pcb->local_ip, &pcb->remote_ip);
1295   if (netif == NULL) {
1296     return ERR_RTE;
1297   }
1298 
1299   /* If we don't have a local IP address, we get one from netif */
1300   if (ip_addr_isany(&pcb->local_ip)) {
1301     const ip_addr_t *local_ip = ip_netif_get_local_ip(netif, &pcb->remote_ip);
1302     if (local_ip == NULL) {
1303       return ERR_RTE;
1304     }
1305     ip_addr_copy(pcb->local_ip, *local_ip);
1306   }
1307 
1308   /* Handle the current segment not fitting within the window */
1309   if (lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len > wnd) {
1310     /* We need to start the persistent timer when the next unsent segment does not fit
1311      * within the remaining (could be 0) send window and RTO timer is not running (we
1312      * have no in-flight data). If window is still too small after persist timer fires,
1313      * then we split the segment. We don't consider the congestion window since a cwnd
1314      * smaller than 1 SMSS implies in-flight data
1315      */
1316     if (wnd == pcb->snd_wnd && pcb->unacked == NULL && pcb->persist_backoff == 0) {
1317       pcb->persist_cnt = 0;
1318       pcb->persist_backoff = 1;
1319       pcb->persist_probe = 0;
1320     }
1321     /* We need an ACK, but can't send data now, so send an empty ACK */
1322     if (pcb->flags & TF_ACK_NOW) {
1323       return tcp_send_empty_ack(pcb);
1324     }
1325     goto output_done;
1326   }
1327   /* Stop persist timer, above conditions are not active */
1328   pcb->persist_backoff = 0;
1329 
1330   /* useg should point to last segment on unacked queue */
1331   useg = pcb->unacked;
1332   if (useg != NULL) {
1333     for (; useg->next != NULL; useg = useg->next);
1334   }
1335   /* data available and window allows it to be sent? */
1336   while (seg != NULL &&
1337          lwip_ntohl(seg->tcphdr->seqno) - pcb->lastack + seg->len <= wnd) {
1338     LWIP_ASSERT("RST not expected here!",
1339                 (TCPH_FLAGS(seg->tcphdr) & TCP_RST) == 0);
1340     /* Stop sending if the Nagle algorithm would prevent it
1341      * Don't stop:
1342      * - if tcp_write had a memory error before (prevent delayed ACK timeout) or
1343      * - if FIN was already enqueued for this PCB (SYN is always alone in a segment -
1344      *   either seg->next != NULL or pcb->unacked == NULL;
1345      *   RST is not sent using tcp_write/tcp_output.
1346      */
1347     if ((tcp_do_output_nagle(pcb) == 0) &&
1348         ((pcb->flags & (TF_NAGLEMEMERR | TF_FIN)) == 0)) {
1349       break;
1350     }
1351 #if TCP_CWND_DEBUG
1352     LWIP_DEBUGF(TCP_CWND_DEBUG, ("tcp_output: snd_wnd %"TCPWNDSIZE_F", cwnd %"TCPWNDSIZE_F", wnd %"U32_F", effwnd %"U32_F", seq %"U32_F", ack %"U32_F", i %"S16_F"\n",
1353                                  pcb->snd_wnd, pcb->cwnd, wnd,
1354                                  lwip_ntohl(seg->tcphdr->seqno) + seg->len -
1355                                  pcb->lastack,
1356                                  lwip_ntohl(seg->tcphdr->seqno), pcb->lastack, i));
1357     ++i;
1358 #endif /* TCP_CWND_DEBUG */
1359 
1360     if (pcb->state != SYN_SENT) {
1361       TCPH_SET_FLAG(seg->tcphdr, TCP_ACK);
1362     }
1363 
1364     err = tcp_output_segment(seg, pcb, netif);
1365     if (err != ERR_OK) {
1366       /* segment could not be sent, for whatever reason */
1367       tcp_set_flags(pcb, TF_NAGLEMEMERR);
1368       return err;
1369     }
1370 #if TCP_OVERSIZE_DBGCHECK
1371     seg->oversize_left = 0;
1372 #endif /* TCP_OVERSIZE_DBGCHECK */
1373     pcb->unsent = seg->next;
1374     if (pcb->state != SYN_SENT) {
1375       tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
1376     }
1377     snd_nxt = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
1378     if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
1379       pcb->snd_nxt = snd_nxt;
1380     }
1381     /* put segment on unacknowledged list if length > 0 */
1382     if (TCP_TCPLEN(seg) > 0) {
1383       seg->next = NULL;
1384       /* unacked list is empty? */
1385       if (pcb->unacked == NULL) {
1386         pcb->unacked = seg;
1387         useg = seg;
1388         /* unacked list is not empty? */
1389       } else {
1390         /* In the case of fast retransmit, the packet should not go to the tail
1391          * of the unacked queue, but rather somewhere before it. We need to check for
1392          * this case. -STJ Jul 27, 2004 */
1393         if (TCP_SEQ_LT(lwip_ntohl(seg->tcphdr->seqno), lwip_ntohl(useg->tcphdr->seqno))) {
1394           /* add segment to before tail of unacked list, keeping the list sorted */
1395           struct tcp_seg **cur_seg = &(pcb->unacked);
1396           while (*cur_seg &&
1397                  TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
1398             cur_seg = &((*cur_seg)->next );
1399           }
1400           seg->next = (*cur_seg);
1401           (*cur_seg) = seg;
1402         } else {
1403           /* add segment to tail of unacked list */
1404           useg->next = seg;
1405           useg = useg->next;
1406         }
1407       }
1408       /* do not queue empty segments on the unacked list */
1409     } else {
1410       tcp_seg_free(seg);
1411     }
1412     seg = pcb->unsent;
1413   }
1414 #if TCP_OVERSIZE
1415   if (pcb->unsent == NULL) {
1416     /* last unsent has been removed, reset unsent_oversize */
1417     pcb->unsent_oversize = 0;
1418   }
1419 #endif /* TCP_OVERSIZE */
1420 
1421 output_done:
1422   tcp_clear_flags(pcb, TF_NAGLEMEMERR);
1423   return ERR_OK;
1424 }
1425 
1426 /** Check if a segment's pbufs are used by someone else than TCP.
1427  * This can happen on retransmission if the pbuf of this segment is still
1428  * referenced by the netif driver due to deferred transmission.
1429  * This is the case (only!) if someone down the TX call path called
1430  * pbuf_ref() on one of the pbufs!
1431  *
1432  * @param seg the tcp segment to check
1433  * @return 1 if ref != 1, 0 if ref == 1
1434  */
1435 static int
1436 tcp_output_segment_busy(const struct tcp_seg *seg)
1437 {
1438   LWIP_ASSERT("tcp_output_segment_busy: invalid seg", seg != NULL);
1439 
1440   /* We only need to check the first pbuf here:
1441      If a pbuf is queued for transmission, a driver calls pbuf_ref(),
1442      which only changes the ref count of the first pbuf */
1443   if (seg->p->ref != 1) {
1444     /* other reference found */
1445     return 1;
1446   }
1447   /* no other references found */
1448   return 0;
1449 }
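
/* Illustrative sketch (not part of lwIP): a netif driver that defers
 * transmission typically keeps the pbuf alive with its own reference, e.g. in
 * its linkoutput callback:
 *
 *   static err_t my_linkoutput(struct netif *netif, struct pbuf *p)
 *   {
 *     pbuf_ref(p);                  // hold p until the DMA transfer completes
 *     my_queue_for_dma(netif, p);   // hypothetical driver helper
 *     return ERR_OK;
 *   }
 *
 * Until the driver releases that reference with pbuf_free() in its TX-complete
 * handler, seg->p->ref is 2 and tcp_output_segment_busy() reports the segment
 * as busy, so the rexmit functions leave it alone. */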
1450 
1451 /**
1452  * Called by tcp_output() to actually send a TCP segment over IP.
1453  *
1454  * @param seg the tcp_seg to send
1455  * @param pcb the tcp_pcb for the TCP connection used to send the segment
1456  * @param netif the netif used to send the segment
1457  */
1458 static err_t
1459 tcp_output_segment(struct tcp_seg *seg, struct tcp_pcb *pcb, struct netif *netif)
1460 {
1461   err_t err;
1462   u16_t len;
1463   u32_t *opts;
1464 #if TCP_CHECKSUM_ON_COPY
1465   int seg_chksum_was_swapped = 0;
1466 #endif
1467 
1468   LWIP_ASSERT("tcp_output_segment: invalid seg", seg != NULL);
1469   LWIP_ASSERT("tcp_output_segment: invalid pcb", pcb != NULL);
1470   LWIP_ASSERT("tcp_output_segment: invalid netif", netif != NULL);
1471 
1472   if (tcp_output_segment_busy(seg)) {
1473     /* This should not happen: rexmit functions should have checked this.
1474        However, since this function modifies p->len, we must not continue in this case. */
1475     LWIP_DEBUGF(TCP_RTO_DEBUG | LWIP_DBG_LEVEL_SERIOUS, ("tcp_output_segment: segment busy\n"));
1476     return ERR_OK;
1477   }
1478 
1479   /* The TCP header has already been constructed, but the ackno and
1480    wnd fields remain. */
1481   seg->tcphdr->ackno = lwip_htonl(pcb->rcv_nxt);
1482 
1483   /* advertise our receive window size in this TCP segment */
1484 #if LWIP_WND_SCALE
1485   if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
1486     /* The Window field in a SYN segment itself (the only type where we send
1487        the window scale option) is never scaled. */
1488     seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(pcb->rcv_ann_wnd));
1489   } else
1490 #endif /* LWIP_WND_SCALE */
1491   {
1492     seg->tcphdr->wnd = lwip_htons(TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
1493   }
1494 
1495   pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;
1496 
1497   /* Add any requested options.  NB MSS option is only set on SYN
1498      packets, so ignore it here */
1499   /* cast through void* to get rid of alignment warnings */
1500   opts = (u32_t *)(void *)(seg->tcphdr + 1);
1501   if (seg->flags & TF_SEG_OPTS_MSS) {
1502     u16_t mss;
1503 #if TCP_CALCULATE_EFF_SEND_MSS
1504     mss = tcp_eff_send_mss_netif(TCP_MSS, netif, &pcb->remote_ip);
1505 #else /* TCP_CALCULATE_EFF_SEND_MSS */
1506     mss = TCP_MSS;
1507 #endif /* TCP_CALCULATE_EFF_SEND_MSS */
1508     *opts = TCP_BUILD_MSS_OPTION(mss);
1509     opts += 1;
1510   }
1511 #if LWIP_TCP_TIMESTAMPS
1512   pcb->ts_lastacksent = pcb->rcv_nxt;
1513 
1514   if (seg->flags & TF_SEG_OPTS_TS) {
1515     tcp_build_timestamp_option(pcb, opts);
1516     opts += 3;
1517   }
1518 #endif
1519 #if LWIP_WND_SCALE
1520   if (seg->flags & TF_SEG_OPTS_WND_SCALE) {
1521     tcp_build_wnd_scale_option(opts);
1522     opts += 1;
1523   }
1524 #endif
1525 #if LWIP_TCP_SACK_OUT
1526   if (seg->flags & TF_SEG_OPTS_SACK_PERM) {
1527     /* Pad with two NOP options to make everything nicely aligned
1528      * NOTE: When we send both timestamp and SACK_PERM options,
1529      * we could use the first two NOPs before the timestamp to store SACK_PERM option,
1530      * but that would complicate the code.
1531      */
1532     *(opts++) = PP_HTONL(0x01010402);
1533   }
1534 #endif
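
  /* Illustrative layout (assuming a SYN that carries all of the options built
   * above): MSS takes 1 word (kind 2, len 4), timestamps take 3 words (2 NOPs +
   * a 10-byte option), window scale takes 1 word (NOP + a 3-byte option) and
   * SACK_PERM takes 1 word (2 NOPs + a 2-byte option), so opts has advanced by
   * 6 words, i.e. 24 option bytes on top of the 20-byte TCP header. */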
1535 
1536   /* Set retransmission timer running if it is not currently enabled.
1537      This must be set before checking the route. */
1538   if (pcb->rtime < 0) {
1539     pcb->rtime = 0;
1540   }
1541 
1542   if (pcb->rttest == 0) {
1543     pcb->rttest = tcp_ticks;
1544     pcb->rtseq = lwip_ntohl(seg->tcphdr->seqno);
1545 
1546     LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_output_segment: rtseq %"U32_F"\n", pcb->rtseq));
1547   }
1548   LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output_segment: %"U32_F":%"U32_F"\n",
1549                                  lwip_htonl(seg->tcphdr->seqno), lwip_htonl(seg->tcphdr->seqno) +
1550                                  seg->len));
1551 
1552   len = (u16_t)((u8_t *)seg->tcphdr - (u8_t *)seg->p->payload);
1553   if (len == 0) {
1554     /** Exclude retransmitted segments from this count. */
1555     MIB2_STATS_INC(mib2.tcpoutsegs);
1556   }
1557 
1558   seg->p->len -= len;
1559   seg->p->tot_len -= len;
1560 
1561   seg->p->payload = seg->tcphdr;
1562 
1563   seg->tcphdr->chksum = 0;
1564 
1565 #ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
1566   opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(seg->p, seg->tcphdr, pcb, opts);
1567 #endif
1568   LWIP_ASSERT("options not filled", (u8_t *)opts == ((u8_t *)(seg->tcphdr + 1)) + LWIP_TCP_OPT_LENGTH_SEGMENT(seg->flags, pcb));
1569 
1570 #if CHECKSUM_GEN_TCP
1571   IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
1572 #if TCP_CHECKSUM_ON_COPY
1573     u32_t acc;
1574 #if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
1575     u16_t chksum_slow = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
1576                                          seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
1577 #endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
1578     if ((seg->flags & TF_SEG_DATA_CHECKSUMMED) == 0) {
1579       LWIP_ASSERT("data included but not checksummed",
1580                   seg->p->tot_len == TCPH_HDRLEN_BYTES(seg->tcphdr));
1581     }
1582 
1583     /* rebuild TCP header checksum (TCP header changes for retransmissions!) */
1584     acc = ip_chksum_pseudo_partial(seg->p, IP_PROTO_TCP,
1585                                    seg->p->tot_len, TCPH_HDRLEN_BYTES(seg->tcphdr), &pcb->local_ip, &pcb->remote_ip);
1586     /* add payload checksum */
1587     if (seg->chksum_swapped) {
1588       seg_chksum_was_swapped = 1;
1589       seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
1590       seg->chksum_swapped = 0;
1591     }
1592     acc = (u16_t)~acc + seg->chksum;
1593     seg->tcphdr->chksum = (u16_t)~FOLD_U32T(acc);
1594 #if TCP_CHECKSUM_ON_COPY_SANITY_CHECK
1595     if (chksum_slow != seg->tcphdr->chksum) {
1596       TCP_CHECKSUM_ON_COPY_SANITY_CHECK_FAIL(
1597         ("tcp_output_segment: calculated checksum is %"X16_F" instead of %"X16_F"\n",
1598          seg->tcphdr->chksum, chksum_slow));
1599       seg->tcphdr->chksum = chksum_slow;
1600     }
1601 #endif /* TCP_CHECKSUM_ON_COPY_SANITY_CHECK */
1602 #else /* TCP_CHECKSUM_ON_COPY */
1603     seg->tcphdr->chksum = ip_chksum_pseudo(seg->p, IP_PROTO_TCP,
1604                                            seg->p->tot_len, &pcb->local_ip, &pcb->remote_ip);
1605 #endif /* TCP_CHECKSUM_ON_COPY */
1606   }
1607 #endif /* CHECKSUM_GEN_TCP */
1608   TCP_STATS_INC(tcp.xmit);
1609 
1610   NETIF_SET_HINTS(netif, &(pcb->netif_hints));
1611   err = ip_output_if(seg->p, &pcb->local_ip, &pcb->remote_ip, pcb->ttl,
1612                      pcb->tos, IP_PROTO_TCP, netif);
1613   NETIF_RESET_HINTS(netif);
1614 
1615 #if TCP_CHECKSUM_ON_COPY
1616   if (seg_chksum_was_swapped) {
1617     /* if data is added to this segment later, chksum needs to be swapped,
1618        so restore this now */
1619     seg->chksum = SWAP_BYTES_IN_WORD(seg->chksum);
1620     seg->chksum_swapped = 1;
1621   }
1622 #endif
1623 
1624   return err;
1625 }
1626 
1627 /**
1628  * Requeue all unacked segments for retransmission
1629  *
1630  * Called by tcp_slowtmr() for slow retransmission.
1631  *
1632  * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
1633  */
1634 err_t
1635 tcp_rexmit_rto_prepare(struct tcp_pcb *pcb)
1636 {
1637   struct tcp_seg *seg;
1638 
1639   LWIP_ASSERT("tcp_rexmit_rto_prepare: invalid pcb", pcb != NULL);
1640 
1641   if (pcb->unacked == NULL) {
1642     return ERR_VAL;
1643   }
1644 
1645   /* Move all unacked segments to the head of the unsent queue.
1646      However, give up if any of the unacked pbufs are still referenced by the
1647      netif driver due to deferred transmission. No point loading the link further
1648      if it is struggling to flush its buffered writes. */
1649   for (seg = pcb->unacked; seg->next != NULL; seg = seg->next) {
1650     if (tcp_output_segment_busy(seg)) {
1651       LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
1652       return ERR_VAL;
1653     }
1654   }
1655   if (tcp_output_segment_busy(seg)) {
1656     LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit_rto: segment busy\n"));
1657     return ERR_VAL;
1658   }
1659   /* concatenate unsent queue after unacked queue */
1660   seg->next = pcb->unsent;
1661 #if TCP_OVERSIZE_DBGCHECK
1662   /* if last unsent changed, we need to update unsent_oversize */
1663   if (pcb->unsent == NULL) {
1664     pcb->unsent_oversize = seg->oversize_left;
1665   }
1666 #endif /* TCP_OVERSIZE_DBGCHECK */
1667   /* unsent queue is the concatenated queue (of unacked, unsent) */
1668   pcb->unsent = pcb->unacked;
1669   /* unacked queue is now empty */
1670   pcb->unacked = NULL;
1671 
1672   /* Mark RTO in-progress */
1673   tcp_set_flags(pcb, TF_RTO);
1674   /* Record the next byte following retransmit */
1675   pcb->rto_end = lwip_ntohl(seg->tcphdr->seqno) + TCP_TCPLEN(seg);
1676   /* Don't take any RTT measurements after retransmitting. */
1677   pcb->rttest = 0;
1678 
1679   return ERR_OK;
1680 }
1681 
1682 /**
1683  * Send the segments previously requeued by tcp_rexmit_rto_prepare()
1684  *
1685  * Called by tcp_slowtmr() for slow retransmission.
1686  *
1687  * @param pcb the tcp_pcb for which to retransmit the requeued segments
1688  */
1689 void
1690 tcp_rexmit_rto_commit(struct tcp_pcb *pcb)
1691 {
1692   LWIP_ASSERT("tcp_rexmit_rto_commit: invalid pcb", pcb != NULL);
1693 
1694   /* increment number of retransmissions */
1695   if (pcb->nrtx < 0xFF) {
1696     ++pcb->nrtx;
1697   }
1698   /* Do the actual retransmission */
1699   tcp_output(pcb);
1700 }
1701 
1702 /**
1703  * Requeue all unacked segments for retransmission
1704  *
1705  * Called by tcp_process() only, tcp_slowtmr() needs to do some things between
1706  * "prepare" and "commit".
1707  *
1708  * @param pcb the tcp_pcb for which to re-enqueue all unacked segments
1709  */
1710 void
1711 tcp_rexmit_rto(struct tcp_pcb *pcb)
1712 {
1713   LWIP_ASSERT("tcp_rexmit_rto: invalid pcb", pcb != NULL);
1714 
1715   if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) {
1716     tcp_rexmit_rto_commit(pcb);
1717   }
1718 }
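
/* Illustrative sketch (simplified, not the exact lwIP code): tcp_slowtmr() uses
 * the prepare/commit split so it can adjust the congestion state in between,
 * roughly:
 *
 *   if (tcp_rexmit_rto_prepare(pcb) == ERR_OK) {
 *     // reduce ssthresh and restart slow start before resending
 *     tcpwnd_size_t eff_wnd = LWIP_MIN(pcb->cwnd, pcb->snd_wnd);
 *     pcb->ssthresh = LWIP_MAX(eff_wnd / 2, (tcpwnd_size_t)(2 * pcb->mss));
 *     pcb->cwnd = pcb->mss;
 *     tcp_rexmit_rto_commit(pcb);
 *   }
 */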
1719 
1720 /**
1721  * Requeue the first unacked segment for retransmission
1722  *
1723  * Called by tcp_receive() for fast retransmit.
1724  *
1725  * @param pcb the tcp_pcb for which to retransmit the first unacked segment
1726  */
1727 err_t
1728 tcp_rexmit(struct tcp_pcb *pcb)
1729 {
1730   struct tcp_seg *seg;
1731   struct tcp_seg **cur_seg;
1732 
1733   LWIP_ASSERT("tcp_rexmit: invalid pcb", pcb != NULL);
1734 
1735   if (pcb->unacked == NULL) {
1736     return ERR_VAL;
1737   }
1738 
1739   seg = pcb->unacked;
1740 
1741   /* Give up if the segment is still referenced by the netif driver
1742      due to deferred transmission. */
1743   if (tcp_output_segment_busy(seg)) {
1744     LWIP_DEBUGF(TCP_RTO_DEBUG, ("tcp_rexmit busy\n"));
1745     return ERR_VAL;
1746   }
1747 
1748   /* Move the first unacked segment to the unsent queue */
1749   /* Keep the unsent queue sorted. */
1750   pcb->unacked = seg->next;
1751 
1752   cur_seg = &(pcb->unsent);
1753   while (*cur_seg &&
1754          TCP_SEQ_LT(lwip_ntohl((*cur_seg)->tcphdr->seqno), lwip_ntohl(seg->tcphdr->seqno))) {
1755     cur_seg = &((*cur_seg)->next );
1756   }
1757   seg->next = *cur_seg;
1758   *cur_seg = seg;
1759 #if TCP_OVERSIZE
1760   if (seg->next == NULL) {
1761     /* the retransmitted segment is last in unsent, so reset unsent_oversize */
1762     pcb->unsent_oversize = 0;
1763   }
1764 #endif /* TCP_OVERSIZE */
1765 
1766   if (pcb->nrtx < 0xFF) {
1767     ++pcb->nrtx;
1768   }
1769 
1770   /* Don't take any rtt measurements after retransmitting. */
1771   pcb->rttest = 0;
1772 
1773   /* Do the actual retransmission. */
1774   MIB2_STATS_INC(mib2.tcpretranssegs);
1775   /* No need to call tcp_output here: we are always called from tcp_input(),
1776      which calls tcp_output() itself once it has finished processing. */
1777   return ERR_OK;
1778 }
1779 
1780 
1781 /**
1782  * Handle retransmission after three dupacks received
1783  *
1784  * @param pcb the tcp_pcb for which to retransmit the first unacked segment
1785  */
1786 void
1787 tcp_rexmit_fast(struct tcp_pcb *pcb)
1788 {
1789   LWIP_ASSERT("tcp_rexmit_fast: invalid pcb", pcb != NULL);
1790 
1791   if (pcb->unacked != NULL && !(pcb->flags & TF_INFR)) {
1792     /* This is fast retransmit. Retransmit the first unacked segment. */
1793     LWIP_DEBUGF(TCP_FR_DEBUG,
1794                 ("tcp_receive: dupacks %"U16_F" (%"U32_F
1795                  "), fast retransmit %"U32_F"\n",
1796                  (u16_t)pcb->dupacks, pcb->lastack,
1797                  lwip_ntohl(pcb->unacked->tcphdr->seqno)));
1798     if (tcp_rexmit(pcb) == ERR_OK) {
1799       /* Set ssthresh to half of the minimum of the current
1800        * cwnd and the advertised window */
1801       pcb->ssthresh = LWIP_MIN(pcb->cwnd, pcb->snd_wnd) / 2;
1802 
1803       /* The minimum value for ssthresh should be 2 MSS */
1804       if (pcb->ssthresh < (2U * pcb->mss)) {
1805         LWIP_DEBUGF(TCP_FR_DEBUG,
1806                     ("tcp_receive: The minimum value for ssthresh %"TCPWNDSIZE_F
1807                      " should be min 2 mss %"U16_F"...\n",
1808                      pcb->ssthresh, (u16_t)(2 * pcb->mss)));
1809         pcb->ssthresh = 2 * pcb->mss;
1810       }
1811 
1812       pcb->cwnd = pcb->ssthresh + 3 * pcb->mss;
1813       tcp_set_flags(pcb, TF_INFR);
1814 
1815       /* Reset the retransmission timer to prevent immediate rto retransmissions */
1816       pcb->rtime = 0;
1817     }
1818   }
1819 }
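
/* Worked example (illustrative numbers): with mss = 536, cwnd = 8000 and
 * snd_wnd = 6000, ssthresh becomes LWIP_MIN(8000, 6000) / 2 = 3000, which is
 * above the 2 * mss = 1072 floor and is therefore kept, and cwnd is set to
 * 3000 + 3 * 536 = 4608 when entering fast recovery. */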
1820 
1821 static struct pbuf *
1822 tcp_output_alloc_header_common(u32_t ackno, u16_t optlen, u16_t datalen,
1823                         u32_t seqno_be /* already in network byte order */,
1824                         u16_t src_port, u16_t dst_port, u8_t flags, u16_t wnd)
1825 {
1826   struct tcp_hdr *tcphdr;
1827   struct pbuf *p;
1828 
1829   p = pbuf_alloc(PBUF_IP, TCP_HLEN + optlen + datalen, PBUF_RAM);
1830   if (p != NULL) {
1831     LWIP_ASSERT("check that first pbuf can hold struct tcp_hdr",
1832                 (p->len >= TCP_HLEN + optlen));
1833     tcphdr = (struct tcp_hdr *)p->payload;
1834     tcphdr->src = lwip_htons(src_port);
1835     tcphdr->dest = lwip_htons(dst_port);
1836     tcphdr->seqno = seqno_be;
1837     tcphdr->ackno = lwip_htonl(ackno);
1838     TCPH_HDRLEN_FLAGS_SET(tcphdr, (5 + optlen / 4), flags);
1839     tcphdr->wnd = lwip_htons(wnd);
1840     tcphdr->chksum = 0;
1841     tcphdr->urgp = 0;
1842   }
1843   return p;
1844 }
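
/* Worked example (illustrative): with optlen == 12 (e.g. a timestamp option
 * padded with two NOPs), the header length written above is 5 + 12 / 4 = 8
 * 32-bit words, i.e. a 32-byte TCP header: 20 bytes of base header followed by
 * 12 bytes of options. */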
1845 
1846 /** Allocate a pbuf and create a tcphdr at p->payload, used for output
1847  * functions other than the default tcp_output -> tcp_output_segment
1848  * (e.g. tcp_send_empty_ack, etc.)
1849  *
1850  * @param pcb tcp pcb for which to send a packet (used to initialize tcp_hdr)
1851  * @param optlen length of header-options
1852  * @param datalen length of tcp data to reserve in pbuf
1853  * @param seqno_be seqno in network byte order (big-endian)
1854  * @return pbuf with p->payload being the tcp_hdr
1855  */
1856 static struct pbuf *
1857 tcp_output_alloc_header(struct tcp_pcb *pcb, u16_t optlen, u16_t datalen,
1858                         u32_t seqno_be /* already in network byte order */)
1859 {
1860   struct pbuf *p;
1861 
1862   LWIP_ASSERT("tcp_output_alloc_header: invalid pcb", pcb != NULL);
1863 
1864   p = tcp_output_alloc_header_common(pcb->rcv_nxt, optlen, datalen,
1865     seqno_be, pcb->local_port, pcb->remote_port, TCP_ACK,
1866     TCPWND_MIN16(RCV_WND_SCALE(pcb, pcb->rcv_ann_wnd)));
1867   if (p != NULL) {
1868     /* If we're sending a packet, update the announced right window edge */
1869     pcb->rcv_ann_right_edge = pcb->rcv_nxt + pcb->rcv_ann_wnd;
1870   }
1871   return p;
1872 }
1873 
1874 /* Fill in options for control segments */
1875 static void
1876 tcp_output_fill_options(const struct tcp_pcb *pcb, struct pbuf *p, u8_t optflags, u8_t num_sacks)
1877 {
1878   struct tcp_hdr *tcphdr;
1879   u32_t *opts;
1880   u16_t sacks_len = 0;
1881 
1882   LWIP_ASSERT("tcp_output_fill_options: invalid pbuf", p != NULL);
1883 
1884   tcphdr = (struct tcp_hdr *)p->payload;
1885   opts = (u32_t *)(void *)(tcphdr + 1);
1886 
1887   /* NB. MSS and window scale options are only sent on SYNs, so ignore them here */
1888 
1889 #if LWIP_TCP_TIMESTAMPS
1890   if (optflags & TF_SEG_OPTS_TS) {
1891     tcp_build_timestamp_option(pcb, opts);
1892     opts += 3;
1893   }
1894 #endif
1895 
1896 #if LWIP_TCP_SACK_OUT
1897   if (pcb && (num_sacks > 0)) {
1898     tcp_build_sack_option(pcb, opts, num_sacks);
1899     /* 1 word for SACKs header (including 2xNOP), and 2 words for each SACK */
1900     sacks_len = 1 + num_sacks * 2;
1901     opts += sacks_len;
1902   }
1903 #else
1904   LWIP_UNUSED_ARG(num_sacks);
1905 #endif
1906 
1907 #ifdef LWIP_HOOK_TCP_OUT_ADD_TCPOPTS
1908   opts = LWIP_HOOK_TCP_OUT_ADD_TCPOPTS(p, tcphdr, pcb, opts);
1909 #endif
1910 
1911   LWIP_UNUSED_ARG(pcb);
1912   LWIP_UNUSED_ARG(sacks_len);
1913   LWIP_ASSERT("options not filled", (u8_t *)opts == ((u8_t *)(tcphdr + 1)) + sacks_len * 4 + LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb));
1914   LWIP_UNUSED_ARG(optflags); /* for LWIP_NOASSERT */
1915   LWIP_UNUSED_ARG(opts); /* for LWIP_NOASSERT */
1916 }
1917 
1918 /** Output a control segment pbuf to IP.
1919  *
1920  * Called from tcp_rst, tcp_send_empty_ack, tcp_keepalive and tcp_zero_window_probe,
1921  * this function combines selecting a netif for transmission, generating the tcp
1922  * header checksum and calling ip_output_if while handling netif hints and stats.
1923  */
1924 static err_t
1925 tcp_output_control_segment(const struct tcp_pcb *pcb, struct pbuf *p,
1926                            const ip_addr_t *src, const ip_addr_t *dst)
1927 {
1928   struct netif *netif;
1929 
1930   LWIP_ASSERT("tcp_output_control_segment: invalid pbuf", p != NULL);
1931 
1932   netif = tcp_route(pcb, src, dst);
1933   if (netif == NULL) {
1934     pbuf_free(p);
1935     return ERR_RTE;
1936   }
1937   return tcp_output_control_segment_netif(pcb, p, src, dst, netif);
1938 }
1939 
1940 /** Output a control segment pbuf to IP.
1941  *
1942  * Called instead of tcp_output_control_segment when we don't have a pcb but we
1943  * do know the interface to send to.
1944  */
1945 static err_t
1946 tcp_output_control_segment_netif(const struct tcp_pcb *pcb, struct pbuf *p,
1947                                  const ip_addr_t *src, const ip_addr_t *dst,
1948                                  struct netif *netif)
1949 {
1950   err_t err;
1951   u8_t ttl, tos;
1952 
1953   LWIP_ASSERT("tcp_output_control_segment_netif: no netif given", netif != NULL);
1954 
1955 #if CHECKSUM_GEN_TCP
1956   IF__NETIF_CHECKSUM_ENABLED(netif, NETIF_CHECKSUM_GEN_TCP) {
1957     struct tcp_hdr *tcphdr = (struct tcp_hdr *)p->payload;
1958     tcphdr->chksum = ip_chksum_pseudo(p, IP_PROTO_TCP, p->tot_len,
1959                                       src, dst);
1960   }
1961 #endif
1962   if (pcb != NULL) {
1963     NETIF_SET_HINTS(netif, LWIP_CONST_CAST(struct netif_hint*, &(pcb->netif_hints)));
1964     ttl = pcb->ttl;
1965     tos = pcb->tos;
1966   } else {
1967     /* Send output with hardcoded TTL/HL since we have no access to the pcb */
1968     ttl = TCP_TTL;
1969     tos = 0;
1970   }
1971   TCP_STATS_INC(tcp.xmit);
1972   err = ip_output_if(p, src, dst, ttl, tos, IP_PROTO_TCP, netif);
1973   NETIF_RESET_HINTS(netif);
1974 
1975   pbuf_free(p);
1976   return err;
1977 }
1978 
1979 static struct pbuf *
1980 tcp_rst_common(const struct tcp_pcb *pcb, u32_t seqno, u32_t ackno,
1981                const ip_addr_t *local_ip, const ip_addr_t *remote_ip,
1982                u16_t local_port, u16_t remote_port)
1983 {
1984   struct pbuf *p;
1985   u16_t wnd;
1986   u8_t optlen;
1987 
1988   LWIP_ASSERT("tcp_rst: invalid local_ip", local_ip != NULL);
1989   LWIP_ASSERT("tcp_rst: invalid remote_ip", remote_ip != NULL);
1990 
1991   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
1992 
1993 #if LWIP_WND_SCALE
1994   wnd = PP_HTONS(((TCP_WND >> TCP_RCV_SCALE) & 0xFFFF));
1995 #else
1996   wnd = PP_HTONS(TCP_WND);
1997 #endif
1998 
1999   p = tcp_output_alloc_header_common(ackno, optlen, 0, lwip_htonl(seqno), local_port,
2000     remote_port, TCP_RST | TCP_ACK, wnd);
2001   if (p == NULL) {
2002     LWIP_DEBUGF(TCP_DEBUG, ("tcp_rst: could not allocate memory for pbuf\n"));
2003     return NULL;
2004   }
2005   tcp_output_fill_options(pcb, p, 0, 0);
2006 
2007   MIB2_STATS_INC(mib2.tcpoutrsts);
2008 
2009   LWIP_DEBUGF(TCP_RST_DEBUG, ("tcp_rst: seqno %"U32_F" ackno %"U32_F".\n", seqno, ackno));
2010   return p;
2011 }
2012 
2013 /**
2014  * Send a TCP RESET packet (empty segment with RST flag set) to abort a
2015  * connection.
2016  *
2017  * Called by tcp_abort() (to abort a local connection), tcp_close() (if not
2018  * all data has been received by the application), tcp_timewait_input() (if a
2019  * SYN is received) and tcp_process() (received segment in the wrong state).
2020  *
2021  * Since a RST segment is in most cases not sent for an active connection,
2022  * tcp_rst() takes a number of arguments explicitly that most other segment
2023  * output functions read from a tcp_pcb.
2024  *
2025  * @param pcb TCP pcb (may be NULL if no pcb is available)
2026  * @param seqno the sequence number to use for the outgoing segment
2027  * @param ackno the acknowledge number to use for the outgoing segment
2028  * @param local_ip the local IP address to send the segment from
2029  * @param remote_ip the remote IP address to send the segment to
2030  * @param local_port the local TCP port to send the segment from
2031  * @param remote_port the remote TCP port to send the segment to
2032  */
2033 void
2034 tcp_rst(const struct tcp_pcb *pcb, u32_t seqno, u32_t ackno,
2035         const ip_addr_t *local_ip, const ip_addr_t *remote_ip,
2036         u16_t local_port, u16_t remote_port)
2037 {
2038   struct pbuf *p;
2039 
2040   p = tcp_rst_common(pcb, seqno, ackno, local_ip, remote_ip, local_port, remote_port);
2041   if (p != NULL) {
2042     tcp_output_control_segment(pcb, p, local_ip, remote_ip);
2043   }
2044 }
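
/* Usage sketch (illustrative; mirrors what tcp_abandon()/tcp_abort() pass when
 * resetting an active connection, under the assumption that the pcb fields are
 * still valid at this point):
 *
 *   tcp_rst(pcb, pcb->snd_nxt, pcb->rcv_nxt, &pcb->local_ip, &pcb->remote_ip,
 *           pcb->local_port, pcb->remote_port);
 */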
2045 
2046 /**
2047  * Send a TCP RESET packet (empty segment with RST flag set) to show that there
2048  * is no matching local connection for a received segment.
2049  *
2050  * Called by tcp_input() (if no matching local pcb was found) and
2051  * tcp_listen_input() (if incoming segment has ACK flag set).
2052  *
2053  * Since a RST segment is in most cases not sent for an active connection,
2054  * tcp_rst() takes a number of arguments explicitly that most other segment
2055  * output functions read from a tcp_pcb.
2056  *
2057  * @param netif the netif on which to send the RST (since we have no pcb)
2058  * @param seqno the sequence number to use for the outgoing segment
2059  * @param ackno the acknowledge number to use for the outgoing segment
2060  * @param local_ip the local IP address to send the segment from
2061  * @param remote_ip the remote IP address to send the segment to
2062  * @param local_port the local TCP port to send the segment from
2063  * @param remote_port the remote TCP port to send the segment to
2064  */
2065 void
2066 tcp_rst_netif(struct netif *netif, u32_t seqno, u32_t ackno,
2067               const ip_addr_t *local_ip, const ip_addr_t *remote_ip,
2068               u16_t local_port, u16_t remote_port)
2069 {
2070   if (netif) {
2071     struct pbuf *p = tcp_rst_common(NULL, seqno, ackno, local_ip, remote_ip, local_port, remote_port);
2072     if (p != NULL) {
2073       tcp_output_control_segment_netif(NULL, p, local_ip, remote_ip, netif);
2074     }
2075   } else {
2076     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_rst_netif: no netif given\n"));
2077   }
2078 }
2079 
2080 /**
2081  * Send an ACK without data.
2082  *
2083  * @param pcb Protocol control block for the TCP connection to send the ACK
2084  */
2085 err_t
2086 tcp_send_empty_ack(struct tcp_pcb *pcb)
2087 {
2088   err_t err;
2089   struct pbuf *p;
2090   u8_t optlen, optflags = 0;
2091   u8_t num_sacks = 0;
2092 
2093   LWIP_ASSERT("tcp_send_empty_ack: invalid pcb", pcb != NULL);
2094 
2095 #if LWIP_TCP_TIMESTAMPS
2096   if (pcb->flags & TF_TIMESTAMP) {
2097     optflags = TF_SEG_OPTS_TS;
2098   }
2099 #endif
2100   optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(optflags, pcb);
2101 
2102 #if LWIP_TCP_SACK_OUT
2103   /* For now, SACKs are only sent with empty ACKs */
2104   if ((num_sacks = tcp_get_num_sacks(pcb, optlen)) > 0) {
2105     optlen += 4 + num_sacks * 8; /* 4 bytes for header (including 2*NOP), plus 8B for each SACK */
2106   }
2107 #endif
2108 
2109   p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt));
2110   if (p == NULL) {
2111     /* let tcp_fasttmr retry sending this ACK */
2112     tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2113     LWIP_DEBUGF(TCP_OUTPUT_DEBUG, ("tcp_output: (ACK) could not allocate pbuf\n"));
2114     return ERR_BUF;
2115   }
2116   tcp_output_fill_options(pcb, p, optflags, num_sacks);
2117 
2118 #if LWIP_TCP_TIMESTAMPS
2119   pcb->ts_lastacksent = pcb->rcv_nxt;
2120 #endif
2121 
2122   LWIP_DEBUGF(TCP_OUTPUT_DEBUG,
2123               ("tcp_output: sending ACK for %"U32_F"\n", pcb->rcv_nxt));
2124   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2125   if (err != ERR_OK) {
2126     /* let tcp_fasttmr retry sending this ACK */
2127     tcp_set_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2128   } else {
2129     /* remove ACK flags from the PCB, as we sent an empty ACK now */
2130     tcp_clear_flags(pcb, TF_ACK_DELAY | TF_ACK_NOW);
2131   }
2132 
2133   return err;
2134 }
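
/* Illustrative note: callers normally do not invoke this directly; they set
 * TF_ACK_NOW and let tcp_output() fall back to an empty ACK when no queued data
 * can be sent, e.g.:
 *
 *   tcp_ack_now(pcb);   // request an immediate ACK (sets TF_ACK_NOW)
 *   tcp_output(pcb);    // sends data if possible, otherwise an empty ACK
 */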
2135 
2136 /**
2137  * Send keepalive packets to keep a connection active although
2138  * no data is sent over it.
2139  *
2140  * Called by tcp_slowtmr()
2141  *
2142  * @param pcb the tcp_pcb for which to send a keepalive packet
2143  */
2144 err_t
2145 tcp_keepalive(struct tcp_pcb *pcb)
2146 {
2147   err_t err;
2148   struct pbuf *p;
2149   u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
2150 
2151   LWIP_ASSERT("tcp_keepalive: invalid pcb", pcb != NULL);
2152 
2153   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: sending KEEPALIVE probe to "));
2154   ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
2155   LWIP_DEBUGF(TCP_DEBUG, ("\n"));
2156 
2157   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: tcp_ticks %"U32_F"   pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
2158                           tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));
2159 
2160   p = tcp_output_alloc_header(pcb, optlen, 0, lwip_htonl(pcb->snd_nxt - 1));
2161   if (p == NULL) {
2162     LWIP_DEBUGF(TCP_DEBUG,
2163                 ("tcp_keepalive: could not allocate memory for pbuf\n"));
2164     return ERR_MEM;
2165   }
2166   tcp_output_fill_options(pcb, p, 0, 0);
2167   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2168 
2169   LWIP_DEBUGF(TCP_DEBUG, ("tcp_keepalive: seqno %"U32_F" ackno %"U32_F" err %d.\n",
2170                           pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
2171   return err;
2172 }
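
/* Illustrative sketch: keepalive probing is driven by tcp_slowtmr() once the
 * application enables it on the pcb, e.g. (keep_intvl/keep_cnt only exist when
 * LWIP_TCP_KEEPALIVE is enabled):
 *
 *   ip_set_option(pcb, SOF_KEEPALIVE);  // enable keepalive for this pcb
 *   pcb->keep_idle  = 60000;            // first probe after 60 s of idle time (ms)
 *   pcb->keep_intvl = 10000;            // subsequent probes every 10 s
 *   pcb->keep_cnt   = 4;                // drop the connection after 4 unanswered probes
 */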
2173 
2174 /**
2175  * Send persist timer zero-window probes to keep a connection active
2176  * when a window update is lost.
2177  *
2178  * Called by tcp_slowtmr()
2179  *
2180  * @param pcb the tcp_pcb for which to send a zero-window probe packet
2181  */
2182 err_t
2183 tcp_zero_window_probe(struct tcp_pcb *pcb)
2184 {
2185   err_t err;
2186   struct pbuf *p;
2187   struct tcp_hdr *tcphdr;
2188   struct tcp_seg *seg;
2189   u16_t len;
2190   u8_t is_fin;
2191   u32_t snd_nxt;
2192   u8_t optlen = LWIP_TCP_OPT_LENGTH_SEGMENT(0, pcb);
2193 
2194   LWIP_ASSERT("tcp_zero_window_probe: invalid pcb", pcb != NULL);
2195 
2196   LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: sending ZERO WINDOW probe to "));
2197   ip_addr_debug_print_val(TCP_DEBUG, pcb->remote_ip);
2198   LWIP_DEBUGF(TCP_DEBUG, ("\n"));
2199 
2200   LWIP_DEBUGF(TCP_DEBUG,
2201               ("tcp_zero_window_probe: tcp_ticks %"U32_F
2202                "   pcb->tmr %"U32_F" pcb->keep_cnt_sent %"U16_F"\n",
2203                tcp_ticks, pcb->tmr, (u16_t)pcb->keep_cnt_sent));
2204 
2205   /* Only consider the unsent queue; the persist timer should be off when there is data in-flight */
2206   seg = pcb->unsent;
2207   if (seg == NULL) {
2208     /* Not expected: the persist timer should be off when the send buffer is empty */
2209     return ERR_OK;
2210   }
2211 
2212   /* Increment the probe count. NOTE: we record the probe even if it fails to
2213      actually transmit due to an error. This ensures that memory exhaustion or
2214      a routing problem doesn't leave a zero-window pcb as an indefinite zombie.
2215      The RTO mechanism has similar behavior, see pcb->nrtx */
2216   if (pcb->persist_probe < 0xFF) {
2217     ++pcb->persist_probe;
2218   }
2219 
2220   is_fin = ((TCPH_FLAGS(seg->tcphdr) & TCP_FIN) != 0) && (seg->len == 0);
2221   /* we want to send one seqno: either FIN or data (no options) */
2222   len = is_fin ? 0 : 1;
2223 
2224   p = tcp_output_alloc_header(pcb, optlen, len, seg->tcphdr->seqno);
2225   if (p == NULL) {
2226     LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: no memory for pbuf\n"));
2227     return ERR_MEM;
2228   }
2229   tcphdr = (struct tcp_hdr *)p->payload;
2230 
2231   if (is_fin) {
2232     /* FIN segment, no data */
2233     TCPH_FLAGS_SET(tcphdr, TCP_ACK | TCP_FIN);
2234   } else {
2235     /* Data segment, copy in one byte from the head of the unsent queue */
2236     char *d = ((char *)p->payload + TCP_HLEN);
2237     /* Depending on whether the segment has already been sent (unacked) or not
2238        (unsent), seg->p->payload points to the IP header or TCP header.
2239        Ensure we copy the first TCP data byte: */
2240     pbuf_copy_partial(seg->p, d, 1, seg->p->tot_len - seg->len);
2241   }
2242 
2243   /* The byte may be acknowledged without the window being opened. */
2244   snd_nxt = lwip_ntohl(seg->tcphdr->seqno) + 1;
2245   if (TCP_SEQ_LT(pcb->snd_nxt, snd_nxt)) {
2246     pcb->snd_nxt = snd_nxt;
2247   }
2248   tcp_output_fill_options(pcb, p, 0, 0);
2249 
2250   err = tcp_output_control_segment(pcb, p, &pcb->local_ip, &pcb->remote_ip);
2251 
2252   LWIP_DEBUGF(TCP_DEBUG, ("tcp_zero_window_probe: seqno %"U32_F
2253                           " ackno %"U32_F" err %d.\n",
2254                           pcb->snd_nxt - 1, pcb->rcv_nxt, (int)err));
2255   return err;
2256 }
2257 #endif /* LWIP_TCP */
2258