1 /*
2  * This file is part of the Nice GLib ICE library.
3  *
4  * (C) 2010, 2014 Collabora Ltd.
5  *  Contact: Philip Withnall
6 
7  *
8  * The contents of this file are subject to the Mozilla Public License Version
9  * 1.1 (the "License"); you may not use this file except in compliance with
10  * the License. You may obtain a copy of the License at
11  * http://www.mozilla.org/MPL/
12  *
13  * Software distributed under the License is distributed on an "AS IS" basis,
14  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
15  * for the specific language governing rights and limitations under the
16  * License.
17  *
18  * The Original Code is the Nice GLib ICE library.
19  *
20  * The Initial Developers of the Original Code are Collabora Ltd and Nokia
21  * Corporation. All Rights Reserved.
22  *
23  * Contributors:
24  *   Youness Alaoui, Collabora Ltd.
25  *   Philip Withnall, Collabora Ltd.
26  *
27  * Alternatively, the contents of this file may be used under the terms of the
28  * the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
29  * case the provisions of LGPL are applicable instead of those above. If you
30  * wish to allow use of your version of this file only under the terms of the
31  * LGPL and not to allow others to use your version of this file under the
32  * MPL, indicate your decision by deleting the provisions above and replace
33  * them with the notice and other provisions required by the LGPL. If you do
34  * not delete the provisions above, a recipient may use your version of this
35  * file under either the MPL or the LGPL.
36  */
37 
38 /* Reproducing license from libjingle for copied code */
39 
40 /*
41  * libjingle
42  * Copyright 2004--2005, Google Inc.
43  *
44  * Redistribution and use in source and binary forms, with or without
45  * modification, are permitted provided that the following conditions are met:
46  *
47  *  1. Redistributions of source code must retain the above copyright notice,
48  *     this list of conditions and the following disclaimer.
49  *  2. Redistributions in binary form must reproduce the above copyright notice,
50  *     this list of conditions and the following disclaimer in the documentation
51  *     and/or other materials provided with the distribution.
52  *  3. The name of the author may not be used to endorse or promote products
53  *     derived from this software without specific prior written permission.
54  *
55  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
56  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
57  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
58  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
61  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
62  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
63  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
64  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65  */
66 
67 #include <stdlib.h>
68 #include <errno.h>
69 #include <string.h>
70 
71 #include <glib.h>
72 
73 #ifndef G_OS_WIN32
74 #  include <arpa/inet.h>
75 #endif
76 
77 #include "pseudotcp.h"
78 #include "agent-priv.h"
79 
80 struct _PseudoTcpSocketClass {
81     GObjectClass parent_class;
82 };
83 
84 typedef struct _PseudoTcpSocketPrivate PseudoTcpSocketPrivate;
85 
86 
87 struct _PseudoTcpSocket {
88     GObject parent;
89     PseudoTcpSocketPrivate *priv;
90 };
91 
92 G_DEFINE_TYPE (PseudoTcpSocket, pseudo_tcp_socket, G_TYPE_OBJECT);
93 
94 //////////////////////////////////////////////////////////////////////
95 // Network Constants
96 //////////////////////////////////////////////////////////////////////
97 
98 // Standard MTUs
99 const guint16 PACKET_MAXIMUMS[] = {
100   65535,    // Theoretical maximum, Hyperchannel
101   32000,    // Nothing
102   17914,    // 16Mb IBM Token Ring
103   8166,   // IEEE 802.4
104   //4464,   // IEEE 802.5 (4Mb max)
105   4352,   // FDDI
106   //2048,   // Wideband Network
107   2002,   // IEEE 802.5 (4Mb recommended)
108   //1536,   // Expermental Ethernet Networks
109   //1500,   // Ethernet, Point-to-Point (default)
110   1492,   // IEEE 802.3
111   1006,   // SLIP, ARPANET
112   //576,    // X.25 Networks
113   //544,    // DEC IP Portal
114   //512,    // NETBIOS
115   508,    // IEEE 802/Source-Rt Bridge, ARCNET
116   296,    // Point-to-Point (low delay)
117   //68,     // Official minimum
118   0,      // End of list marker
119 };
120 
/* FIXME: This is a reasonable MTU, but we should get it from the lower layer */
#define DEF_MTU             1400
#define MAX_PACKET          65532
/* Note: we removed lowest level because packet overhead was larger! */
#define MIN_PACKET          296

/* Header sizes of the layers carrying pseudo-TCP segments. */
#define IP_HEADER_SIZE      20  /* (+ up to 40 bytes of options?) */
#define ICMP_HEADER_SIZE    8
#define UDP_HEADER_SIZE     8
/* TODO: Make JINGLE_HEADER_SIZE transparent to this code? */
#define JINGLE_HEADER_SIZE  64  /* when relay framing is in use */
134 
135 //////////////////////////////////////////////////////////////////////
136 // Global Constants and Functions
137 //////////////////////////////////////////////////////////////////////
138 //
139 //    0                   1                   2                   3
140 //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
141 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
142 //  0 |                      Conversation Number                      |
143 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
144 //  4 |                        Sequence Number                        |
145 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
146 //  8 |                     Acknowledgment Number                     |
147 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
148 //    |               |   |U|A|P|R|S|F|                               |
149 // 12 |    Control    |   |R|C|S|S|Y|I|            Window             |
150 //    |               |   |G|K|H|T|N|N|                               |
151 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
152 // 16 |                       Timestamp sending                       |
153 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
154 // 20 |                      Timestamp receiving                      |
155 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
156 // 24 |                             data                              |
157 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
158 //
159 //////////////////////////////////////////////////////////////////////
160 
#define MAX_SEQ                 0xFFFFFFFF
/* Offset of the data within a segment, i.e. the size of the header shown in
 * the layout diagram above. */
#define HEADER_SIZE             24

/* Per-packet overhead: every header listed from the IP layer down to the
 * pseudo-TCP header itself. */
#define PACKET_OVERHEAD         (IP_HEADER_SIZE + UDP_HEADER_SIZE + \
      JINGLE_HEADER_SIZE + HEADER_SIZE)

/* Retransmission timeout bounds, in milliseconds. */
#define MIN_RTO                 1000   /* 1 second (RFC 6298, sect 2.4) */
#define DEF_RTO                 1000   /* 1 second (RFC 6298, sect 2.1) */
#define MAX_RTO                 60000  /* 60 seconds */

/* Defaults for the "ack-delay" and "no-delay" properties. */
#define DEFAULT_ACK_DELAY       100    /* 100 milliseconds */
#define DEFAULT_NO_DELAY        FALSE

/* Defaults for the "rcv-buf" and "snd-buf" properties. */
#define DEFAULT_RCV_BUF_SIZE    (60 * 1024)
#define DEFAULT_SND_BUF_SIZE    (90 * 1024)
176 
/* Option kinds carried in control segments.
 *
 * NOTE: These values are used on the wire and must fit in 8 bits. */
typedef enum {
  /* Google-provided options: */
  TCP_OPT_EOL       = 0,    /* end of list */
  TCP_OPT_NOOP      = 1,    /* no-op */
  TCP_OPT_MSS       = 2,    /* maximum segment size */
  TCP_OPT_WND_SCALE = 3,    /* window scale factor */

  /* libnice extensions: */
  TCP_OPT_FIN_ACK   = 254,  /* FIN-ACK support */
} TcpOption;
187 
188 
189 /*
190 #define FLAG_SYN 0x02
191 #define FLAG_ACK 0x10
192 */
193 
/* Segment flag bits; each flag occupies its own bit so they can be combined.
 *
 * NOTE: These values are used on the wire and must fit in 5 bits. */
typedef enum {
  FLAG_NONE = 0x00,
  FLAG_FIN  = 0x01,
  FLAG_CTL  = 0x02,
  FLAG_RST  = 0x04,
} TcpFlags;
201 
/* Type byte at the start of a control (FLAG_CTL) segment's payload. */
#define CTL_CONNECT         0
/* #define CTL_REDIRECT     1 */
#define CTL_EXTRA           255


#define CTRL_BOUND          0x80000000

/* Maximum segment lifetime (1 minute), in milliseconds.
 * RFC 793, §3.3 specifies 2 minutes; but Linux uses 1 minute, so let’s go with
 * that. */
#define TCP_MSL             (60 * 1000)

/* If there are no pending clocks, wake up every 4 seconds */
#define DEFAULT_TIMEOUT     4000
/* If the connection is closed, once per minute */
#define CLOSED_TIMEOUT      (60 * 1000)
/* Timeout after reaching the TIME_WAIT state, in milliseconds.
 * See: RFC 1122, §4.2.2.13.
 *
 * XXX: Since we can control the underlying layer’s channel ID, we can
 * guarantee delayed segments won’t affect subsequent connections, so can
 * radically shorten the TIME-WAIT timeout (to the extent that it basically
 * doesn’t exist). It would normally be (2 * TCP_MSL). */
#define TIME_WAIT_TIMEOUT   1
226 
227 //////////////////////////////////////////////////////////////////////
228 // Helper Functions
229 //////////////////////////////////////////////////////////////////////
#ifndef G_OS_WIN32
/* Generic minimum/maximum. An argument may be evaluated more than once, so do
 * not pass expressions with side effects. Not defined when building for
 * Windows (presumably the platform headers already provide them -- confirm). */
#  define min(a, b) ((a) < (b) ? (a) : (b))
#  define max(a, b) ((a) > (b) ? (a) : (b))
#endif
234 
235 static guint32
bound(guint32 lower,guint32 middle,guint32 upper)236 bound(guint32 lower, guint32 middle, guint32 upper)
237 {
238    return min (max (lower, middle), upper);
239 }
240 
241 static gboolean
time_is_between(guint32 later,guint32 middle,guint32 earlier)242 time_is_between(guint32 later, guint32 middle, guint32 earlier)
243 {
244   if (earlier <= later) {
245     return ((earlier <= middle) && (middle <= later));
246   } else {
247     return !((later < middle) && (middle < earlier));
248   }
249 }
250 
251 static gint32
time_diff(guint32 later,guint32 earlier)252 time_diff(guint32 later, guint32 earlier)
253 {
254   guint32 LAST = 0xFFFFFFFF;
255   guint32 HALF = 0x80000000;
256   if (time_is_between(earlier + HALF, later, earlier)) {
257     if (earlier <= later) {
258       return (long)(later - earlier);
259     } else {
260       return (long)(later + (LAST - earlier) + 1);
261     }
262   } else {
263     if (later <= earlier) {
264       return -(long) (earlier - later);
265     } else {
266       return -(long)(earlier + (LAST - later) + 1);
267     }
268   }
269 }
270 
271 ////////////////////////////////////////////////////////
272 // PseudoTcpFifo works exactly like FifoBuffer in libjingle
273 ////////////////////////////////////////////////////////
274 
275 
276 typedef struct {
277   guint8 *buffer;
278   gsize buffer_length;
279   gsize data_length;
280   gsize read_position;
281 } PseudoTcpFifo;
282 
283 
284 static void
pseudo_tcp_fifo_init(PseudoTcpFifo * b,gsize size)285 pseudo_tcp_fifo_init (PseudoTcpFifo *b, gsize size)
286 {
287   b->buffer = g_slice_alloc (size);
288   b->buffer_length = size;
289 }
290 
291 static void
pseudo_tcp_fifo_clear(PseudoTcpFifo * b)292 pseudo_tcp_fifo_clear (PseudoTcpFifo *b)
293 {
294   if (b->buffer)
295     g_slice_free1 (b->buffer_length, b->buffer);
296   b->buffer = NULL;
297   b->buffer_length = 0;
298 }
299 
300 static gsize
pseudo_tcp_fifo_get_buffered(PseudoTcpFifo * b)301 pseudo_tcp_fifo_get_buffered (PseudoTcpFifo *b)
302 {
303   return b->data_length;
304 }
305 
306 static gboolean
pseudo_tcp_fifo_set_capacity(PseudoTcpFifo * b,gsize size)307 pseudo_tcp_fifo_set_capacity (PseudoTcpFifo *b, gsize size)
308 {
309   if (b->data_length > size)
310     return FALSE;
311 
312   if (size != b->data_length) {
313     guint8 *buffer = g_slice_alloc (size);
314     gsize copy = b->data_length;
315     gsize tail_copy = min (copy, b->buffer_length - b->read_position);
316 
317     memcpy (buffer, &b->buffer[b->read_position], tail_copy);
318     memcpy (buffer + tail_copy, &b->buffer[0], copy - tail_copy);
319     g_slice_free1 (b->buffer_length, b->buffer);
320     b->buffer = buffer;
321     b->buffer_length = size;
322     b->read_position = 0;
323   }
324 
325   return TRUE;
326 }
327 
328 static void
pseudo_tcp_fifo_consume_read_data(PseudoTcpFifo * b,gsize size)329 pseudo_tcp_fifo_consume_read_data (PseudoTcpFifo *b, gsize size)
330 {
331   g_assert_cmpint (size, <=, b->data_length);
332 
333   b->read_position = (b->read_position + size) % b->buffer_length;
334   b->data_length -= size;
335 }
336 
337 static void
pseudo_tcp_fifo_consume_write_buffer(PseudoTcpFifo * b,gsize size)338 pseudo_tcp_fifo_consume_write_buffer (PseudoTcpFifo *b, gsize size)
339 {
340   g_assert_cmpint (size, <=, b->buffer_length - b->data_length);
341 
342   b->data_length += size;
343 }
344 
345 static gsize
pseudo_tcp_fifo_get_write_remaining(PseudoTcpFifo * b)346 pseudo_tcp_fifo_get_write_remaining (PseudoTcpFifo *b)
347 {
348   return b->buffer_length - b->data_length;
349 }
350 
351 static gsize
pseudo_tcp_fifo_read_offset(PseudoTcpFifo * b,guint8 * buffer,gsize bytes,gsize offset)352 pseudo_tcp_fifo_read_offset (PseudoTcpFifo *b, guint8 *buffer, gsize bytes,
353     gsize offset)
354 {
355   gsize available = b->data_length - offset;
356   gsize read_position = (b->read_position + offset) % b->buffer_length;
357   gsize copy = min (bytes, available);
358   gsize tail_copy = min(copy, b->buffer_length - read_position);
359 
360   /* EOS */
361   if (offset >= b->data_length)
362     return 0;
363 
364   memcpy(buffer, &b->buffer[read_position], tail_copy);
365   memcpy(buffer + tail_copy, &b->buffer[0], copy - tail_copy);
366 
367   return copy;
368 }
369 
370 static gsize
pseudo_tcp_fifo_write_offset(PseudoTcpFifo * b,const guint8 * buffer,gsize bytes,gsize offset)371 pseudo_tcp_fifo_write_offset (PseudoTcpFifo *b, const guint8 *buffer,
372     gsize bytes, gsize offset)
373 {
374   gsize available = b->buffer_length - b->data_length - offset;
375   gsize write_position = (b->read_position + b->data_length + offset)
376       % b->buffer_length;
377   gsize copy = min (bytes, available);
378   gsize tail_copy = min(copy, b->buffer_length - write_position);
379 
380   if (b->data_length + offset >= b->buffer_length) {
381     return 0;
382   }
383 
384   memcpy(&b->buffer[write_position], buffer, tail_copy);
385   memcpy(&b->buffer[0], buffer + tail_copy, copy - tail_copy);
386 
387   return copy;
388 }
389 
390 static gsize
pseudo_tcp_fifo_read(PseudoTcpFifo * b,guint8 * buffer,gsize bytes)391 pseudo_tcp_fifo_read (PseudoTcpFifo *b, guint8 *buffer, gsize bytes)
392 {
393   gsize copy;
394 
395   copy = pseudo_tcp_fifo_read_offset (b, buffer, bytes, 0);
396 
397   b->read_position = (b->read_position + copy) % b->buffer_length;
398   b->data_length -= copy;
399 
400   return copy;
401 }
402 
403 static gsize
pseudo_tcp_fifo_write(PseudoTcpFifo * b,const guint8 * buffer,gsize bytes)404 pseudo_tcp_fifo_write (PseudoTcpFifo *b, const guint8 *buffer, gsize bytes)
405 {
406   gsize copy;
407 
408   copy = pseudo_tcp_fifo_write_offset (b, buffer, bytes, 0);
409   b->data_length += copy;
410 
411   return copy;
412 }
413 
414 
415 //////////////////////////////////////////////////////////////////////
416 // PseudoTcp
417 //////////////////////////////////////////////////////////////////////
418 
/* Shutdown mode of a socket. Only used if FIN-ACK support is disabled. */
typedef enum {
  SD_NONE = 0,
  SD_GRACEFUL,
  SD_FORCEFUL
} Shutdown;
425 
/* Argument to attempt_send(), selecting what it is being asked to send. */
typedef enum {
  sfNone = 0,
  sfDelayedAck,
  sfImmediateAck,
  sfFin,
  sfRst,
  sfDuplicateAck,
} SendFlags;
434 
435 typedef struct {
436   guint32 conv, seq, ack;
437   TcpFlags flags;
438   guint16 wnd;
439   const gchar * data;
440   guint32 len;
441   guint32 tsval, tsecr;
442 } Segment;
443 
444 typedef struct {
445   guint32 seq, len;
446   guint8 xmit;
447   TcpFlags flags;
448 } SSegment;
449 
450 typedef struct {
451   guint32 seq, len;
452 } RSegment;
453 
/**
 * ClosedownSource:
 * @CLOSEDOWN_LOCAL: The connection was forcefully closed locally, or an error
 * was detected locally.
 * @CLOSEDOWN_REMOTE: The peer sent an RST segment.
 *
 * Why closedown() is being called.
 *
 * Since: 0.1.8
 */
typedef enum {
  CLOSEDOWN_LOCAL = 0,
  CLOSEDOWN_REMOTE,
} ClosedownSource;
468 
469 
470 struct _PseudoTcpSocketPrivate {
471   PseudoTcpCallbacks callbacks;
472 
473   Shutdown shutdown;  /* only used if !support_fin_ack */
474   gboolean shutdown_reads;
475   gint error;
476 
477   // TCB data
478   PseudoTcpState state;
479   guint32 conv;
480   gboolean bReadEnable, bWriteEnable, bOutgoing;
481   guint32 last_traffic;
482 
483   // Incoming data
484   GList *rlist;
485   guint32 rbuf_len, rcv_nxt, rcv_wnd, lastrecv;
486   guint8 rwnd_scale; // Window scale factor
487   PseudoTcpFifo rbuf;
488   guint32 rcv_fin;  /* sequence number of the received FIN octet, or 0 */
489 
490   // Outgoing data
491   GQueue slist;
492   GQueue unsent_slist;
493   guint32 sbuf_len, snd_nxt, snd_wnd, lastsend;
494   guint32 snd_una;  /* oldest unacknowledged sequence number */
495   guint8 swnd_scale; // Window scale factor
496   PseudoTcpFifo sbuf;
497 
498   // Maximum segment size, estimated protocol level, largest segment sent
499   guint32 mss, msslevel, largest, mtu_advise;
500   // Retransmit timer
501   guint32 rto_base;
502 
503   // Timestamp tracking
504   guint32 ts_recent, ts_lastack;
505 
506   // Round-trip calculation
507   guint32 rx_rttvar, rx_srtt, rx_rto;
508 
509   // Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
510   guint32 ssthresh, cwnd;
511   guint8 dup_acks;
512   guint32 recover;
513   gboolean fast_recovery;
514   guint32 t_ack;  /* time a delayed ack was scheduled; 0 if no acks scheduled */
515   guint32 last_acked_ts;
516 
517   gboolean use_nagling;
518   guint32 ack_delay;
519 
520   // This is used by unit tests to test backward compatibility of
521   // PseudoTcp implementations that don't support window scaling.
522   gboolean support_wnd_scale;
523 
524   /* Current time. Typically only used for testing, when non-zero. When zero,
525    * the system monotonic clock is used. Units: monotonic milliseconds. */
526   guint32 current_time;
527 
528   /* This is used by compatible implementations (with the TCP_OPT_FIN_ACK
529    * option) to enable correct FIN-ACK connection termination. Defaults to
530    * TRUE unless no compatible option is received. */
531   gboolean support_fin_ack;
532 };
533 
/* Comparisons that tolerate wrap-around, assuming 32-bit unsigned operands
 * (TODO confirm at the call sites): a is considered larger than b when it is
 * ahead of b by less than half of the 32-bit range. */
#define LARGER(a, b)            (((a) - (b) - 1) < (G_MAXUINT32 >> 1))
#define LARGER_OR_EQUAL(a, b)   (((a) - (b)) < (G_MAXUINT32 >> 1))
#define SMALLER(a, b)           LARGER ((b), (a))
#define SMALLER_OR_EQUAL(a, b)  LARGER_OR_EQUAL ((b), (a))
538 
/* GObject property IDs, in the order they are installed by
 * pseudo_tcp_socket_class_init(). */
enum
{
  PROP_CONVERSATION = 1,  /* "conversation" */
  PROP_CALLBACKS,         /* "callbacks" */
  PROP_STATE,             /* "state" */
  PROP_ACK_DELAY,         /* "ack-delay" */
  PROP_NO_DELAY,          /* "no-delay" */
  PROP_RCV_BUF,           /* "rcv-buf" */
  PROP_SND_BUF,           /* "snd-buf" */
  PROP_SUPPORT_FIN_ACK,   /* "support-fin-ack" */
  LAST_PROPERTY
};
552 
553 
554 static void pseudo_tcp_socket_get_property (GObject *object, guint property_id,
555     GValue *value,  GParamSpec *pspec);
556 static void pseudo_tcp_socket_set_property (GObject *object, guint property_id,
557     const GValue *value, GParamSpec *pspec);
558 static void pseudo_tcp_socket_finalize (GObject *object);
559 
560 
561 static void queue_connect_message (PseudoTcpSocket *self);
562 static guint32 queue (PseudoTcpSocket *self, const gchar *data,
563     guint32 len, TcpFlags flags);
564 static PseudoTcpWriteResult packet(PseudoTcpSocket *self, guint32 seq,
565     TcpFlags flags, guint32 offset, guint32 len, guint32 now);
566 static gboolean parse (PseudoTcpSocket *self,
567     const guint8 *_header_buf, gsize header_buf_len,
568     const guint8 *data_buf, gsize data_buf_len);
569 static gboolean process(PseudoTcpSocket *self, Segment *seg);
570 static int transmit(PseudoTcpSocket *self, SSegment *sseg, guint32 now);
571 static void attempt_send(PseudoTcpSocket *self, SendFlags sflags);
572 static void closedown (PseudoTcpSocket *self, guint32 err,
573     ClosedownSource source);
574 static void adjustMTU(PseudoTcpSocket *self);
575 static void parse_options (PseudoTcpSocket *self, const guint8 *data,
576     guint32 len);
577 static void resize_send_buffer (PseudoTcpSocket *self, guint32 new_size);
578 static void resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size);
579 static void set_state (PseudoTcpSocket *self, PseudoTcpState new_state);
580 static void set_state_established (PseudoTcpSocket *self);
581 static void set_state_closed (PseudoTcpSocket *self, guint32 err);
582 
583 static const gchar *pseudo_tcp_state_get_name (PseudoTcpState state);
584 static gboolean pseudo_tcp_state_has_sent_fin (PseudoTcpState state);
585 static gboolean pseudo_tcp_state_has_received_fin (PseudoTcpState state);
586 static gboolean pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state);
587 
588 // The following logging is for detailed (packet-level) pseudotcp analysis only.
589 static PseudoTcpDebugLevel debug_level = PSEUDO_TCP_DEBUG_NONE;
590 
591 #define DEBUG(level, fmt, ...)                                          \
592   if (debug_level >= level)                                             \
593     g_log (level == PSEUDO_TCP_DEBUG_NORMAL ? "libnice-pseudotcp" : "libnice-pseudotcp-verbose", G_LOG_LEVEL_DEBUG, "PseudoTcpSocket %p %s: " fmt, \
594         self, pseudo_tcp_state_get_name (self->priv->state), ## __VA_ARGS__)
595 
596 void
pseudo_tcp_set_debug_level(PseudoTcpDebugLevel level)597 pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level)
598 {
599   debug_level = level;
600 }
601 
602 static guint32
get_current_time(PseudoTcpSocket * socket)603 get_current_time (PseudoTcpSocket *socket)
604 {
605   if (G_UNLIKELY (socket->priv->current_time != 0))
606     return socket->priv->current_time;
607 
608   return g_get_monotonic_time () / 1000;
609 }
610 
611 void
pseudo_tcp_socket_set_time(PseudoTcpSocket * self,guint32 current_time)612 pseudo_tcp_socket_set_time (PseudoTcpSocket *self, guint32 current_time)
613 {
614   self->priv->current_time = current_time;
615 }
616 
617 static void
pseudo_tcp_socket_class_init(PseudoTcpSocketClass * cls)618 pseudo_tcp_socket_class_init (PseudoTcpSocketClass *cls)
619 {
620   GObjectClass *object_class = G_OBJECT_CLASS (cls);
621 
622   object_class->get_property = pseudo_tcp_socket_get_property;
623   object_class->set_property = pseudo_tcp_socket_set_property;
624   object_class->finalize = pseudo_tcp_socket_finalize;
625 
626   g_object_class_install_property (object_class, PROP_CONVERSATION,
627       g_param_spec_uint ("conversation", "TCP Conversation ID",
628           "The TCP Conversation ID",
629           0, G_MAXUINT32, 0,
630           G_PARAM_CONSTRUCT_ONLY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
631 
632   g_object_class_install_property (object_class, PROP_CALLBACKS,
633       g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
634           "Structure with the callbacks to call when PseudoTcp events happen",
635           G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
636 
637   g_object_class_install_property (object_class, PROP_STATE,
638       g_param_spec_uint ("state", "PseudoTcp State",
639           "The current state (enum PseudoTcpState) of the PseudoTcp socket",
640           PSEUDO_TCP_LISTEN, PSEUDO_TCP_CLOSED, PSEUDO_TCP_LISTEN,
641           G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
642 
643   g_object_class_install_property (object_class, PROP_ACK_DELAY,
644       g_param_spec_uint ("ack-delay", "ACK Delay",
645           "Delayed ACK timeout (in milliseconds)",
646           0, G_MAXUINT, DEFAULT_ACK_DELAY,
647           G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
648 
649   g_object_class_install_property (object_class, PROP_NO_DELAY,
650       g_param_spec_boolean ("no-delay", "No Delay",
651           "Disable the Nagle algorithm (like the TCP_NODELAY option)",
652           DEFAULT_NO_DELAY,
653           G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
654 
655   g_object_class_install_property (object_class, PROP_RCV_BUF,
656       g_param_spec_uint ("rcv-buf", "Receive Buffer",
657           "Receive Buffer size",
658           1, G_MAXUINT, DEFAULT_RCV_BUF_SIZE,
659           G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
660 
661   g_object_class_install_property (object_class, PROP_SND_BUF,
662       g_param_spec_uint ("snd-buf", "Send Buffer",
663           "Send Buffer size",
664           1, G_MAXUINT, DEFAULT_SND_BUF_SIZE,
665           G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
666 
667   /**
668    * PseudoTcpSocket:support-fin-ack:
669    *
670    * Whether to support the FIN–ACK extension to the pseudo-TCP protocol for
671    * this socket. The extension is only compatible with other libnice pseudo-TCP
672    * stacks, and not with Jingle pseudo-TCP stacks. If enabled, support is
673    * negotiatied on connection setup, so it is safe for a #PseudoTcpSocket with
674    * support enabled to be used with one with it disabled, or with a Jingle
675    * pseudo-TCP socket which doesn’t support it at all.
676    *
677    * Support is enabled by default.
678    *
679    * Since: 0.1.8
680    */
681   g_object_class_install_property (object_class, PROP_SUPPORT_FIN_ACK,
682       g_param_spec_boolean ("support-fin-ack", "Support FIN–ACK",
683           "Whether to enable the optional FIN–ACK support.",
684           TRUE,
685           G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
686 }
687 
688 
689 static void
pseudo_tcp_socket_get_property(GObject * object,guint property_id,GValue * value,GParamSpec * pspec)690 pseudo_tcp_socket_get_property (GObject *object,
691                                   guint property_id,
692                                   GValue *value,
693                                   GParamSpec *pspec)
694 {
695   PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
696 
697   switch (property_id) {
698     case PROP_CONVERSATION:
699       g_value_set_uint (value, self->priv->conv);
700       break;
701     case PROP_CALLBACKS:
702       g_value_set_pointer (value, (gpointer) &self->priv->callbacks);
703       break;
704     case PROP_STATE:
705       g_value_set_uint (value, self->priv->state);
706       break;
707     case PROP_ACK_DELAY:
708       g_value_set_uint (value, self->priv->ack_delay);
709       break;
710     case PROP_NO_DELAY:
711       g_value_set_boolean (value, !self->priv->use_nagling);
712       break;
713     case PROP_RCV_BUF:
714       g_value_set_uint (value, self->priv->rbuf_len);
715       break;
716     case PROP_SND_BUF:
717       g_value_set_uint (value, self->priv->sbuf_len);
718       break;
719     case PROP_SUPPORT_FIN_ACK:
720       g_value_set_boolean (value, self->priv->support_fin_ack);
721       break;
722     default:
723       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
724       break;
725   }
726 }
727 
728 static void
pseudo_tcp_socket_set_property(GObject * object,guint property_id,const GValue * value,GParamSpec * pspec)729 pseudo_tcp_socket_set_property (GObject *object,
730                                   guint property_id,
731                                   const GValue *value,
732                                   GParamSpec *pspec)
733 {
734   PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
735 
736   switch (property_id) {
737     case PROP_CONVERSATION:
738       self->priv->conv = g_value_get_uint (value);
739       break;
740     case PROP_CALLBACKS:
741       {
742         PseudoTcpCallbacks *c = g_value_get_pointer (value);
743         self->priv->callbacks = *c;
744       }
745       break;
746     case PROP_ACK_DELAY:
747       self->priv->ack_delay = g_value_get_uint (value);
748       break;
749     case PROP_NO_DELAY:
750       self->priv->use_nagling = !g_value_get_boolean (value);
751       break;
752     case PROP_RCV_BUF:
753       g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
754       resize_receive_buffer (self, g_value_get_uint (value));
755       break;
756     case PROP_SND_BUF:
757       g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
758       resize_send_buffer (self, g_value_get_uint (value));
759       break;
760     case PROP_SUPPORT_FIN_ACK:
761       self->priv->support_fin_ack = g_value_get_boolean (value);
762       break;
763     default:
764       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
765       break;
766   }
767 }
768 
769 static void
pseudo_tcp_socket_finalize(GObject * object)770 pseudo_tcp_socket_finalize (GObject *object)
771 {
772   PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
773   PseudoTcpSocketPrivate *priv = self->priv;
774   GList *i;
775   SSegment *sseg;
776 
777   if (priv == NULL)
778     return;
779 
780   while ((sseg = g_queue_pop_head (&priv->slist)))
781     g_slice_free (SSegment, sseg);
782   g_queue_clear (&priv->unsent_slist);
783   for (i = priv->rlist; i; i = i->next) {
784     RSegment *rseg = i->data;
785     g_slice_free (RSegment, rseg);
786   }
787   g_list_free (priv->rlist);
788   priv->rlist = NULL;
789 
790   pseudo_tcp_fifo_clear (&priv->rbuf);
791   pseudo_tcp_fifo_clear (&priv->sbuf);
792 
793   g_free (priv);
794   self->priv = NULL;
795 
796   if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize)
797     G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize (object);
798 }
799 
800 
801 static void
pseudo_tcp_socket_init(PseudoTcpSocket * obj)802 pseudo_tcp_socket_init (PseudoTcpSocket *obj)
803 {
804   /* Use g_new0, and do not use g_object_set_private because the size of
805    * our private data is too big (150KB+) and the g_slice_allow cannot allocate
806    * it. So we handle the private ourselves */
807   PseudoTcpSocketPrivate *priv = g_new0 (PseudoTcpSocketPrivate, 1);
808 
809   obj->priv = priv;
810 
811   priv->shutdown = SD_NONE;
812   priv->error = 0;
813 
814   priv->rbuf_len = DEFAULT_RCV_BUF_SIZE;
815   pseudo_tcp_fifo_init (&priv->rbuf, priv->rbuf_len);
816   priv->sbuf_len = DEFAULT_SND_BUF_SIZE;
817   pseudo_tcp_fifo_init (&priv->sbuf, priv->sbuf_len);
818 
819   priv->state = PSEUDO_TCP_LISTEN;
820   priv->conv = 0;
821   g_queue_init (&priv->slist);
822   g_queue_init (&priv->unsent_slist);
823   priv->rcv_wnd = priv->rbuf_len;
824   priv->rwnd_scale = priv->swnd_scale = 0;
825   priv->snd_nxt = 0;
826   priv->snd_wnd = 1;
827   priv->snd_una = priv->rcv_nxt = 0;
828   priv->bReadEnable = TRUE;
829   priv->bWriteEnable = FALSE;
830   priv->rcv_fin = 0;
831 
832   priv->t_ack = 0;
833 
834   priv->msslevel = 0;
835   priv->largest = 0;
836   priv->mss = MIN_PACKET - PACKET_OVERHEAD;
837   priv->mtu_advise = DEF_MTU;
838 
839   priv->rto_base = 0;
840 
841   priv->cwnd = 2 * priv->mss;
842   priv->ssthresh = priv->rbuf_len;
843   priv->lastrecv = priv->lastsend = priv->last_traffic = 0;
844   priv->bOutgoing = FALSE;
845 
846   priv->dup_acks = 0;
847   priv->recover = 0;
848   priv->last_acked_ts = 0;
849 
850   priv->ts_recent = priv->ts_lastack = 0;
851 
852   priv->rx_rto = DEF_RTO;
853   priv->rx_srtt = priv->rx_rttvar = 0;
854 
855   priv->ack_delay = DEFAULT_ACK_DELAY;
856   priv->use_nagling = !DEFAULT_NO_DELAY;
857 
858   priv->support_wnd_scale = TRUE;
859   priv->support_fin_ack = TRUE;
860 }
861 
pseudo_tcp_socket_new(guint32 conversation,PseudoTcpCallbacks * callbacks)862 PseudoTcpSocket *pseudo_tcp_socket_new (guint32 conversation,
863     PseudoTcpCallbacks *callbacks)
864 {
865 
866   return g_object_new (PSEUDO_TCP_SOCKET_TYPE,
867       "conversation", conversation,
868       "callbacks", callbacks,
869       NULL);
870 }
871 
872 static void
queue_connect_message(PseudoTcpSocket * self)873 queue_connect_message (PseudoTcpSocket *self)
874 {
875   PseudoTcpSocketPrivate *priv = self->priv;
876   guint8 buf[8];
877   gsize size = 0;
878 
879   buf[size++] = CTL_CONNECT;
880 
881   if (priv->support_wnd_scale) {
882     buf[size++] = TCP_OPT_WND_SCALE;
883     buf[size++] = 1;
884     buf[size++] = priv->rwnd_scale;
885   }
886 
887   if (priv->support_fin_ack) {
888     buf[size++] = TCP_OPT_FIN_ACK;
889     buf[size++] = 1;  /* option length; zero is invalid (RFC 1122, §4.2.2.5) */
890     buf[size++] = 0;  /* currently unused */
891   }
892 
893   priv->snd_wnd = size;
894 
895   queue (self, (char *) buf, size, FLAG_CTL);
896 }
897 
898 static void
queue_fin_message(PseudoTcpSocket * self)899 queue_fin_message (PseudoTcpSocket *self)
900 {
901   g_assert (self->priv->support_fin_ack);
902 
903   /* FIN segments are always zero-length. */
904   queue (self, "", 0, FLAG_FIN);
905 }
906 
907 static void
queue_rst_message(PseudoTcpSocket * self)908 queue_rst_message (PseudoTcpSocket *self)
909 {
910   g_assert (self->priv->support_fin_ack);
911 
912   /* RST segments are always zero-length. */
913   queue (self, "", 0, FLAG_RST);
914 }
915 
916 gboolean
pseudo_tcp_socket_connect(PseudoTcpSocket * self)917 pseudo_tcp_socket_connect(PseudoTcpSocket *self)
918 {
919   PseudoTcpSocketPrivate *priv = self->priv;
920 
921   if (priv->state != PSEUDO_TCP_LISTEN) {
922     priv->error = EINVAL;
923     return FALSE;
924   }
925 
926   set_state (self, PSEUDO_TCP_SYN_SENT);
927 
928   queue_connect_message (self);
929   attempt_send(self, sfNone);
930 
931   return TRUE;
932 }
933 
934 void
pseudo_tcp_socket_notify_mtu(PseudoTcpSocket * self,guint16 mtu)935 pseudo_tcp_socket_notify_mtu(PseudoTcpSocket *self, guint16 mtu)
936 {
937   PseudoTcpSocketPrivate *priv = self->priv;
938   priv->mtu_advise = mtu;
939   if (priv->state == PSEUDO_TCP_ESTABLISHED) {
940     adjustMTU(self);
941   }
942 }
943 
944 void
pseudo_tcp_socket_notify_clock(PseudoTcpSocket * self)945 pseudo_tcp_socket_notify_clock(PseudoTcpSocket *self)
946 {
947   PseudoTcpSocketPrivate *priv = self->priv;
948   guint32 now = get_current_time (self);
949 
950   if (priv->state == PSEUDO_TCP_CLOSED)
951     return;
952 
953   /* If in the TIME-WAIT state, any delayed segments have passed and the
954    * connection can be considered closed from both ends.
955    * FIXME: This should probably actually compare a timestamp before
956    * operating. */
957   if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
958     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
959         "Notified clock in TIME-WAIT state; closing connection.");
960     set_state_closed (self, 0);
961   }
962 
963   /* If in the LAST-ACK state, resend the FIN because it hasn’t been ACKed yet.
964    * FIXME: This should probably actually compare a timestamp before
965    * operating. */
966   if (priv->support_fin_ack && priv->state == PSEUDO_TCP_LAST_ACK) {
967     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
968         "Notified clock in LAST-ACK state; resending FIN segment.");
969     queue_fin_message (self);
970     attempt_send (self, sfFin);
971   }
972 
973   // Check if it's time to retransmit a segment
974   if (priv->rto_base &&
975       (time_diff(priv->rto_base + priv->rx_rto, now) <= 0)) {
976     if (g_queue_get_length (&priv->slist) == 0) {
977       g_assert_not_reached ();
978     } else {
979       // Note: (priv->slist.front().xmit == 0)) {
980       // retransmit segments
981       guint32 nInFlight;
982       guint32 rto_limit;
983       int transmit_status;
984 
985       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "timeout retransmit (rto: %u) "
986           "(rto_base: %u) (now: %u) (dup_acks: %u)",
987           priv->rx_rto, priv->rto_base, now, (guint) priv->dup_acks);
988 
989       transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
990       if (transmit_status != 0) {
991         DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
992             "Error transmitting segment. Closing down.");
993         closedown (self, transmit_status, CLOSEDOWN_LOCAL);
994         return;
995       }
996 
997       nInFlight = priv->snd_nxt - priv->snd_una;
998       priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
999       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "ssthresh: %u = (nInFlight: %u / 2) + "
1000           "2 * mss: %u", priv->ssthresh, nInFlight, priv->mss);
1001       //LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << "  nInFlight: " << nInFlight << "  priv->mss: " << priv->mss;
1002       priv->cwnd = priv->mss;
1003 
1004       // Back off retransmit timer.  Note: the limit is lower when connecting.
1005       rto_limit = (priv->state < PSEUDO_TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
1006       priv->rx_rto = min(rto_limit, priv->rx_rto * 2);
1007       priv->rto_base = now;
1008 
1009       priv->recover = priv->snd_nxt;
1010       if (priv->dup_acks >= 3) {
1011         priv->dup_acks = 0;
1012         priv->fast_recovery = FALSE;
1013         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery on timeout");
1014       }
1015     }
1016   }
1017 
1018   // Check if it's time to probe closed windows
1019   if ((priv->snd_wnd == 0)
1020         && (time_diff(priv->lastsend + priv->rx_rto, now) <= 0)) {
1021     if (time_diff(now, priv->lastrecv) >= 15000) {
1022       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Receive window closed. Closing down.");
1023       closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1024       return;
1025     }
1026 
1027     // probe the window
1028     packet(self, priv->snd_nxt - 1, 0, 0, 0, now);
1029     priv->lastsend = now;
1030 
1031     // back off retransmit timer
1032     priv->rx_rto = min(MAX_RTO, priv->rx_rto * 2);
1033   }
1034 
1035   // Check if it's time to send delayed acks
1036   if (priv->t_ack && (time_diff(priv->t_ack + priv->ack_delay, now) <= 0)) {
1037     packet(self, priv->snd_nxt, 0, 0, 0, now);
1038   }
1039 
1040 }
1041 
1042 gboolean
pseudo_tcp_socket_notify_packet(PseudoTcpSocket * self,const gchar * buffer,guint32 len)1043 pseudo_tcp_socket_notify_packet(PseudoTcpSocket *self,
1044     const gchar * buffer, guint32 len)
1045 {
1046   gboolean retval;
1047 
1048   if (len > MAX_PACKET) {
1049     //LOG_F(WARNING) << "packet too large";
1050     self->priv->error = EMSGSIZE;
1051     return FALSE;
1052   } else if (len < HEADER_SIZE) {
1053     //LOG_F(WARNING) << "packet too small";
1054     self->priv->error = EINVAL;
1055     return FALSE;
1056   }
1057 
1058   /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1059    * closed from within a callback. */
1060   g_object_ref (self);
1061   retval = parse (self, (guint8 *) buffer, HEADER_SIZE,
1062       (guint8 *) buffer + HEADER_SIZE, len - HEADER_SIZE);
1063   g_object_unref (self);
1064 
1065   return retval;
1066 }
1067 
1068 /* Assume there are two buffers in the given #NiceInputMessage: a 24-byte one
1069  * containing the header, and a bigger one for the data. */
1070 gboolean
pseudo_tcp_socket_notify_message(PseudoTcpSocket * self,NiceInputMessage * message)1071 pseudo_tcp_socket_notify_message (PseudoTcpSocket *self,
1072     NiceInputMessage *message)
1073 {
1074   gboolean retval;
1075 
1076   g_assert_cmpuint (message->n_buffers, >, 0);
1077 
1078   if (message->n_buffers == 1)
1079     return pseudo_tcp_socket_notify_packet (self, message->buffers[0].buffer,
1080         message->buffers[0].size);
1081 
1082   g_assert_cmpuint (message->n_buffers, ==, 2);
1083   g_assert_cmpuint (message->buffers[0].size, ==, HEADER_SIZE);
1084 
1085   if (message->length > MAX_PACKET) {
1086     //LOG_F(WARNING) << "packet too large";
1087     return FALSE;
1088   } else if (message->length < HEADER_SIZE) {
1089     //LOG_F(WARNING) << "packet too small";
1090     return FALSE;
1091   }
1092 
1093   /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1094    * closed from within a callback. */
1095   g_object_ref (self);
1096   retval = parse (self, message->buffers[0].buffer, message->buffers[0].size,
1097       message->buffers[1].buffer, message->length - message->buffers[0].size);
1098   g_object_unref (self);
1099 
1100   return retval;
1101 }
1102 
1103 gboolean
pseudo_tcp_socket_get_next_clock(PseudoTcpSocket * self,guint64 * timeout)1104 pseudo_tcp_socket_get_next_clock(PseudoTcpSocket *self, guint64 *timeout)
1105 {
1106   PseudoTcpSocketPrivate *priv = self->priv;
1107   guint32 now = get_current_time (self);
1108   gsize snd_buffered;
1109   guint32 closed_timeout;
1110 
1111   if (priv->shutdown == SD_FORCEFUL) {
1112     if (priv->support_fin_ack) {
1113       DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1114           "‘Forceful’ shutdown used when FIN-ACK support is enabled");
1115     }
1116 
1117     /* Transition to the CLOSED state. */
1118     closedown (self, 0, CLOSEDOWN_REMOTE);
1119 
1120     return FALSE;
1121   }
1122 
1123   snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1124   if ((priv->shutdown == SD_GRACEFUL)
1125       && ((priv->state != PSEUDO_TCP_ESTABLISHED)
1126           || ((snd_buffered == 0) && (priv->t_ack == 0)))) {
1127     if (priv->support_fin_ack) {
1128       DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1129           "‘Graceful’ shutdown used when FIN-ACK support is enabled");
1130     }
1131 
1132     /* Transition to the CLOSED state. */
1133     closedown (self, 0, CLOSEDOWN_REMOTE);
1134 
1135     return FALSE;
1136   }
1137 
1138   /* FIN-ACK support. The timeout for closing the socket if nothing is received
1139    * varies depending on whether the socket is waiting in the TIME-WAIT state
1140    * for delayed segments to pass.
1141    *
1142    * See: http://vincent.bernat.im/en/blog/2014-tcp-time-wait-state-linux.html
1143    */
1144   closed_timeout = CLOSED_TIMEOUT;
1145   if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT)
1146     closed_timeout = TIME_WAIT_TIMEOUT;
1147 
1148   if (priv->support_fin_ack && priv->state == PSEUDO_TCP_CLOSED) {
1149     return FALSE;
1150   }
1151 
1152   if (*timeout == 0 || *timeout < now)
1153     *timeout = now + closed_timeout;
1154 
1155   if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
1156     *timeout = min (*timeout, now + TIME_WAIT_TIMEOUT);
1157     return TRUE;
1158   }
1159 
1160   if (priv->state == PSEUDO_TCP_CLOSED && !priv->support_fin_ack) {
1161     *timeout = min (*timeout, now + CLOSED_TIMEOUT);
1162     return TRUE;
1163   }
1164 
1165   *timeout = min (*timeout, now + DEFAULT_TIMEOUT);
1166 
1167   if (priv->t_ack) {
1168     *timeout = min(*timeout, priv->t_ack + priv->ack_delay);
1169   }
1170   if (priv->rto_base) {
1171     *timeout = min(*timeout, priv->rto_base + priv->rx_rto);
1172   }
1173   if (priv->snd_wnd == 0) {
1174     *timeout = min(*timeout, priv->lastsend + priv->rx_rto);
1175   }
1176 
1177   return TRUE;
1178 }
1179 
1180 
1181 gint
pseudo_tcp_socket_recv(PseudoTcpSocket * self,char * buffer,size_t len)1182 pseudo_tcp_socket_recv(PseudoTcpSocket *self, char * buffer, size_t len)
1183 {
1184   PseudoTcpSocketPrivate *priv = self->priv;
1185   gsize bytesread;
1186   gsize available_space;
1187 
1188   /* Received a FIN from the peer, so return 0. RFC 793, §3.5, Case 2. */
1189   if (priv->support_fin_ack && priv->shutdown_reads) {
1190     return 0;
1191   }
1192 
1193   /* Return 0 if FIN-ACK is not supported but the socket has been closed. */
1194   if (!priv->support_fin_ack && pseudo_tcp_socket_is_closed (self)) {
1195     return 0;
1196   }
1197 
1198   /* Return ENOTCONN if FIN-ACK is not supported and the connection is not
1199    * ESTABLISHED. */
1200   if (!priv->support_fin_ack && priv->state != PSEUDO_TCP_ESTABLISHED) {
1201     priv->error = ENOTCONN;
1202     return -1;
1203   }
1204 
1205   if (len == 0)
1206     return 0;
1207 
1208   bytesread = pseudo_tcp_fifo_read (&priv->rbuf, (guint8 *) buffer, len);
1209 
1210  // If there's no data in |m_rbuf|.
1211   if (bytesread == 0 &&
1212       !(pseudo_tcp_state_has_received_fin (priv->state) ||
1213         pseudo_tcp_state_has_received_fin_ack (priv->state))) {
1214     priv->bReadEnable = TRUE;
1215     priv->error = EWOULDBLOCK;
1216     return -1;
1217   }
1218 
1219   available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
1220 
1221   if (available_space - priv->rcv_wnd >=
1222       min (priv->rbuf_len / 2, priv->mss)) {
1223     // !?! Not sure about this was closed business
1224     gboolean bWasClosed = (priv->rcv_wnd == 0);
1225 
1226     priv->rcv_wnd = available_space;
1227 
1228     if (bWasClosed) {
1229       attempt_send(self, sfImmediateAck);
1230     }
1231   }
1232 
1233   return bytesread;
1234 }
1235 
1236 gint
pseudo_tcp_socket_send(PseudoTcpSocket * self,const char * buffer,guint32 len)1237 pseudo_tcp_socket_send(PseudoTcpSocket *self, const char * buffer, guint32 len)
1238 {
1239   PseudoTcpSocketPrivate *priv = self->priv;
1240   gint written;
1241   gsize available_space;
1242 
1243   if (priv->state != PSEUDO_TCP_ESTABLISHED) {
1244     priv->error = pseudo_tcp_state_has_sent_fin (priv->state) ? EPIPE : ENOTCONN;
1245     return -1;
1246   }
1247 
1248   available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1249 
1250   if (!available_space) {
1251     priv->bWriteEnable = TRUE;
1252     priv->error = EWOULDBLOCK;
1253     return -1;
1254   }
1255 
1256   written = queue (self, buffer, len, FLAG_NONE);
1257   attempt_send(self, sfNone);
1258 
1259   if (written > 0 && (guint32)written < len) {
1260     priv->bWriteEnable = TRUE;
1261   }
1262 
1263   return written;
1264 }
1265 
1266 void
pseudo_tcp_socket_close(PseudoTcpSocket * self,gboolean force)1267 pseudo_tcp_socket_close(PseudoTcpSocket *self, gboolean force)
1268 {
1269   PseudoTcpSocketPrivate *priv = self->priv;
1270 
1271   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing socket %p %s", self,
1272       force ? "forcefully" : "gracefully");
1273 
1274   /* Forced closure by sending an RST segment. RFC 1122, §4.2.2.13. */
1275   if (force && priv->state != PSEUDO_TCP_CLOSED) {
1276     closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1277     return;
1278   }
1279 
1280   /* Fall back to shutdown(). */
1281   pseudo_tcp_socket_shutdown (self, PSEUDO_TCP_SHUTDOWN_RDWR);
1282 }
1283 
1284 void
pseudo_tcp_socket_shutdown(PseudoTcpSocket * self,PseudoTcpShutdown how)1285 pseudo_tcp_socket_shutdown (PseudoTcpSocket *self, PseudoTcpShutdown how)
1286 {
1287   PseudoTcpSocketPrivate *priv = self->priv;
1288 
1289   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Shutting down socket %p: %u", self, how);
1290 
1291   /* FIN-ACK--only stuff below here. */
1292   if (!priv->support_fin_ack) {
1293     if (priv->shutdown == SD_NONE)
1294       priv->shutdown = SD_GRACEFUL;
1295     return;
1296   }
1297 
1298   /* What needs shutting down? */
1299   switch (how) {
1300   case PSEUDO_TCP_SHUTDOWN_RD:
1301   case PSEUDO_TCP_SHUTDOWN_RDWR:
1302     priv->shutdown_reads = TRUE;
1303     break;
1304   case PSEUDO_TCP_SHUTDOWN_WR:
1305     /* Handled below. */
1306     break;
1307   default:
1308     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid shutdown method: %u.", how);
1309     break;
1310   }
1311 
1312   if (how == PSEUDO_TCP_SHUTDOWN_RD) {
1313     return;
1314   }
1315 
1316   /* Unforced write closure. */
1317   switch (priv->state) {
1318   case PSEUDO_TCP_LISTEN:
1319   case PSEUDO_TCP_SYN_SENT:
1320     /* Just abort the connection without completing the handshake. */
1321     set_state_closed (self, 0);
1322     break;
1323   case PSEUDO_TCP_SYN_RECEIVED:
1324   case PSEUDO_TCP_ESTABLISHED:
1325     /* Local user initiating the close: RFC 793, §3.5, Cases 1 and 3.
1326      * If there is pending receive data, send RST instead of FIN;
1327      * see RFC 1122, §4.2.2.13. */
1328     if (pseudo_tcp_socket_get_available_bytes (self) > 0) {
1329       closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1330     } else {
1331       queue_fin_message (self);
1332       attempt_send (self, sfFin);
1333       set_state (self, PSEUDO_TCP_FIN_WAIT_1);
1334     }
1335     break;
1336   case PSEUDO_TCP_CLOSE_WAIT:
1337     /* Remote user initiating the close: RFC 793, §3.5, Case 2.
1338      * We’ve previously received a FIN from the peer; now the user is closing
1339      * the local end of the connection. */
1340     queue_fin_message (self);
1341     attempt_send (self, sfFin);
1342     set_state (self, PSEUDO_TCP_LAST_ACK);
1343     break;
1344   case PSEUDO_TCP_CLOSING:
1345   case PSEUDO_TCP_CLOSED:
1346     /* Already closed on both sides. */
1347     break;
1348   case PSEUDO_TCP_FIN_WAIT_1:
1349   case PSEUDO_TCP_FIN_WAIT_2:
1350   case PSEUDO_TCP_TIME_WAIT:
1351   case PSEUDO_TCP_LAST_ACK:
1352     /* Already closed locally. */
1353     break;
1354   default:
1355     /* Do nothing. */
1356     break;
1357   }
1358 }
1359 
1360 int
pseudo_tcp_socket_get_error(PseudoTcpSocket * self)1361 pseudo_tcp_socket_get_error(PseudoTcpSocket *self)
1362 {
1363   PseudoTcpSocketPrivate *priv = self->priv;
1364   return priv->error;
1365 }
1366 
1367 //
1368 // Internal Implementation
1369 //
1370 
1371 static guint32
queue(PseudoTcpSocket * self,const gchar * data,guint32 len,TcpFlags flags)1372 queue (PseudoTcpSocket *self, const gchar * data, guint32 len, TcpFlags flags)
1373 {
1374   PseudoTcpSocketPrivate *priv = self->priv;
1375   gsize available_space;
1376 
1377   available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1378   if (len > available_space) {
1379     g_assert_cmpint (flags, ==, FLAG_NONE);
1380     len = available_space;
1381   }
1382 
1383   // We can concatenate data if the last segment is the same type
1384   // (control v. regular data), and has not been transmitted yet
1385   if (g_queue_get_length (&priv->slist) &&
1386       (((SSegment *)g_queue_peek_tail (&priv->slist))->flags == flags) &&
1387       (((SSegment *)g_queue_peek_tail (&priv->slist))->xmit == 0)) {
1388     ((SSegment *)g_queue_peek_tail (&priv->slist))->len += len;
1389   } else {
1390     SSegment *sseg = g_slice_new0 (SSegment);
1391     gsize snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1392 
1393     sseg->seq = priv->snd_una + snd_buffered;
1394     sseg->len = len;
1395     sseg->flags = flags;
1396     g_queue_push_tail (&priv->slist, sseg);
1397     g_queue_push_tail (&priv->unsent_slist, sseg);
1398   }
1399 
1400   //LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
1401   return pseudo_tcp_fifo_write (&priv->sbuf, (guint8*) data, len);;
1402 }
1403 
1404 // Creates a packet and submits it to the network. This method can either
1405 // send payload or just an ACK packet.
1406 //
1407 // |seq| is the sequence number of this packet.
1408 // |flags| is the flags for sending this packet.
1409 // |offset| is the offset to read from |m_sbuf|.
1410 // |len| is the number of bytes to read from |m_sbuf| as payload. If this
1411 // value is 0 then this is an ACK packet, otherwise this packet has payload.
1412 
1413 static PseudoTcpWriteResult
packet(PseudoTcpSocket * self,guint32 seq,TcpFlags flags,guint32 offset,guint32 len,guint32 now)1414 packet(PseudoTcpSocket *self, guint32 seq, TcpFlags flags,
1415     guint32 offset, guint32 len, guint32 now)
1416 {
1417   PseudoTcpSocketPrivate *priv = self->priv;
1418   union {
1419     guint8 u8[MAX_PACKET];
1420     guint16 u16[MAX_PACKET / 2];
1421     guint32 u32[MAX_PACKET / 4];
1422   } buffer;
1423   PseudoTcpWriteResult wres = WR_SUCCESS;
1424 
1425   g_assert_cmpuint (HEADER_SIZE + len, <=, MAX_PACKET);
1426 
1427   *buffer.u32 = htonl(priv->conv);
1428   *(buffer.u32 + 1) = htonl(seq);
1429   *(buffer.u32 + 2) = htonl(priv->rcv_nxt);
1430   buffer.u8[12] = 0;
1431   buffer.u8[13] = flags;
1432   *(buffer.u16 + 7) = htons((guint16)(priv->rcv_wnd >> priv->rwnd_scale));
1433 
1434   // Timestamp computations
1435   *(buffer.u32 + 4) = htonl(now);
1436   *(buffer.u32 + 5) = htonl(priv->ts_recent);
1437   priv->ts_lastack = priv->rcv_nxt;
1438 
1439   if (len) {
1440     gsize bytes_read;
1441 
1442     bytes_read = pseudo_tcp_fifo_read_offset (&priv->sbuf, buffer.u8 + HEADER_SIZE,
1443         len, offset);
1444     g_assert_cmpint (bytes_read, ==, len);
1445   }
1446 
1447   DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Sending <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1448       "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1449       priv->conv, (unsigned)flags, seq, seq + len, priv->rcv_nxt, priv->rcv_wnd,
1450       now % 10000, priv->ts_recent % 10000, len);
1451 
1452   wres = priv->callbacks.WritePacket(self, (gchar *) buffer.u8, len + HEADER_SIZE,
1453                                      priv->callbacks.user_data);
1454   /* Note: When len is 0, this is an ACK packet.  We don't read the
1455      return value for those, and thus we won't retry.  So go ahead and treat
1456      the packet as a success (basically simulate as if it were dropped),
1457      which will prevent our timers from being messed up. */
1458   if ((wres != WR_SUCCESS) && (0 != len))
1459     return wres;
1460 
1461   priv->t_ack = 0;
1462   if (len > 0) {
1463     priv->lastsend = now;
1464   }
1465   priv->last_traffic = now;
1466   priv->bOutgoing = TRUE;
1467 
1468   return WR_SUCCESS;
1469 }
1470 
1471 static gboolean
parse(PseudoTcpSocket * self,const guint8 * _header_buf,gsize header_buf_len,const guint8 * data_buf,gsize data_buf_len)1472 parse (PseudoTcpSocket *self, const guint8 *_header_buf, gsize header_buf_len,
1473     const guint8 *data_buf, gsize data_buf_len)
1474 {
1475   Segment seg;
1476 
1477   union {
1478     const guint8 *u8;
1479     const guint16 *u16;
1480     const guint32 *u32;
1481   } header_buf;
1482 
1483   header_buf.u8 = _header_buf;
1484 
1485   if (header_buf_len != 24)
1486     return FALSE;
1487 
1488   seg.conv = ntohl(*header_buf.u32);
1489   seg.seq = ntohl(*(header_buf.u32 + 1));
1490   seg.ack = ntohl(*(header_buf.u32 + 2));
1491   seg.flags = header_buf.u8[13];
1492   seg.wnd = ntohs(*(header_buf.u16 + 7));
1493 
1494   seg.tsval = ntohl(*(header_buf.u32 + 4));
1495   seg.tsecr = ntohl(*(header_buf.u32 + 5));
1496 
1497   seg.data = (const gchar *) data_buf;
1498   seg.len = data_buf_len;
1499 
1500   DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1501       "Received <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1502       "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1503       seg.conv, (unsigned)seg.flags, seg.seq, seg.seq + seg.len, seg.ack,
1504       seg.wnd, seg.tsval % 10000, seg.tsecr % 10000, seg.len);
1505 
1506   return process(self, &seg);
1507 }
1508 
1509 /* True iff the @state requires that a FIN has already been sent by this
1510  * host. */
1511 static gboolean
pseudo_tcp_state_has_sent_fin(PseudoTcpState state)1512 pseudo_tcp_state_has_sent_fin (PseudoTcpState state)
1513 {
1514   switch (state) {
1515   case PSEUDO_TCP_LISTEN:
1516   case PSEUDO_TCP_SYN_SENT:
1517   case PSEUDO_TCP_SYN_RECEIVED:
1518   case PSEUDO_TCP_ESTABLISHED:
1519   case PSEUDO_TCP_CLOSE_WAIT:
1520     return FALSE;
1521   case PSEUDO_TCP_CLOSED:
1522   case PSEUDO_TCP_FIN_WAIT_1:
1523   case PSEUDO_TCP_FIN_WAIT_2:
1524   case PSEUDO_TCP_CLOSING:
1525   case PSEUDO_TCP_TIME_WAIT:
1526   case PSEUDO_TCP_LAST_ACK:
1527     return TRUE;
1528   default:
1529     return FALSE;
1530   }
1531 }
1532 
1533 /* True iff the @state requires that a FIN has already been received from the
1534  * peer. */
1535 static gboolean
pseudo_tcp_state_has_received_fin(PseudoTcpState state)1536 pseudo_tcp_state_has_received_fin (PseudoTcpState state)
1537 {
1538   switch (state) {
1539   case PSEUDO_TCP_LISTEN:
1540   case PSEUDO_TCP_SYN_SENT:
1541   case PSEUDO_TCP_SYN_RECEIVED:
1542   case PSEUDO_TCP_ESTABLISHED:
1543   case PSEUDO_TCP_FIN_WAIT_1:
1544   case PSEUDO_TCP_FIN_WAIT_2:
1545     return FALSE;
1546   case PSEUDO_TCP_CLOSED:
1547   case PSEUDO_TCP_CLOSING:
1548   case PSEUDO_TCP_TIME_WAIT:
1549   case PSEUDO_TCP_CLOSE_WAIT:
1550   case PSEUDO_TCP_LAST_ACK:
1551     return TRUE;
1552   default:
1553     return FALSE;
1554   }
1555 }
1556 
1557 /* True iff the @state requires that a FIN-ACK has already been received from
1558  * the peer. */
1559 static gboolean
pseudo_tcp_state_has_received_fin_ack(PseudoTcpState state)1560 pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state)
1561 {
1562   switch (state) {
1563   case PSEUDO_TCP_LISTEN:
1564   case PSEUDO_TCP_SYN_SENT:
1565   case PSEUDO_TCP_SYN_RECEIVED:
1566   case PSEUDO_TCP_ESTABLISHED:
1567   case PSEUDO_TCP_FIN_WAIT_1:
1568   case PSEUDO_TCP_FIN_WAIT_2:
1569   case PSEUDO_TCP_CLOSING:
1570   case PSEUDO_TCP_CLOSE_WAIT:
1571   case PSEUDO_TCP_LAST_ACK:
1572     return FALSE;
1573   case PSEUDO_TCP_CLOSED:
1574   case PSEUDO_TCP_TIME_WAIT:
1575     return TRUE;
1576   default:
1577     return FALSE;
1578   }
1579 }
1580 
1581 static gboolean
process(PseudoTcpSocket * self,Segment * seg)1582 process(PseudoTcpSocket *self, Segment *seg)
1583 {
1584   PseudoTcpSocketPrivate *priv = self->priv;
1585   guint32 now;
1586   SendFlags sflags = sfNone;
1587   gboolean bIgnoreData;
1588   gboolean bNewData;
1589   gboolean bConnect = FALSE;
1590   gsize snd_buffered;
1591   gsize available_space;
1592   guint32 kIdealRefillSize;
1593   gboolean is_valuable_ack, is_duplicate_ack, is_fin_ack = FALSE;
1594   gboolean received_fin = FALSE;
1595 
1596   /* If this is the wrong conversation, send a reset!?!
1597      (with the correct conversation?) */
1598   if (seg->conv != priv->conv) {
1599     //if ((seg->flags & FLAG_RST) == 0) {
1600     //  packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
1601     //}
1602     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "wrong conversation");
1603     return FALSE;
1604   }
1605 
1606   now = get_current_time (self);
1607   priv->last_traffic = priv->lastrecv = now;
1608   priv->bOutgoing = FALSE;
1609 
1610   if (priv->state == PSEUDO_TCP_CLOSED ||
1611       (pseudo_tcp_state_has_received_fin_ack (priv->state) && seg->len > 0)) {
1612     /* Send an RST segment. See: RFC 1122, §4.2.2.13; RFC 793, §3.4, point 3,
1613      * page 37. We can only send RST if we know the peer knows we’re closed;
1614      * otherwise this could be a timeout retransmit from them, due to our
1615      * packets from data through to FIN being dropped. */
1616     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1617         "Segment received while closed; sending RST.");
1618     if ((seg->flags & FLAG_RST) == 0) {
1619       closedown (self, 0, CLOSEDOWN_LOCAL);
1620     }
1621 
1622     return FALSE;
1623   }
1624 
1625   // Check if this is a reset segment
1626   if (seg->flags & FLAG_RST) {
1627     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Received RST segment; closing down.");
1628     closedown (self, ECONNRESET, CLOSEDOWN_REMOTE);
1629     return FALSE;
1630   }
1631 
1632   // Check for control data
1633   bConnect = FALSE;
1634   if (seg->flags & FLAG_CTL) {
1635     if (seg->len == 0) {
1636       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Missing control code");
1637       return FALSE;
1638     } else if (seg->data[0] == CTL_CONNECT) {
1639       bConnect = TRUE;
1640 
1641       parse_options (self, (guint8 *) &seg->data[1], seg->len - 1);
1642 
1643       if (priv->state == PSEUDO_TCP_LISTEN) {
1644         set_state (self, PSEUDO_TCP_SYN_RECEIVED);
1645         queue_connect_message (self);
1646       } else if (priv->state == PSEUDO_TCP_SYN_SENT) {
1647         set_state_established (self);
1648       }
1649     } else {
1650       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Unknown control code: %u", seg->data[0]);
1651       return FALSE;
1652     }
1653   }
1654 
1655   // Update timestamp
1656   if (SMALLER_OR_EQUAL (seg->seq, priv->ts_lastack) &&
1657       SMALLER (priv->ts_lastack, seg->seq + seg->len)) {
1658     priv->ts_recent = seg->tsval;
1659   }
1660 
1661   // Check if this is a valuable ack
1662   is_valuable_ack = (LARGER(seg->ack, priv->snd_una) &&
1663       SMALLER_OR_EQUAL(seg->ack, priv->snd_nxt));
1664   is_duplicate_ack = (seg->ack == priv->snd_una);
1665 
1666   if (is_valuable_ack) {
1667     guint32 nAcked;
1668     guint32 nFree;
1669 
1670     // Calculate round-trip time
1671     if (seg->tsecr) {
1672       long rtt = time_diff(now, seg->tsecr);
1673       if (rtt >= 0) {
1674         if (priv->rx_srtt == 0) {
1675           priv->rx_srtt = rtt;
1676           priv->rx_rttvar = rtt / 2;
1677         } else {
1678           priv->rx_rttvar = (3 * priv->rx_rttvar +
1679               labs((long)(rtt - priv->rx_srtt))) / 4;
1680           priv->rx_srtt = (7 * priv->rx_srtt + rtt) / 8;
1681         }
1682         priv->rx_rto = bound(MIN_RTO,
1683             priv->rx_srtt + max(1LU, 4 * priv->rx_rttvar), MAX_RTO);
1684 
1685         DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "rtt: %ld srtt: %u rttvar: %u rto: %u",
1686             rtt, priv->rx_srtt, priv->rx_rttvar, priv->rx_rto);
1687       } else {
1688         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid RTT: %ld", rtt);
1689         return FALSE;
1690       }
1691 
1692       priv->last_acked_ts = seg->tsecr;
1693     }
1694 
1695     priv->snd_wnd = seg->wnd << priv->swnd_scale;
1696 
1697     nAcked = seg->ack - priv->snd_una;
1698     priv->snd_una = seg->ack;
1699 
1700     priv->rto_base = (priv->snd_una == priv->snd_nxt) ? 0 : now;
1701 
1702     /* ACKs for FIN segments give an increment on nAcked, but there is no
1703      * corresponding byte to read because the FIN segment is empty (it just has
1704      * a sequence number). */
1705     if (nAcked == priv->sbuf.data_length + 1 &&
1706         pseudo_tcp_state_has_sent_fin (priv->state)) {
1707       is_fin_ack = TRUE;
1708       nAcked--;
1709     }
1710 
1711     pseudo_tcp_fifo_consume_read_data (&priv->sbuf, nAcked);
1712 
1713     for (nFree = nAcked; nFree > 0; ) {
1714       SSegment *data;
1715 
1716       g_assert_cmpuint (g_queue_get_length (&priv->slist), !=, 0);
1717       data = (SSegment *) g_queue_peek_head (&priv->slist);
1718 
1719       if (nFree < data->len) {
1720         data->len -= nFree;
1721         data->seq += nFree;
1722         nFree = 0;
1723       } else {
1724         if (data->len > priv->largest) {
1725           priv->largest = data->len;
1726         }
1727         nFree -= data->len;
1728         g_slice_free (SSegment, data);
1729         g_queue_pop_head (&priv->slist);
1730       }
1731     }
1732 
1733     if (priv->dup_acks >= 3) {
1734       if (LARGER_OR_EQUAL (priv->snd_una, priv->recover)) { // NewReno
1735         guint32 nInFlight = priv->snd_nxt - priv->snd_una;
1736         // (Fast Retransmit)
1737         priv->cwnd = min(priv->ssthresh,
1738             max (nInFlight, priv->mss) + priv->mss);
1739         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery cwnd=%d ssthresh=%d nInFlight=%d mss: %d", priv->cwnd, priv->ssthresh, nInFlight, priv->mss);
1740         priv->fast_recovery = FALSE;
1741         priv->dup_acks = 0;
1742       } else {
1743         int transmit_status;
1744 
1745         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1746         transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
1747         if (transmit_status != 0) {
1748           DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1749               "Error transmitting recovery retransmit segment. Closing down.");
1750           closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1751           return FALSE;
1752         }
1753         priv->cwnd += (nAcked > priv->mss ? priv->mss : 0) -
1754             min(nAcked, priv->cwnd);
1755       }
1756     } else {
1757       priv->dup_acks = 0;
1758       // Slow start, congestion avoidance
1759       if (priv->cwnd < priv->ssthresh) {
1760         priv->cwnd += priv->mss;
1761       } else {
1762         priv->cwnd += max(1LU, priv->mss * priv->mss / priv->cwnd);
1763       }
1764     }
1765   } else if (is_duplicate_ack) {
1766     /* !?! Note, tcp says don't do this... but otherwise how does a
1767        closed window become open? */
1768     priv->snd_wnd = seg->wnd << priv->swnd_scale;
1769 
1770     // Check duplicate acks
1771     if (seg->len > 0) {
1772       // it's a dup ack, but with a data payload, so don't modify priv->dup_acks
1773     } else if (priv->snd_una != priv->snd_nxt) {
1774       guint32 nInFlight;
1775 
1776       priv->dup_acks += 1;
1777       DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Received dup ack (dups: %u)",
1778           priv->dup_acks);
1779       if (priv->dup_acks == 3) { // (Fast Retransmit)
1780         int transmit_status;
1781 
1782 
1783         if (LARGER_OR_EQUAL (priv->snd_una, priv->recover) ||
1784             seg->tsecr == priv->last_acked_ts) { /* NewReno */
1785           /* Invoke fast retransmit  RFC3782 section 3 step 1A*/
1786           DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "enter recovery");
1787           DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1788 
1789           transmit_status = transmit(self, g_queue_peek_head (&priv->slist),
1790               now);
1791           if (transmit_status != 0) {
1792             DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1793                 "Error transmitting recovery retransmit segment. Closing down.");
1794 
1795             closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1796             return FALSE;
1797           }
1798           priv->recover = priv->snd_nxt;
1799           nInFlight = priv->snd_nxt - priv->snd_una;
1800           priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
1801           DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1802               "ssthresh: %u = max((nInFlight: %u / 2), 2 * mss: %u)",
1803               priv->ssthresh, nInFlight, priv->mss);
1804           priv->cwnd = priv->ssthresh + 3 * priv->mss;
1805           priv->fast_recovery = TRUE;
1806         } else {
1807           DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1808               "Skipping fast recovery: recover: %u snd_una: %u", priv->recover,
1809               priv->snd_una);
1810         }
1811       } else if (priv->dup_acks > 3) {
1812         if (priv->fast_recovery)
1813           priv->cwnd += priv->mss;
1814       }
1815     } else {
1816       priv->dup_acks = 0;
1817     }
1818   }
1819 
1820   // !?! A bit hacky
1821   if ((priv->state == PSEUDO_TCP_SYN_RECEIVED) && !bConnect) {
1822     set_state_established (self);
1823   }
1824 
1825   /* Check for connection closure. Only pay attention to FIN segments if they
1826    * are in sequence; otherwise we’ve missed a packet earlier in the stream and
1827    * need to request retransmission first. */
1828   if (priv->support_fin_ack) {
1829     /* @received_fin is set when, and only when, all segments preceding the FIN
1830      * have been acknowledged. This is to handle the case where the FIN arrives
1831      * out of order with a preceding data segment. */
1832     if (seg->flags & FLAG_FIN) {
1833       priv->rcv_fin = seg->seq;
1834       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting rcv_fin = %u", priv->rcv_fin);
1835     }
1836 
1837     /* For the moment, FIN segments must not contain data. */
1838     if (seg->flags & FLAG_FIN && seg->len != 0) {
1839       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN segment contained data; ignored");
1840       return FALSE;
1841     }
1842 
1843     received_fin = (priv->rcv_nxt != 0 && priv->rcv_nxt + seg->len == priv->rcv_fin);
1844 
1845     /* Update the state machine, implementing all transitions on ‘rcv FIN’ or
1846      * ‘rcv ACK of FIN’ from RFC 793, Figure 6; and RFC 1122, §4.2.2.8. */
1847     switch (priv->state) {
1848     case PSEUDO_TCP_ESTABLISHED:
1849       if (received_fin) {
1850         /* Received a FIN from the network, RFC 793, §3.5, Case 2.
1851          * The code below will send an ACK for the FIN. */
1852          set_state (self, PSEUDO_TCP_CLOSE_WAIT);
1853       }
1854       break;
1855     case PSEUDO_TCP_CLOSING:
1856       if (is_fin_ack) {
1857         /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 3. */
1858         set_state (self, PSEUDO_TCP_TIME_WAIT);
1859       }
1860       break;
1861     case PSEUDO_TCP_LAST_ACK:
1862       if (is_fin_ack) {
1863         /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 2. */
1864         set_state_closed (self, 0);
1865       }
1866       break;
1867     case PSEUDO_TCP_FIN_WAIT_1:
1868       if (is_fin_ack && received_fin) {
1869         /* Simultaneous close with an ACK for a FIN previously sent,
1870          * RFC 793, §3.5, Case 3. */
1871         set_state (self, PSEUDO_TCP_TIME_WAIT);
1872       } else if (is_fin_ack) {
1873         /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 1. */
1874         set_state (self, PSEUDO_TCP_FIN_WAIT_2);
1875       } else if (received_fin) {
1876         /* Simultaneous close, RFC 793, §3.5, Case 3. */
1877         set_state (self, PSEUDO_TCP_CLOSING);
1878       }
1879       break;
1880     case PSEUDO_TCP_FIN_WAIT_2:
1881       if (received_fin) {
1882         /* Local user closed the connection, RFC 793, §3.5, Case 1. */
1883         set_state (self, PSEUDO_TCP_TIME_WAIT);
1884       }
1885       break;
1886     case PSEUDO_TCP_LISTEN:
1887     case PSEUDO_TCP_SYN_SENT:
1888     case PSEUDO_TCP_SYN_RECEIVED:
1889     case PSEUDO_TCP_TIME_WAIT:
1890     case PSEUDO_TCP_CLOSED:
1891     case PSEUDO_TCP_CLOSE_WAIT:
1892       /* Shouldn’t ever hit these cases. */
1893       if (received_fin) {
1894         DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1895            "Unexpected state %u when FIN received", priv->state);
1896       } else if (is_fin_ack) {
1897         DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1898            "Unexpected state %u when FIN-ACK received", priv->state);
1899       }
1900       break;
1901     default:
1902       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid state %u when FIN received",
1903           priv->state);
1904       return FALSE;
1905     }
1906   } else if (seg->flags & FLAG_FIN) {
1907     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1908         "Invalid FIN received when FIN-ACK support is disabled");
1909   } else if (is_fin_ack) {
1910     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1911         "Invalid FIN-ACK received when FIN-ACK support is disabled");
1912   }
1913 
1914   // If we make room in the send queue, notify the user
  // The goal is to make sure we always have at least enough data to fill the
1916   // window.  We'd like to notify the app when we are halfway to that point.
1917   kIdealRefillSize = (priv->sbuf_len + priv->rbuf_len) / 2;
1918 
1919   snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1920   if (priv->bWriteEnable && snd_buffered < kIdealRefillSize) {
1921     priv->bWriteEnable = FALSE;
1922     if (priv->callbacks.PseudoTcpWritable)
1923       priv->callbacks.PseudoTcpWritable(self, priv->callbacks.user_data);
1924   }
1925 
1926   /* Conditions where acks must be sent:
1927    * 1) Segment is too old (they missed an ACK) (immediately)
1928    * 2) Segment is too new (we missed a segment) (immediately)
1929    * 3) Segment has data (so we need to ACK!) (delayed)
1930    * ... so the only time we don't need to ACK, is an empty segment
1931    * that points to rcv_nxt!
1932    * 4) Segment has the FIN flag set (immediately) — note that the FIN flag
1933    *    itself has to be included in the ACK as a numbered byte;
1934    *    see RFC 793, §3.3. Also see: RFC 793, §3.5.
1935    */
1936   if (seg->seq != priv->rcv_nxt) {
1937     sflags = sfDuplicateAck; // (Fast Recovery)
1938   } else if (seg->len != 0) {
1939     if (priv->ack_delay == 0) {
1940       sflags = sfImmediateAck;
1941     } else {
1942       sflags = sfDelayedAck;
1943     }
1944   } else if (received_fin) {
1945     /* FIN flags have a sequence number. Only acknowledge them after all
1946      * preceding octets have been acknowledged. */
1947     sflags = sfImmediateAck;
1948   }
1949 
1950   if (sflags == sfDuplicateAck) {
1951     if (seg->seq > priv->rcv_nxt) {
1952       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too new");
1953     } else if (SMALLER_OR_EQUAL(seg->seq + seg->len, priv->rcv_nxt)) {
1954       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too old");
1955     }
1956   }
1957 
1958   // Adjust the incoming segment to fit our receive buffer
1959   if (SMALLER(seg->seq, priv->rcv_nxt)) {
1960     guint32 nAdjust = priv->rcv_nxt - seg->seq;
1961     if (nAdjust < seg->len) {
1962       seg->seq += nAdjust;
1963       seg->data += nAdjust;
1964       seg->len -= nAdjust;
1965     } else {
1966       seg->len = 0;
1967     }
1968   }
1969 
1970   available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
1971 
1972   if ((seg->seq + seg->len - priv->rcv_nxt) > available_space) {
1973     guint32 nAdjust = seg->seq + seg->len - priv->rcv_nxt - available_space;
1974     if (nAdjust < seg->len) {
1975       seg->len -= nAdjust;
1976     } else {
1977       seg->len = 0;
1978     }
1979   }
1980 
1981   bIgnoreData = (seg->flags & FLAG_CTL);
1982   if (!priv->support_fin_ack)
1983     bIgnoreData |= (priv->shutdown != SD_NONE);
1984 
1985   bNewData = FALSE;
1986 
1987   if (seg->len > 0) {
1988     if (bIgnoreData) {
1989       if (seg->seq == priv->rcv_nxt) {
1990         priv->rcv_nxt += seg->len;
1991       }
1992     } else {
1993       guint32 nOffset = seg->seq - priv->rcv_nxt;
1994       gsize res;
1995 
1996       res = pseudo_tcp_fifo_write_offset (&priv->rbuf, (guint8 *) seg->data,
1997           seg->len, nOffset);
1998       g_assert_cmpint (res, ==, seg->len);
1999 
2000       if (seg->seq == priv->rcv_nxt) {
2001         GList *iter = NULL;
2002 
2003         pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, seg->len);
2004         priv->rcv_nxt += seg->len;
2005         priv->rcv_wnd -= seg->len;
2006         bNewData = TRUE;
2007 
2008         iter = priv->rlist;
2009         while (iter &&
2010             SMALLER_OR_EQUAL(((RSegment *)iter->data)->seq, priv->rcv_nxt)) {
2011           RSegment *data = (RSegment *)(iter->data);
2012           if (LARGER (data->seq + data->len, priv->rcv_nxt)) {
2013             guint32 nAdjust = (data->seq + data->len) - priv->rcv_nxt;
2014             sflags = sfImmediateAck; // (Fast Recovery)
2015             DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Recovered %u bytes (%u -> %u)",
2016                 nAdjust, priv->rcv_nxt, priv->rcv_nxt + nAdjust);
2017             pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, nAdjust);
2018             priv->rcv_nxt += nAdjust;
2019             priv->rcv_wnd -= nAdjust;
2020           }
2021           g_slice_free (RSegment, priv->rlist->data);
2022           priv->rlist = g_list_delete_link (priv->rlist, priv->rlist);
2023           iter = priv->rlist;
2024         }
2025       } else {
2026         GList *iter = NULL;
2027         RSegment *rseg = g_slice_new0 (RSegment);
2028 
2029         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Saving %u bytes (%u -> %u)",
2030             seg->len, seg->seq, seg->seq + seg->len);
2031         rseg->seq = seg->seq;
2032         rseg->len = seg->len;
2033         iter = priv->rlist;
2034         while (iter && SMALLER (((RSegment*)iter->data)->seq, rseg->seq)) {
2035           iter = g_list_next (iter);
2036         }
2037         priv->rlist = g_list_insert_before(priv->rlist, iter, rseg);
2038       }
2039     }
2040   }
2041 
2042   if (received_fin) {
2043     /* FIN flags have a sequence number. */
2044     priv->rcv_nxt++;
2045   }
2046 
2047 
2048   attempt_send(self, sflags);
2049 
2050   // If we have new data, notify the user
2051   if (bNewData && priv->bReadEnable) {
2052     /* priv->bReadEnable = FALSE; — removed so that we’re always notified of
2053      * incoming pseudo-TCP data, rather than having to read the entire buffer
2054      * on each readable() callback before the next callback is enabled.
2055      * (When client-provided buffers are small, this is not possible.) */
2056     if (priv->callbacks.PseudoTcpReadable)
2057       priv->callbacks.PseudoTcpReadable(self, priv->callbacks.user_data);
2058   }
2059 
2060   return TRUE;
2061 }
2062 
2063 static gboolean
transmit(PseudoTcpSocket * self,SSegment * segment,guint32 now)2064 transmit(PseudoTcpSocket *self, SSegment *segment, guint32 now)
2065 {
2066   PseudoTcpSocketPrivate *priv = self->priv;
2067   guint32 nTransmit = min(segment->len, priv->mss);
2068 
2069   if (segment->xmit >= ((priv->state == PSEUDO_TCP_ESTABLISHED) ? 15 : 30)) {
2070     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too many retransmits");
2071     return ETIMEDOUT;
2072   }
2073 
2074   while (TRUE) {
2075     guint32 seq = segment->seq;
2076     guint8 flags = segment->flags;
2077     PseudoTcpWriteResult wres;
2078 
2079     /* The packet must not have already been acknowledged. */
2080     g_assert_cmpuint (segment->seq - priv->snd_una, <=, 1024 * 1024 * 64);
2081 
2082     /* Write out the packet. */
2083     wres = packet(self, seq, flags,
2084         segment->seq - priv->snd_una, nTransmit, now);
2085 
2086     if (wres == WR_SUCCESS)
2087       break;
2088 
2089     if (wres == WR_FAIL) {
2090       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "packet failed");
2091       return ECONNABORTED;  /* FIXME: This error code doesn’t quite seem right */
2092     }
2093 
2094     g_assert_cmpint (wres, ==, WR_TOO_LARGE);
2095 
2096     while (TRUE) {
2097       if (PACKET_MAXIMUMS[priv->msslevel + 1] == 0) {
2098         DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "MTU too small");
2099         return EMSGSIZE;
2100       }
2101       /* !?! We need to break up all outstanding and pending packets
2102          and then retransmit!?! */
2103 
2104       priv->mss = PACKET_MAXIMUMS[++priv->msslevel] - PACKET_OVERHEAD;
2105       // I added this... haven't researched actual formula
2106       priv->cwnd = 2 * priv->mss;
2107 
2108       if (priv->mss < nTransmit) {
2109         nTransmit = priv->mss;
2110         break;
2111       }
2112     }
2113     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes ", priv->mss);
2114   }
2115 
2116   if (nTransmit < segment->len) {
2117     SSegment *subseg = g_slice_new0 (SSegment);
2118     subseg->seq = segment->seq + nTransmit;
2119     subseg->len = segment->len - nTransmit;
2120     subseg->flags = segment->flags;
2121     subseg->xmit = segment->xmit;
2122 
2123     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "mss reduced to %u", priv->mss);
2124 
2125     segment->len = nTransmit;
2126     g_queue_insert_after (&priv->slist,
2127         g_queue_find (&priv->slist, segment), subseg);
2128     if (subseg->xmit == 0)
2129       g_queue_insert_after (&priv->unsent_slist,
2130           g_queue_find (&priv->unsent_slist, segment), subseg);
2131   }
2132 
2133   if (segment->xmit == 0) {
2134     g_assert (g_queue_peek_head (&priv->unsent_slist) == segment);
2135     g_queue_pop_head (&priv->unsent_slist);
2136     priv->snd_nxt += segment->len;
2137 
2138     /* FIN flags require acknowledgement. */
2139     if (segment->len == 0 && segment->flags & FLAG_FIN)
2140       priv->snd_nxt++;
2141   }
2142   segment->xmit += 1;
2143 
2144   if (priv->rto_base == 0) {
2145     priv->rto_base = now;
2146   }
2147 
2148   return 0;
2149 }
2150 
2151 static void
attempt_send(PseudoTcpSocket * self,SendFlags sflags)2152 attempt_send(PseudoTcpSocket *self, SendFlags sflags)
2153 {
2154   PseudoTcpSocketPrivate *priv = self->priv;
2155   guint32 now = get_current_time (self);
2156   gboolean bFirst = TRUE;
2157 
2158   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Attempting send with flags %u.", sflags);
2159 
2160   if (time_diff(now, priv->lastsend) > (long) priv->rx_rto) {
2161     priv->cwnd = priv->mss;
2162   }
2163 
2164 
2165   while (TRUE) {
2166     guint32 cwnd;
2167     guint32 nWindow;
2168     guint32 nInFlight;
2169     guint32 nUseable;
2170     guint32 nAvailable;
2171     gsize snd_buffered;
2172     GList *iter;
2173     SSegment *sseg;
2174     int transmit_status;
2175 
2176     cwnd = priv->cwnd;
2177     if ((priv->dup_acks == 1) || (priv->dup_acks == 2)) { // Limited Transmit
2178       cwnd += priv->dup_acks * priv->mss;
2179     }
2180     nWindow = min(priv->snd_wnd, cwnd);
2181     nInFlight = priv->snd_nxt - priv->snd_una;
2182     nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
2183     snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
2184     if (snd_buffered < nInFlight)  /* iff a FIN has been sent */
2185       nAvailable = 0;
2186     else
2187       nAvailable = min(snd_buffered - nInFlight, priv->mss);
2188 
2189     if (nAvailable > nUseable) {
2190       if (nUseable * 4 < nWindow) {
2191         // RFC 813 - avoid SWS
2192         nAvailable = 0;
2193       } else {
2194         nAvailable = nUseable;
2195       }
2196     }
2197 
2198     if (bFirst) {
2199       gsize available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
2200 
2201       bFirst = FALSE;
2202       DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "[cwnd: %u  nWindow: %u  nInFlight: %u "
2203           "nAvailable: %u nQueued: %" G_GSIZE_FORMAT " nEmpty: %" G_GSIZE_FORMAT
2204           "  nWaiting: %zu ssthresh: %u]",
2205           priv->cwnd, nWindow, nInFlight, nAvailable, snd_buffered,
2206           available_space, snd_buffered - nInFlight, priv->ssthresh);
2207     }
2208 
2209     if (sflags == sfDuplicateAck) {
2210       packet(self, priv->snd_nxt, 0, 0, 0, now);
2211       sflags = sfNone;
2212       continue;
2213     }
2214 
2215     if (nAvailable == 0 && sflags != sfFin && sflags != sfRst) {
2216       if (sflags == sfNone)
2217         return;
2218 
2219       // If this is an immediate ack, or the second delayed ack
2220       if ((sflags == sfImmediateAck || sflags == sfDuplicateAck) ||
2221           priv->t_ack) {
2222         packet(self, priv->snd_nxt, 0, 0, 0, now);
2223       } else {
2224         priv->t_ack = now;
2225       }
2226       return;
2227     }
2228 
2229     // Nagle algorithm
2230     // If there is data already in-flight, and we haven't a full segment of
2231     // data ready to send then hold off until we get more to send, or the
2232     // in-flight data is acknowledged.
2233     if (priv->use_nagling && sflags != sfFin && sflags != sfRst &&
2234         (priv->snd_nxt > priv->snd_una) &&
2235         (nAvailable < priv->mss))  {
2236       return;
2237     }
2238 
2239     // Find the next segment to transmit
2240     iter = g_queue_peek_head_link (&priv->unsent_slist);
2241     if (iter == NULL)
2242       return;
2243     sseg = iter->data;
2244 
2245     // If the segment is too large, break it into two
2246     if (sseg->len > nAvailable && sflags != sfFin && sflags != sfRst) {
2247       SSegment *subseg = g_slice_new0 (SSegment);
2248       subseg->seq = sseg->seq + nAvailable;
2249       subseg->len = sseg->len - nAvailable;
2250       subseg->flags = sseg->flags;
2251 
2252       sseg->len = nAvailable;
2253       g_queue_insert_after (&priv->unsent_slist, iter, subseg);
2254       g_queue_insert_after (&priv->slist, g_queue_find (&priv->slist, sseg),
2255           subseg);
2256     }
2257 
2258     transmit_status = transmit(self, sseg, now);
2259     if (transmit_status != 0) {
2260       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "transmit failed");
2261 
2262       // TODO: Is this the right thing ?
2263       closedown (self, transmit_status, CLOSEDOWN_REMOTE);
2264       return;
2265     }
2266 
2267     if (sflags == sfImmediateAck || sflags == sfDelayedAck)
2268       sflags = sfNone;
2269   }
2270 }
2271 
2272 /* If @source is %CLOSEDOWN_REMOTE, don’t send an RST packet, since closedown()
2273  * has been called as a result of an RST segment being received.
2274  * See: RFC 1122, §4.2.2.13. */
2275 static void
closedown(PseudoTcpSocket * self,guint32 err,ClosedownSource source)2276 closedown (PseudoTcpSocket *self, guint32 err, ClosedownSource source)
2277 {
2278   PseudoTcpSocketPrivate *priv = self->priv;
2279 
2280   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing down socket %p with %s error %u.",
2281       self, (source == CLOSEDOWN_LOCAL) ? "local" : "remote", err);
2282 
2283   if (source == CLOSEDOWN_LOCAL && priv->support_fin_ack) {
2284     queue_rst_message (self);
2285     attempt_send (self, sfRst);
2286   } else if (source == CLOSEDOWN_LOCAL) {
2287     priv->shutdown = SD_FORCEFUL;
2288   }
2289 
2290   /* ‘Cute’ little navigation through the state machine to avoid breaking the
2291    * invariant that CLOSED can only be reached from TIME-WAIT or LAST-ACK. */
2292   switch (priv->state) {
2293   case PSEUDO_TCP_LISTEN:
2294   case PSEUDO_TCP_SYN_SENT:
2295     break;
2296   case PSEUDO_TCP_SYN_RECEIVED:
2297   case PSEUDO_TCP_ESTABLISHED:
2298     set_state (self, PSEUDO_TCP_FIN_WAIT_1);
2299     /* Fall through. */
2300   case PSEUDO_TCP_FIN_WAIT_1:
2301     set_state (self, PSEUDO_TCP_FIN_WAIT_2);
2302     /* Fall through. */
2303   case PSEUDO_TCP_FIN_WAIT_2:
2304   case PSEUDO_TCP_CLOSING:
2305     set_state (self, PSEUDO_TCP_TIME_WAIT);
2306     break;
2307   case PSEUDO_TCP_CLOSE_WAIT:
2308     set_state (self, PSEUDO_TCP_LAST_ACK);
2309     break;
2310   case PSEUDO_TCP_LAST_ACK:
2311   case PSEUDO_TCP_TIME_WAIT:
2312   case PSEUDO_TCP_CLOSED:
2313   default:
2314     break;
2315   }
2316 
2317   set_state_closed (self, err);
2318 }
2319 
2320 static void
adjustMTU(PseudoTcpSocket * self)2321 adjustMTU(PseudoTcpSocket *self)
2322 {
2323   PseudoTcpSocketPrivate *priv = self->priv;
2324 
2325   // Determine our current mss level, so that we can adjust appropriately later
2326   for (priv->msslevel = 0;
2327        PACKET_MAXIMUMS[priv->msslevel + 1] > 0;
2328        ++priv->msslevel) {
2329     if (((guint16)PACKET_MAXIMUMS[priv->msslevel]) <= priv->mtu_advise) {
2330       break;
2331     }
2332   }
2333   priv->mss = priv->mtu_advise - PACKET_OVERHEAD;
2334   // !?! Should we reset priv->largest here?
2335   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes", priv->mss);
2336   // Enforce minimums on ssthresh and cwnd
2337   priv->ssthresh = max(priv->ssthresh, 2 * priv->mss);
2338   priv->cwnd = max(priv->cwnd, priv->mss);
2339 }
2340 
2341 static void
apply_window_scale_option(PseudoTcpSocket * self,guint8 scale_factor)2342 apply_window_scale_option (PseudoTcpSocket *self, guint8 scale_factor)
2343 {
2344    PseudoTcpSocketPrivate *priv = self->priv;
2345 
2346    priv->swnd_scale = scale_factor;
2347    DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting scale factor to %u", scale_factor);
2348 }
2349 
2350 static void
apply_fin_ack_option(PseudoTcpSocket * self)2351 apply_fin_ack_option (PseudoTcpSocket *self)
2352 {
2353   PseudoTcpSocketPrivate *priv = self->priv;
2354 
2355   priv->support_fin_ack = TRUE;
2356 }
2357 
2358 static void
apply_option(PseudoTcpSocket * self,guint8 kind,const guint8 * data,guint32 len)2359 apply_option (PseudoTcpSocket *self, guint8 kind, const guint8 *data,
2360     guint32 len)
2361 {
2362   switch (kind) {
2363   case TCP_OPT_MSS:
2364     DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
2365         "Peer specified MSS option which is not supported.");
2366     // TODO: Implement.
2367     break;
2368   case TCP_OPT_WND_SCALE:
2369     // Window scale factor.
2370     // http://www.ietf.org/rfc/rfc1323.txt
2371     if (len != 1) {
2372       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid window scale option received.");
2373       return;
2374     }
2375     apply_window_scale_option(self, data[0]);
2376     break;
2377   case TCP_OPT_FIN_ACK:
2378     // FIN-ACK support.
2379     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN-ACK support enabled.");
2380     apply_fin_ack_option (self);
2381     break;
2382   case TCP_OPT_EOL:
2383   case TCP_OPT_NOOP:
2384     /* Nothing to do. */
2385     break;
2386   default:
2387     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid TCP option %u", kind);
2388     break;
2389   }
2390 }
2391 
2392 
2393 static void
parse_options(PseudoTcpSocket * self,const guint8 * data,guint32 len)2394 parse_options (PseudoTcpSocket *self, const guint8 *data, guint32 len)
2395 {
2396   PseudoTcpSocketPrivate *priv = self->priv;
2397   gboolean has_window_scaling_option = FALSE;
2398   gboolean has_fin_ack_option = FALSE;
2399   guint32 pos = 0;
2400 
2401   // See http://www.freesoft.org/CIE/Course/Section4/8.htm for
2402   // parsing the options list.
2403   while (pos < len) {
2404     guint8 kind = TCP_OPT_EOL;
2405     guint8 opt_len;
2406 
2407     if (len < pos + 1)
2408       return;
2409 
2410     kind = data[pos];
2411     pos++;
2412 
2413     if (kind == TCP_OPT_EOL) {
2414       // End of option list.
2415       break;
2416     } else if (kind == TCP_OPT_NOOP) {
2417       // No op.
2418       continue;
2419     }
2420 
2421     if (len < pos + 1)
2422       return;
2423 
2424     // Length of this option.
2425     opt_len = data[pos];
2426     pos++;
2427 
2428     if (len < pos + opt_len)
2429       return;
2430 
2431     // Content of this option.
2432     if (opt_len <= len - pos) {
2433       apply_option (self, kind, data + pos, opt_len);
2434       pos += opt_len;
2435     } else {
2436       DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid option length received.");
2437       return;
2438     }
2439 
2440     if (kind == TCP_OPT_WND_SCALE)
2441       has_window_scaling_option = TRUE;
2442     else if (kind == TCP_OPT_FIN_ACK)
2443       has_fin_ack_option = TRUE;
2444   }
2445 
2446   if (!has_window_scaling_option) {
2447     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support window scaling");
2448     if (priv->rwnd_scale > 0) {
2449       // Peer doesn't support TCP options and window scaling.
2450       // Revert receive buffer size to default value.
2451       resize_receive_buffer (self, DEFAULT_RCV_BUF_SIZE);
2452       priv->swnd_scale = 0;
2453     }
2454   }
2455 
2456   if (!has_fin_ack_option) {
2457     DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support FIN-ACK");
2458     priv->support_fin_ack = FALSE;
2459   }
2460 }
2461 
2462 static void
resize_send_buffer(PseudoTcpSocket * self,guint32 new_size)2463 resize_send_buffer (PseudoTcpSocket *self, guint32 new_size)
2464 {
2465   PseudoTcpSocketPrivate *priv = self->priv;
2466 
2467   priv->sbuf_len = new_size;
2468   pseudo_tcp_fifo_set_capacity (&priv->sbuf, new_size);
2469 }
2470 
2471 
2472 static void
resize_receive_buffer(PseudoTcpSocket * self,guint32 new_size)2473 resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size)
2474 {
2475   PseudoTcpSocketPrivate *priv = self->priv;
2476   guint8 scale_factor = 0;
2477   gboolean result;
2478   gsize available_space;
2479 
2480   if (priv->rbuf_len == new_size)
2481     return;
2482 
2483   // Determine the scale factor such that the scaled window size can fit
2484   // in a 16-bit unsigned integer.
2485   while (new_size > 0xFFFF) {
2486     ++scale_factor;
2487     new_size >>= 1;
2488   }
2489 
2490   // Determine the proper size of the buffer.
2491   new_size <<= scale_factor;
2492   result = pseudo_tcp_fifo_set_capacity (&priv->rbuf, new_size);
2493 
2494   // Make sure the new buffer is large enough to contain data in the old
2495   // buffer. This should always be true because this method is called either
2496   // before connection is established or when peers are exchanging connect
2497   // messages.
2498   g_assert (result);
2499   priv->rbuf_len = new_size;
2500   priv->rwnd_scale = scale_factor;
2501   priv->ssthresh = new_size;
2502 
2503   available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
2504   priv->rcv_wnd = available_space;
2505 }
2506 
2507 gint
pseudo_tcp_socket_get_available_bytes(PseudoTcpSocket * self)2508 pseudo_tcp_socket_get_available_bytes (PseudoTcpSocket *self)
2509 {
2510   PseudoTcpSocketPrivate *priv = self->priv;
2511 
2512   return pseudo_tcp_fifo_get_buffered (&priv->rbuf);
2513 }
2514 
2515 gboolean
pseudo_tcp_socket_can_send(PseudoTcpSocket * self)2516 pseudo_tcp_socket_can_send (PseudoTcpSocket *self)
2517 {
2518   return (pseudo_tcp_socket_get_available_send_space (self) > 0);
2519 }
2520 
2521 gsize
pseudo_tcp_socket_get_available_send_space(PseudoTcpSocket * self)2522 pseudo_tcp_socket_get_available_send_space (PseudoTcpSocket *self)
2523 {
2524   PseudoTcpSocketPrivate *priv = self->priv;
2525   gsize ret;
2526 
2527   if (!pseudo_tcp_state_has_sent_fin (priv->state)) {
2528     ret = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
2529   } else {
2530     ret = 0;
2531   }
2532 
2533   if (ret == 0)
2534     priv->bWriteEnable = TRUE;
2535 
2536   return ret;
2537 }
2538 
2539 /* State names are capitalised and formatted as in RFC 793. */
2540 static const gchar *
pseudo_tcp_state_get_name(PseudoTcpState state)2541 pseudo_tcp_state_get_name (PseudoTcpState state)
2542 {
2543   switch (state) {
2544   case PSEUDO_TCP_LISTEN: return "LISTEN";
2545   case PSEUDO_TCP_SYN_SENT: return "SYN-SENT";
2546   case PSEUDO_TCP_SYN_RECEIVED: return "SYN-RECEIVED";
2547   case PSEUDO_TCP_ESTABLISHED: return "ESTABLISHED";
2548   case PSEUDO_TCP_CLOSED: return "CLOSED";
2549   case PSEUDO_TCP_FIN_WAIT_1: return "FIN-WAIT-1";
2550   case PSEUDO_TCP_FIN_WAIT_2: return "FIN-WAIT-2";
2551   case PSEUDO_TCP_CLOSING: return "CLOSING";
2552   case PSEUDO_TCP_TIME_WAIT: return "TIME-WAIT";
2553   case PSEUDO_TCP_CLOSE_WAIT: return "CLOSE-WAIT";
2554   case PSEUDO_TCP_LAST_ACK: return "LAST-ACK";
2555   default: return "UNKNOWN";
2556   }
2557 }
2558 
2559 static void
set_state(PseudoTcpSocket * self,PseudoTcpState new_state)2560 set_state (PseudoTcpSocket *self, PseudoTcpState new_state)
2561 {
2562   PseudoTcpSocketPrivate *priv = self->priv;
2563   PseudoTcpState old_state = priv->state;
2564 
2565   if (new_state == old_state)
2566     return;
2567 
2568   DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State %s → %s.",
2569       pseudo_tcp_state_get_name (old_state),
2570       pseudo_tcp_state_get_name (new_state));
2571 
2572   /* Check whether it’s a valid state transition. */
2573 #define TRANSITION(OLD, NEW) \
2574     (old_state == PSEUDO_TCP_##OLD && \
2575      new_state == PSEUDO_TCP_##NEW)
2576 
2577   /* Valid transitions. See: RFC 793, p23; RFC 1122, §4.2.2.8. */
2578   g_assert (/* RFC 793, p23. */
2579             TRANSITION (CLOSED, SYN_SENT) ||
2580             TRANSITION (SYN_SENT, CLOSED) ||
2581             TRANSITION (CLOSED, LISTEN) ||
2582             TRANSITION (LISTEN, CLOSED) ||
2583             TRANSITION (LISTEN, SYN_SENT) ||
2584             TRANSITION (LISTEN, SYN_RECEIVED) ||
2585             TRANSITION (SYN_SENT, SYN_RECEIVED) ||
2586             TRANSITION (SYN_RECEIVED, ESTABLISHED) ||
2587             TRANSITION (SYN_SENT, ESTABLISHED) ||
2588             TRANSITION (SYN_RECEIVED, FIN_WAIT_1) ||
2589             TRANSITION (ESTABLISHED, FIN_WAIT_1) ||
2590             TRANSITION (ESTABLISHED, CLOSE_WAIT) ||
2591             TRANSITION (FIN_WAIT_1, FIN_WAIT_2) ||
2592             TRANSITION (FIN_WAIT_1, CLOSING) ||
2593             TRANSITION (CLOSE_WAIT, LAST_ACK) ||
2594             TRANSITION (FIN_WAIT_2, TIME_WAIT) ||
2595             TRANSITION (CLOSING, TIME_WAIT) ||
2596             TRANSITION (LAST_ACK, CLOSED) ||
2597             TRANSITION (TIME_WAIT, CLOSED) ||
2598             /* RFC 1122, §4.2.2.8. */
2599             TRANSITION (SYN_RECEIVED, LISTEN) ||
2600             TRANSITION (FIN_WAIT_1, TIME_WAIT));
2601 
2602 #undef TRANSITION
2603 
2604   priv->state = new_state;
2605 }
2606 
2607 static void
set_state_established(PseudoTcpSocket * self)2608 set_state_established (PseudoTcpSocket *self)
2609 {
2610   PseudoTcpSocketPrivate *priv = self->priv;
2611 
2612   set_state (self, PSEUDO_TCP_ESTABLISHED);
2613 
2614   adjustMTU (self);
2615   if (priv->callbacks.PseudoTcpOpened)
2616     priv->callbacks.PseudoTcpOpened (self, priv->callbacks.user_data);
2617 }
2618 
2619 /* (err == 0) means no error. */
2620 static void
set_state_closed(PseudoTcpSocket * self,guint32 err)2621 set_state_closed (PseudoTcpSocket *self, guint32 err)
2622 {
2623   PseudoTcpSocketPrivate *priv = self->priv;
2624 
2625   set_state (self, PSEUDO_TCP_CLOSED);
2626 
2627   /* Only call the callback if there was an error. */
2628   if (priv->callbacks.PseudoTcpClosed && err != 0)
2629     priv->callbacks.PseudoTcpClosed (self, err, priv->callbacks.user_data);
2630 }
2631 
2632 gboolean
pseudo_tcp_socket_is_closed(PseudoTcpSocket * self)2633 pseudo_tcp_socket_is_closed (PseudoTcpSocket *self)
2634 {
2635   PseudoTcpSocketPrivate *priv = self->priv;
2636 
2637   return (priv->state == PSEUDO_TCP_CLOSED);
2638 }
2639 
2640 gboolean
pseudo_tcp_socket_is_closed_remotely(PseudoTcpSocket * self)2641 pseudo_tcp_socket_is_closed_remotely (PseudoTcpSocket *self)
2642 {
2643   PseudoTcpSocketPrivate *priv = self->priv;
2644 
2645   return pseudo_tcp_state_has_received_fin (priv->state);
2646 }
2647