1 /*
2 * This file is part of the Nice GLib ICE library.
3 *
4 * (C) 2010, 2014 Collabora Ltd.
5 * Contact: Philip Withnall
6
7 *
8 * The contents of this file are subject to the Mozilla Public License Version
9 * 1.1 (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
11 * http://www.mozilla.org/MPL/
12 *
13 * Software distributed under the License is distributed on an "AS IS" basis,
14 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
15 * for the specific language governing rights and limitations under the
16 * License.
17 *
18 * The Original Code is the Nice GLib ICE library.
19 *
20 * The Initial Developers of the Original Code are Collabora Ltd and Nokia
21 * Corporation. All Rights Reserved.
22 *
23 * Contributors:
24 * Youness Alaoui, Collabora Ltd.
25 * Philip Withnall, Collabora Ltd.
26 *
27 * Alternatively, the contents of this file may be used under the terms of the
28 * the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
29 * case the provisions of LGPL are applicable instead of those above. If you
30 * wish to allow use of your version of this file only under the terms of the
31 * LGPL and not to allow others to use your version of this file under the
32 * MPL, indicate your decision by deleting the provisions above and replace
33 * them with the notice and other provisions required by the LGPL. If you do
34 * not delete the provisions above, a recipient may use your version of this
35 * file under either the MPL or the LGPL.
36 */
37
38 /* Reproducing license from libjingle for copied code */
39
40 /*
41 * libjingle
42 * Copyright 2004--2005, Google Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions are met:
46 *
47 * 1. Redistributions of source code must retain the above copyright notice,
48 * this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright notice,
50 * this list of conditions and the following disclaimer in the documentation
51 * and/or other materials provided with the distribution.
52 * 3. The name of the author may not be used to endorse or promote products
53 * derived from this software without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
56 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
57 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
58 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65 */
66
67 #include <stdlib.h>
68 #include <errno.h>
69 #include <string.h>
70
71 #include <glib.h>
72
73 #ifndef G_OS_WIN32
74 # include <arpa/inet.h>
75 #endif
76
77 #include "pseudotcp.h"
78 #include "agent-priv.h"
79
/* Class structure for PseudoTcpSocket. It only embeds the parent GObjectClass
 * and declares no virtual methods of its own. */
struct _PseudoTcpSocketClass {
  GObjectClass parent_class;
};
83
/* Opaque per-instance state; the structure itself is defined later in this
 * file. */
typedef struct _PseudoTcpSocketPrivate PseudoTcpSocketPrivate;


/* Instance structure: the parent GObject followed by a pointer to the
 * separately allocated private state. */
struct _PseudoTcpSocket {
  GObject parent;
  PseudoTcpSocketPrivate *priv;
};

/* Registers the PseudoTcpSocket type as a direct subclass of GObject. */
G_DEFINE_TYPE (PseudoTcpSocket, pseudo_tcp_socket, G_TYPE_OBJECT);
93
94 //////////////////////////////////////////////////////////////////////
95 // Network Constants
96 //////////////////////////////////////////////////////////////////////
97
// Standard MTUs
/* Table of packet sizes in descending order, terminated by a 0 entry.
 * Entries that are commented out are deliberately not part of the table. */
const guint16 PACKET_MAXIMUMS[] = {
  65535, // Theoretical maximum, Hyperchannel
  32000, // Nothing
  17914, // 16Mb IBM Token Ring
  8166, // IEEE 802.4
  //4464, // IEEE 802.5 (4Mb max)
  4352, // FDDI
  //2048, // Wideband Network
  2002, // IEEE 802.5 (4Mb recommended)
  //1536, // Experimental Ethernet Networks
  //1500, // Ethernet, Point-to-Point (default)
  1492, // IEEE 802.3
  1006, // SLIP, ARPANET
  //576, // X.25 Networks
  //544, // DEC IP Portal
  //512, // NETBIOS
  508, // IEEE 802/Source-Rt Bridge, ARCNET
  296, // Point-to-Point (low delay)
  //68, // Official minimum
  0, // End of list marker
};
120
// FIXME: This is a reasonable MTU, but we should get it from the lower layer
/* Default advised MTU, in bytes. */
#define DEF_MTU 1400
/* Upper bound on packet size, in bytes. */
#define MAX_PACKET 65532
// Note: we removed lowest level because packet overhead was larger!
/* Smallest packet size considered; equals the last non-zero entry of
 * PACKET_MAXIMUMS. */
#define MIN_PACKET 296

/* Sizes, in bytes, of the lower-layer headers that are counted as
 * per-packet overhead. */
// (+ up to 40 bytes of options?)
#define IP_HEADER_SIZE 20
#define ICMP_HEADER_SIZE 8
#define UDP_HEADER_SIZE 8
// TODO: Make JINGLE_HEADER_SIZE transparent to this code?
// when relay framing is in use
#define JINGLE_HEADER_SIZE 64
134
135 //////////////////////////////////////////////////////////////////////
136 // Global Constants and Functions
137 //////////////////////////////////////////////////////////////////////
138 //
139 // 0 1 2 3
140 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
141 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
142 // 0 | Conversation Number |
143 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
144 // 4 | Sequence Number |
145 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
146 // 8 | Acknowledgment Number |
147 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
148 // | | |U|A|P|R|S|F| |
149 // 12 | Control | |R|C|S|S|Y|I| Window |
150 // | | |G|K|H|T|N|N| |
151 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
152 // 16 | Timestamp sending |
153 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
154 // 20 | Timestamp receiving |
155 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
156 // 24 | data |
157 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
158 //
159 //////////////////////////////////////////////////////////////////////
160
/* Largest 32-bit sequence number. */
#define MAX_SEQ 0xFFFFFFFF
/* Size in bytes of the pseudo-TCP header; the data starts at offset 24 in the
 * layout diagram above. */
#define HEADER_SIZE 24

/* Total bytes of header overhead per packet: pseudo-TCP, UDP, IP and Jingle
 * headers combined. */
#define PACKET_OVERHEAD (HEADER_SIZE + UDP_HEADER_SIZE + \
    IP_HEADER_SIZE + JINGLE_HEADER_SIZE)

/* Retransmission timeout bounds and defaults, in milliseconds. */
// MIN_RTO = 1 second (RFC6298, Sec 2.4)
#define MIN_RTO 1000
#define DEF_RTO 1000 /* 1 second (RFC 6298 sect 2.1) */
#define MAX_RTO 60000 /* 60 seconds */
#define DEFAULT_ACK_DELAY 100 /* 100 milliseconds */
/* Default for the "no-delay" property; FALSE leaves the Nagle algorithm
 * enabled. */
#define DEFAULT_NO_DELAY FALSE

/* Default receive and send buffer sizes, in bytes. */
#define DEFAULT_RCV_BUF_SIZE (60 * 1024)
#define DEFAULT_SND_BUF_SIZE (90 * 1024)
176
/* Option kinds carried in control segments.
 * NOTE: This must fit in 8 bits. This is used on the wire. */
typedef enum {
  /* Google-provided options: */
  TCP_OPT_EOL = 0, /* end of list */
  TCP_OPT_NOOP = 1, /* no-op */
  TCP_OPT_MSS = 2, /* maximum segment size */
  TCP_OPT_WND_SCALE = 3, /* window scale factor */
  /* libnice extensions: */
  TCP_OPT_FIN_ACK = 254, /* FIN-ACK support */
} TcpOption;
187
188
189 /*
190 #define FLAG_SYN 0x02
191 #define FLAG_ACK 0x10
192 */
193
/* Segment flag bits; each value is a distinct bit so flags can be combined.
 * NOTE: This must fit in 5 bits. This is used on the wire. */
typedef enum {
  FLAG_NONE = 0,
  FLAG_FIN = 1 << 0, /* used for FIN segments (see queue_fin_message()) */
  FLAG_CTL = 1 << 1, /* used for control segments such as the connect message */
  FLAG_RST = 1 << 2, /* used for RST segments (see queue_rst_message()) */
} TcpFlags;
201
/* First payload byte of a control segment; CTL_CONNECT starts the connect
 * message built by queue_connect_message(). */
#define CTL_CONNECT 0
//#define CTL_REDIRECT 1
#define CTL_EXTRA 255


/* NOTE(review): the use of CTRL_BOUND is not visible in this part of the
 * file — confirm its meaning at the call sites. */
#define CTRL_BOUND 0x80000000

/* Maximum segment lifetime (1 minute).
 * RFC 793, §3.3 specifies 2 minutes; but Linux uses 1 minute, so let’s go with
 * that. */
#define TCP_MSL (60 * 1000)

// If there are no pending clocks, wake up every 4 seconds
#define DEFAULT_TIMEOUT 4000
// If the connection is closed, once per minute
#define CLOSED_TIMEOUT (60 * 1000)
/* Timeout after reaching the TIME_WAIT state, in milliseconds.
 * See: RFC 1122, §4.2.2.13.
 *
 * XXX: Since we can control the underlying layer’s channel ID, we can guarantee
 * delayed segments won’t affect subsequent connections, so can radically
 * shorten the TIME-WAIT timeout (to the extent that it basically doesn’t
 * exist). It would normally be (2 * TCP_MSL). */
#define TIME_WAIT_TIMEOUT 1
226
227 //////////////////////////////////////////////////////////////////////
228 // Helper Functions
229 //////////////////////////////////////////////////////////////////////
/* Smaller/larger of two values. Each argument may be evaluated twice, so do
 * not pass expressions with side effects.
 * NOTE(review): only defined on non-Windows builds; presumably the Windows
 * headers already provide min/max — confirm. */
#ifndef G_OS_WIN32
# define min(first, second) ((first) < (second) ? (first) : (second))
# define max(first, second) ((first) > (second) ? (first) : (second))
#endif
234
235 static guint32
bound(guint32 lower,guint32 middle,guint32 upper)236 bound(guint32 lower, guint32 middle, guint32 upper)
237 {
238 return min (max (lower, middle), upper);
239 }
240
241 static gboolean
time_is_between(guint32 later,guint32 middle,guint32 earlier)242 time_is_between(guint32 later, guint32 middle, guint32 earlier)
243 {
244 if (earlier <= later) {
245 return ((earlier <= middle) && (middle <= later));
246 } else {
247 return !((later < middle) && (middle < earlier));
248 }
249 }
250
251 static gint32
time_diff(guint32 later,guint32 earlier)252 time_diff(guint32 later, guint32 earlier)
253 {
254 guint32 LAST = 0xFFFFFFFF;
255 guint32 HALF = 0x80000000;
256 if (time_is_between(earlier + HALF, later, earlier)) {
257 if (earlier <= later) {
258 return (long)(later - earlier);
259 } else {
260 return (long)(later + (LAST - earlier) + 1);
261 }
262 } else {
263 if (later <= earlier) {
264 return -(long) (earlier - later);
265 } else {
266 return -(long)(earlier + (LAST - later) + 1);
267 }
268 }
269 }
270
271 ////////////////////////////////////////////////////////
272 // PseudoTcpFifo works exactly like FifoBuffer in libjingle
273 ////////////////////////////////////////////////////////
274
275
/* Byte FIFO stored as a ring buffer. */
typedef struct {
  guint8 *buffer;       /* backing storage, allocated with g_slice_alloc() */
  gsize buffer_length;  /* allocated size of @buffer, in bytes */
  gsize data_length;    /* number of bytes currently stored */
  gsize read_position;  /* index in @buffer of the oldest stored byte */
} PseudoTcpFifo;
282
283
284 static void
pseudo_tcp_fifo_init(PseudoTcpFifo * b,gsize size)285 pseudo_tcp_fifo_init (PseudoTcpFifo *b, gsize size)
286 {
287 b->buffer = g_slice_alloc (size);
288 b->buffer_length = size;
289 }
290
291 static void
pseudo_tcp_fifo_clear(PseudoTcpFifo * b)292 pseudo_tcp_fifo_clear (PseudoTcpFifo *b)
293 {
294 if (b->buffer)
295 g_slice_free1 (b->buffer_length, b->buffer);
296 b->buffer = NULL;
297 b->buffer_length = 0;
298 }
299
300 static gsize
pseudo_tcp_fifo_get_buffered(PseudoTcpFifo * b)301 pseudo_tcp_fifo_get_buffered (PseudoTcpFifo *b)
302 {
303 return b->data_length;
304 }
305
306 static gboolean
pseudo_tcp_fifo_set_capacity(PseudoTcpFifo * b,gsize size)307 pseudo_tcp_fifo_set_capacity (PseudoTcpFifo *b, gsize size)
308 {
309 if (b->data_length > size)
310 return FALSE;
311
312 if (size != b->data_length) {
313 guint8 *buffer = g_slice_alloc (size);
314 gsize copy = b->data_length;
315 gsize tail_copy = min (copy, b->buffer_length - b->read_position);
316
317 memcpy (buffer, &b->buffer[b->read_position], tail_copy);
318 memcpy (buffer + tail_copy, &b->buffer[0], copy - tail_copy);
319 g_slice_free1 (b->buffer_length, b->buffer);
320 b->buffer = buffer;
321 b->buffer_length = size;
322 b->read_position = 0;
323 }
324
325 return TRUE;
326 }
327
328 static void
pseudo_tcp_fifo_consume_read_data(PseudoTcpFifo * b,gsize size)329 pseudo_tcp_fifo_consume_read_data (PseudoTcpFifo *b, gsize size)
330 {
331 g_assert_cmpint (size, <=, b->data_length);
332
333 b->read_position = (b->read_position + size) % b->buffer_length;
334 b->data_length -= size;
335 }
336
337 static void
pseudo_tcp_fifo_consume_write_buffer(PseudoTcpFifo * b,gsize size)338 pseudo_tcp_fifo_consume_write_buffer (PseudoTcpFifo *b, gsize size)
339 {
340 g_assert_cmpint (size, <=, b->buffer_length - b->data_length);
341
342 b->data_length += size;
343 }
344
345 static gsize
pseudo_tcp_fifo_get_write_remaining(PseudoTcpFifo * b)346 pseudo_tcp_fifo_get_write_remaining (PseudoTcpFifo *b)
347 {
348 return b->buffer_length - b->data_length;
349 }
350
351 static gsize
pseudo_tcp_fifo_read_offset(PseudoTcpFifo * b,guint8 * buffer,gsize bytes,gsize offset)352 pseudo_tcp_fifo_read_offset (PseudoTcpFifo *b, guint8 *buffer, gsize bytes,
353 gsize offset)
354 {
355 gsize available = b->data_length - offset;
356 gsize read_position = (b->read_position + offset) % b->buffer_length;
357 gsize copy = min (bytes, available);
358 gsize tail_copy = min(copy, b->buffer_length - read_position);
359
360 /* EOS */
361 if (offset >= b->data_length)
362 return 0;
363
364 memcpy(buffer, &b->buffer[read_position], tail_copy);
365 memcpy(buffer + tail_copy, &b->buffer[0], copy - tail_copy);
366
367 return copy;
368 }
369
370 static gsize
pseudo_tcp_fifo_write_offset(PseudoTcpFifo * b,const guint8 * buffer,gsize bytes,gsize offset)371 pseudo_tcp_fifo_write_offset (PseudoTcpFifo *b, const guint8 *buffer,
372 gsize bytes, gsize offset)
373 {
374 gsize available = b->buffer_length - b->data_length - offset;
375 gsize write_position = (b->read_position + b->data_length + offset)
376 % b->buffer_length;
377 gsize copy = min (bytes, available);
378 gsize tail_copy = min(copy, b->buffer_length - write_position);
379
380 if (b->data_length + offset >= b->buffer_length) {
381 return 0;
382 }
383
384 memcpy(&b->buffer[write_position], buffer, tail_copy);
385 memcpy(&b->buffer[0], buffer + tail_copy, copy - tail_copy);
386
387 return copy;
388 }
389
390 static gsize
pseudo_tcp_fifo_read(PseudoTcpFifo * b,guint8 * buffer,gsize bytes)391 pseudo_tcp_fifo_read (PseudoTcpFifo *b, guint8 *buffer, gsize bytes)
392 {
393 gsize copy;
394
395 copy = pseudo_tcp_fifo_read_offset (b, buffer, bytes, 0);
396
397 b->read_position = (b->read_position + copy) % b->buffer_length;
398 b->data_length -= copy;
399
400 return copy;
401 }
402
403 static gsize
pseudo_tcp_fifo_write(PseudoTcpFifo * b,const guint8 * buffer,gsize bytes)404 pseudo_tcp_fifo_write (PseudoTcpFifo *b, const guint8 *buffer, gsize bytes)
405 {
406 gsize copy;
407
408 copy = pseudo_tcp_fifo_write_offset (b, buffer, bytes, 0);
409 b->data_length += copy;
410
411 return copy;
412 }
413
414
415 //////////////////////////////////////////////////////////////////////
416 // PseudoTcp
417 //////////////////////////////////////////////////////////////////////
418
/* Kind of shutdown requested for the socket.
 * Only used if FIN-ACK support is disabled. */
typedef enum {
  SD_NONE,      /* initial value set in pseudo_tcp_socket_init() */
  SD_GRACEFUL,
  SD_FORCEFUL
} Shutdown;
425
/* Hint passed to attempt_send() describing what kind of segment, if any, the
 * caller wants sent.
 * NOTE(review): the exact handling of each value is in attempt_send(), which
 * is not visible in this part of the file. */
typedef enum {
  sfNone,
  sfDelayedAck,
  sfImmediateAck,
  sfFin,
  sfRst,
  sfDuplicateAck,
} SendFlags;
434
/* A segment as handed to process(). The field names follow the header layout
 * diagram above (conversation, sequence and acknowledgment numbers, flags,
 * window, timestamps).
 * NOTE(review): presumably filled in by parse() from the wire header —
 * confirm there. */
typedef struct {
  guint32 conv, seq, ack;
  TcpFlags flags;
  guint16 wnd;
  const gchar * data;
  guint32 len;
  guint32 tsval, tsecr;
} Segment;
443
/* Entry of the socket's send list (priv->slist); allocated with the slice
 * allocator and freed in pseudo_tcp_socket_finalize().
 * NOTE(review): @xmit is presumably a transmission counter — confirm in
 * transmit(). */
typedef struct {
  guint32 seq, len;
  guint8 xmit;
  TcpFlags flags;
} SSegment;
449
/* Entry of the socket's receive list (priv->rlist): a sequence number and a
 * length. Freed in pseudo_tcp_socket_finalize(). */
typedef struct {
  guint32 seq, len;
} RSegment;
453
/**
 * ClosedownSource:
 * @CLOSEDOWN_LOCAL: Error detected locally, or connection forcefully closed
 * locally.
 * @CLOSEDOWN_REMOTE: RST segment received from the peer.
 *
 * Reasons for calling closedown(); passed as its final argument.
 *
 * Since: 0.1.8
 */
typedef enum {
  CLOSEDOWN_LOCAL,
  CLOSEDOWN_REMOTE,
} ClosedownSource;
468
469
/* Private state of a PseudoTcpSocket. One instance is allocated with g_new0()
 * in pseudo_tcp_socket_init() and released in pseudo_tcp_socket_finalize(),
 * so every field starts out zeroed unless the initialiser sets it. */
struct _PseudoTcpSocketPrivate {
  /* Copy of the callback table supplied through the "callbacks" property. */
  PseudoTcpCallbacks callbacks;

  Shutdown shutdown; /* only used if !support_fin_ack */
  gboolean shutdown_reads;
  gint error; /* last error code, e.g. EINVAL from a failed connect */

  // TCB data
  PseudoTcpState state;
  guint32 conv; /* conversation ID, from the "conversation" property */
  gboolean bReadEnable, bWriteEnable, bOutgoing;
  guint32 last_traffic;

  // Incoming data
  GList *rlist; /* list of RSegment */
  guint32 rbuf_len, rcv_nxt, rcv_wnd, lastrecv;
  guint8 rwnd_scale; // Window scale factor
  PseudoTcpFifo rbuf;
  guint32 rcv_fin; /* sequence number of the received FIN octet, or 0 */

  // Outgoing data
  GQueue slist; /* queue of SSegment; owns its elements */
  GQueue unsent_slist; /* cleared, but its elements are not freed, on finalize */
  guint32 sbuf_len, snd_nxt, snd_wnd, lastsend;
  guint32 snd_una; /* oldest unacknowledged sequence number */
  guint8 swnd_scale; // Window scale factor
  PseudoTcpFifo sbuf;

  // Maximum segment size, estimated protocol level, largest segment sent
  guint32 mss, msslevel, largest, mtu_advise;
  // Retransmit timer
  guint32 rto_base;

  // Timestamp tracking
  guint32 ts_recent, ts_lastack;

  // Round-trip calculation
  guint32 rx_rttvar, rx_srtt, rx_rto;

  // Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
  guint32 ssthresh, cwnd;
  guint8 dup_acks;
  guint32 recover;
  gboolean fast_recovery;
  guint32 t_ack; /* time a delayed ack was scheduled; 0 if no acks scheduled */
  guint32 last_acked_ts;

  gboolean use_nagling; /* inverse of the "no-delay" property */
  guint32 ack_delay; /* "ack-delay" property, in milliseconds */

  // This is used by unit tests to test backward compatibility of
  // PseudoTcp implementations that don't support window scaling.
  gboolean support_wnd_scale;

  /* Current time. Typically only used for testing, when non-zero. When zero,
   * the system monotonic clock is used. Units: monotonic milliseconds. */
  guint32 current_time;

  /* This is used by compatible implementations (with the TCP_OPT_FIN_ACK
   * option) to enable correct FIN-ACK connection termination. Defaults to
   * TRUE unless no compatible option is received. */
  gboolean support_fin_ack;
};
533
/* Wrap-around-aware ordering of 32-bit unsigned values such as sequence
 * numbers. For guint32 operands, LARGER(a,b) holds when (a - b) modulo 2^32
 * is in [1, 2^31 - 1], and LARGER_OR_EQUAL(a,b) when it is in [0, 2^31 - 2].
 * SMALLER and SMALLER_OR_EQUAL are the same tests with the operands swapped.
 * Arguments are evaluated once each but must be of type guint32 for the
 * modular arithmetic to work as described. */
#define LARGER(a,b) (((a) - (b) - 1) < (G_MAXUINT32 >> 1))
#define LARGER_OR_EQUAL(a,b) (((a) - (b)) < (G_MAXUINT32 >> 1))
#define SMALLER(a,b) LARGER ((b),(a))
#define SMALLER_OR_EQUAL(a,b) LARGER_OR_EQUAL ((b),(a))
538
/* properties */
/* GObject property IDs, installed in pseudo_tcp_socket_class_init(). IDs start
 * at 1; LAST_PROPERTY is a sentinel and not a real property. */
enum
{
  PROP_CONVERSATION = 1,
  PROP_CALLBACKS,
  PROP_STATE,
  PROP_ACK_DELAY,
  PROP_NO_DELAY,
  PROP_RCV_BUF,
  PROP_SND_BUF,
  PROP_SUPPORT_FIN_ACK,
  LAST_PROPERTY
};
552
553
/* GObject virtual method implementations. */
static void pseudo_tcp_socket_get_property (GObject *object, guint property_id,
    GValue *value, GParamSpec *pspec);
static void pseudo_tcp_socket_set_property (GObject *object, guint property_id,
    const GValue *value, GParamSpec *pspec);
static void pseudo_tcp_socket_finalize (GObject *object);


/* Internal protocol helpers, defined later in this file. */
static void queue_connect_message (PseudoTcpSocket *self);
static guint32 queue (PseudoTcpSocket *self, const gchar *data,
    guint32 len, TcpFlags flags);
static PseudoTcpWriteResult packet(PseudoTcpSocket *self, guint32 seq,
    TcpFlags flags, guint32 offset, guint32 len, guint32 now);
static gboolean parse (PseudoTcpSocket *self,
    const guint8 *_header_buf, gsize header_buf_len,
    const guint8 *data_buf, gsize data_buf_len);
static gboolean process(PseudoTcpSocket *self, Segment *seg);
static int transmit(PseudoTcpSocket *self, SSegment *sseg, guint32 now);
static void attempt_send(PseudoTcpSocket *self, SendFlags sflags);
static void closedown (PseudoTcpSocket *self, guint32 err,
    ClosedownSource source);
static void adjustMTU(PseudoTcpSocket *self);
static void parse_options (PseudoTcpSocket *self, const guint8 *data,
    guint32 len);
static void resize_send_buffer (PseudoTcpSocket *self, guint32 new_size);
static void resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size);
static void set_state (PseudoTcpSocket *self, PseudoTcpState new_state);
static void set_state_established (PseudoTcpSocket *self);
static void set_state_closed (PseudoTcpSocket *self, guint32 err);

/* Helpers that inspect a PseudoTcpState value. */
static const gchar *pseudo_tcp_state_get_name (PseudoTcpState state);
static gboolean pseudo_tcp_state_has_sent_fin (PseudoTcpState state);
static gboolean pseudo_tcp_state_has_received_fin (PseudoTcpState state);
static gboolean pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state);
587
588 // The following logging is for detailed (packet-level) pseudotcp analysis only.
589 static PseudoTcpDebugLevel debug_level = PSEUDO_TCP_DEBUG_NONE;
590
591 #define DEBUG(level, fmt, ...) \
592 if (debug_level >= level) \
593 g_log (level == PSEUDO_TCP_DEBUG_NORMAL ? "libnice-pseudotcp" : "libnice-pseudotcp-verbose", G_LOG_LEVEL_DEBUG, "PseudoTcpSocket %p %s: " fmt, \
594 self, pseudo_tcp_state_get_name (self->priv->state), ## __VA_ARGS__)
595
596 void
pseudo_tcp_set_debug_level(PseudoTcpDebugLevel level)597 pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level)
598 {
599 debug_level = level;
600 }
601
602 static guint32
get_current_time(PseudoTcpSocket * socket)603 get_current_time (PseudoTcpSocket *socket)
604 {
605 if (G_UNLIKELY (socket->priv->current_time != 0))
606 return socket->priv->current_time;
607
608 return g_get_monotonic_time () / 1000;
609 }
610
611 void
pseudo_tcp_socket_set_time(PseudoTcpSocket * self,guint32 current_time)612 pseudo_tcp_socket_set_time (PseudoTcpSocket *self, guint32 current_time)
613 {
614 self->priv->current_time = current_time;
615 }
616
617 static void
pseudo_tcp_socket_class_init(PseudoTcpSocketClass * cls)618 pseudo_tcp_socket_class_init (PseudoTcpSocketClass *cls)
619 {
620 GObjectClass *object_class = G_OBJECT_CLASS (cls);
621
622 object_class->get_property = pseudo_tcp_socket_get_property;
623 object_class->set_property = pseudo_tcp_socket_set_property;
624 object_class->finalize = pseudo_tcp_socket_finalize;
625
626 g_object_class_install_property (object_class, PROP_CONVERSATION,
627 g_param_spec_uint ("conversation", "TCP Conversation ID",
628 "The TCP Conversation ID",
629 0, G_MAXUINT32, 0,
630 G_PARAM_CONSTRUCT_ONLY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
631
632 g_object_class_install_property (object_class, PROP_CALLBACKS,
633 g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
634 "Structure with the callbacks to call when PseudoTcp events happen",
635 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
636
637 g_object_class_install_property (object_class, PROP_STATE,
638 g_param_spec_uint ("state", "PseudoTcp State",
639 "The current state (enum PseudoTcpState) of the PseudoTcp socket",
640 PSEUDO_TCP_LISTEN, PSEUDO_TCP_CLOSED, PSEUDO_TCP_LISTEN,
641 G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
642
643 g_object_class_install_property (object_class, PROP_ACK_DELAY,
644 g_param_spec_uint ("ack-delay", "ACK Delay",
645 "Delayed ACK timeout (in milliseconds)",
646 0, G_MAXUINT, DEFAULT_ACK_DELAY,
647 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
648
649 g_object_class_install_property (object_class, PROP_NO_DELAY,
650 g_param_spec_boolean ("no-delay", "No Delay",
651 "Disable the Nagle algorithm (like the TCP_NODELAY option)",
652 DEFAULT_NO_DELAY,
653 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
654
655 g_object_class_install_property (object_class, PROP_RCV_BUF,
656 g_param_spec_uint ("rcv-buf", "Receive Buffer",
657 "Receive Buffer size",
658 1, G_MAXUINT, DEFAULT_RCV_BUF_SIZE,
659 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
660
661 g_object_class_install_property (object_class, PROP_SND_BUF,
662 g_param_spec_uint ("snd-buf", "Send Buffer",
663 "Send Buffer size",
664 1, G_MAXUINT, DEFAULT_SND_BUF_SIZE,
665 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
666
667 /**
668 * PseudoTcpSocket:support-fin-ack:
669 *
670 * Whether to support the FIN–ACK extension to the pseudo-TCP protocol for
671 * this socket. The extension is only compatible with other libnice pseudo-TCP
672 * stacks, and not with Jingle pseudo-TCP stacks. If enabled, support is
673 * negotiatied on connection setup, so it is safe for a #PseudoTcpSocket with
674 * support enabled to be used with one with it disabled, or with a Jingle
675 * pseudo-TCP socket which doesn’t support it at all.
676 *
677 * Support is enabled by default.
678 *
679 * Since: 0.1.8
680 */
681 g_object_class_install_property (object_class, PROP_SUPPORT_FIN_ACK,
682 g_param_spec_boolean ("support-fin-ack", "Support FIN–ACK",
683 "Whether to enable the optional FIN–ACK support.",
684 TRUE,
685 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
686 }
687
688
689 static void
pseudo_tcp_socket_get_property(GObject * object,guint property_id,GValue * value,GParamSpec * pspec)690 pseudo_tcp_socket_get_property (GObject *object,
691 guint property_id,
692 GValue *value,
693 GParamSpec *pspec)
694 {
695 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
696
697 switch (property_id) {
698 case PROP_CONVERSATION:
699 g_value_set_uint (value, self->priv->conv);
700 break;
701 case PROP_CALLBACKS:
702 g_value_set_pointer (value, (gpointer) &self->priv->callbacks);
703 break;
704 case PROP_STATE:
705 g_value_set_uint (value, self->priv->state);
706 break;
707 case PROP_ACK_DELAY:
708 g_value_set_uint (value, self->priv->ack_delay);
709 break;
710 case PROP_NO_DELAY:
711 g_value_set_boolean (value, !self->priv->use_nagling);
712 break;
713 case PROP_RCV_BUF:
714 g_value_set_uint (value, self->priv->rbuf_len);
715 break;
716 case PROP_SND_BUF:
717 g_value_set_uint (value, self->priv->sbuf_len);
718 break;
719 case PROP_SUPPORT_FIN_ACK:
720 g_value_set_boolean (value, self->priv->support_fin_ack);
721 break;
722 default:
723 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
724 break;
725 }
726 }
727
728 static void
pseudo_tcp_socket_set_property(GObject * object,guint property_id,const GValue * value,GParamSpec * pspec)729 pseudo_tcp_socket_set_property (GObject *object,
730 guint property_id,
731 const GValue *value,
732 GParamSpec *pspec)
733 {
734 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
735
736 switch (property_id) {
737 case PROP_CONVERSATION:
738 self->priv->conv = g_value_get_uint (value);
739 break;
740 case PROP_CALLBACKS:
741 {
742 PseudoTcpCallbacks *c = g_value_get_pointer (value);
743 self->priv->callbacks = *c;
744 }
745 break;
746 case PROP_ACK_DELAY:
747 self->priv->ack_delay = g_value_get_uint (value);
748 break;
749 case PROP_NO_DELAY:
750 self->priv->use_nagling = !g_value_get_boolean (value);
751 break;
752 case PROP_RCV_BUF:
753 g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
754 resize_receive_buffer (self, g_value_get_uint (value));
755 break;
756 case PROP_SND_BUF:
757 g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
758 resize_send_buffer (self, g_value_get_uint (value));
759 break;
760 case PROP_SUPPORT_FIN_ACK:
761 self->priv->support_fin_ack = g_value_get_boolean (value);
762 break;
763 default:
764 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
765 break;
766 }
767 }
768
769 static void
pseudo_tcp_socket_finalize(GObject * object)770 pseudo_tcp_socket_finalize (GObject *object)
771 {
772 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
773 PseudoTcpSocketPrivate *priv = self->priv;
774 GList *i;
775 SSegment *sseg;
776
777 if (priv == NULL)
778 return;
779
780 while ((sseg = g_queue_pop_head (&priv->slist)))
781 g_slice_free (SSegment, sseg);
782 g_queue_clear (&priv->unsent_slist);
783 for (i = priv->rlist; i; i = i->next) {
784 RSegment *rseg = i->data;
785 g_slice_free (RSegment, rseg);
786 }
787 g_list_free (priv->rlist);
788 priv->rlist = NULL;
789
790 pseudo_tcp_fifo_clear (&priv->rbuf);
791 pseudo_tcp_fifo_clear (&priv->sbuf);
792
793 g_free (priv);
794 self->priv = NULL;
795
796 if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize)
797 G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize (object);
798 }
799
800
801 static void
pseudo_tcp_socket_init(PseudoTcpSocket * obj)802 pseudo_tcp_socket_init (PseudoTcpSocket *obj)
803 {
804 /* Use g_new0, and do not use g_object_set_private because the size of
805 * our private data is too big (150KB+) and the g_slice_allow cannot allocate
806 * it. So we handle the private ourselves */
807 PseudoTcpSocketPrivate *priv = g_new0 (PseudoTcpSocketPrivate, 1);
808
809 obj->priv = priv;
810
811 priv->shutdown = SD_NONE;
812 priv->error = 0;
813
814 priv->rbuf_len = DEFAULT_RCV_BUF_SIZE;
815 pseudo_tcp_fifo_init (&priv->rbuf, priv->rbuf_len);
816 priv->sbuf_len = DEFAULT_SND_BUF_SIZE;
817 pseudo_tcp_fifo_init (&priv->sbuf, priv->sbuf_len);
818
819 priv->state = PSEUDO_TCP_LISTEN;
820 priv->conv = 0;
821 g_queue_init (&priv->slist);
822 g_queue_init (&priv->unsent_slist);
823 priv->rcv_wnd = priv->rbuf_len;
824 priv->rwnd_scale = priv->swnd_scale = 0;
825 priv->snd_nxt = 0;
826 priv->snd_wnd = 1;
827 priv->snd_una = priv->rcv_nxt = 0;
828 priv->bReadEnable = TRUE;
829 priv->bWriteEnable = FALSE;
830 priv->rcv_fin = 0;
831
832 priv->t_ack = 0;
833
834 priv->msslevel = 0;
835 priv->largest = 0;
836 priv->mss = MIN_PACKET - PACKET_OVERHEAD;
837 priv->mtu_advise = DEF_MTU;
838
839 priv->rto_base = 0;
840
841 priv->cwnd = 2 * priv->mss;
842 priv->ssthresh = priv->rbuf_len;
843 priv->lastrecv = priv->lastsend = priv->last_traffic = 0;
844 priv->bOutgoing = FALSE;
845
846 priv->dup_acks = 0;
847 priv->recover = 0;
848 priv->last_acked_ts = 0;
849
850 priv->ts_recent = priv->ts_lastack = 0;
851
852 priv->rx_rto = DEF_RTO;
853 priv->rx_srtt = priv->rx_rttvar = 0;
854
855 priv->ack_delay = DEFAULT_ACK_DELAY;
856 priv->use_nagling = !DEFAULT_NO_DELAY;
857
858 priv->support_wnd_scale = TRUE;
859 priv->support_fin_ack = TRUE;
860 }
861
pseudo_tcp_socket_new(guint32 conversation,PseudoTcpCallbacks * callbacks)862 PseudoTcpSocket *pseudo_tcp_socket_new (guint32 conversation,
863 PseudoTcpCallbacks *callbacks)
864 {
865
866 return g_object_new (PSEUDO_TCP_SOCKET_TYPE,
867 "conversation", conversation,
868 "callbacks", callbacks,
869 NULL);
870 }
871
872 static void
queue_connect_message(PseudoTcpSocket * self)873 queue_connect_message (PseudoTcpSocket *self)
874 {
875 PseudoTcpSocketPrivate *priv = self->priv;
876 guint8 buf[8];
877 gsize size = 0;
878
879 buf[size++] = CTL_CONNECT;
880
881 if (priv->support_wnd_scale) {
882 buf[size++] = TCP_OPT_WND_SCALE;
883 buf[size++] = 1;
884 buf[size++] = priv->rwnd_scale;
885 }
886
887 if (priv->support_fin_ack) {
888 buf[size++] = TCP_OPT_FIN_ACK;
889 buf[size++] = 1; /* option length; zero is invalid (RFC 1122, §4.2.2.5) */
890 buf[size++] = 0; /* currently unused */
891 }
892
893 priv->snd_wnd = size;
894
895 queue (self, (char *) buf, size, FLAG_CTL);
896 }
897
898 static void
queue_fin_message(PseudoTcpSocket * self)899 queue_fin_message (PseudoTcpSocket *self)
900 {
901 g_assert (self->priv->support_fin_ack);
902
903 /* FIN segments are always zero-length. */
904 queue (self, "", 0, FLAG_FIN);
905 }
906
907 static void
queue_rst_message(PseudoTcpSocket * self)908 queue_rst_message (PseudoTcpSocket *self)
909 {
910 g_assert (self->priv->support_fin_ack);
911
912 /* RST segments are always zero-length. */
913 queue (self, "", 0, FLAG_RST);
914 }
915
916 gboolean
pseudo_tcp_socket_connect(PseudoTcpSocket * self)917 pseudo_tcp_socket_connect(PseudoTcpSocket *self)
918 {
919 PseudoTcpSocketPrivate *priv = self->priv;
920
921 if (priv->state != PSEUDO_TCP_LISTEN) {
922 priv->error = EINVAL;
923 return FALSE;
924 }
925
926 set_state (self, PSEUDO_TCP_SYN_SENT);
927
928 queue_connect_message (self);
929 attempt_send(self, sfNone);
930
931 return TRUE;
932 }
933
934 void
pseudo_tcp_socket_notify_mtu(PseudoTcpSocket * self,guint16 mtu)935 pseudo_tcp_socket_notify_mtu(PseudoTcpSocket *self, guint16 mtu)
936 {
937 PseudoTcpSocketPrivate *priv = self->priv;
938 priv->mtu_advise = mtu;
939 if (priv->state == PSEUDO_TCP_ESTABLISHED) {
940 adjustMTU(self);
941 }
942 }
943
944 void
pseudo_tcp_socket_notify_clock(PseudoTcpSocket * self)945 pseudo_tcp_socket_notify_clock(PseudoTcpSocket *self)
946 {
947 PseudoTcpSocketPrivate *priv = self->priv;
948 guint32 now = get_current_time (self);
949
950 if (priv->state == PSEUDO_TCP_CLOSED)
951 return;
952
953 /* If in the TIME-WAIT state, any delayed segments have passed and the
954 * connection can be considered closed from both ends.
955 * FIXME: This should probably actually compare a timestamp before
956 * operating. */
957 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
958 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
959 "Notified clock in TIME-WAIT state; closing connection.");
960 set_state_closed (self, 0);
961 }
962
963 /* If in the LAST-ACK state, resend the FIN because it hasn’t been ACKed yet.
964 * FIXME: This should probably actually compare a timestamp before
965 * operating. */
966 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_LAST_ACK) {
967 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
968 "Notified clock in LAST-ACK state; resending FIN segment.");
969 queue_fin_message (self);
970 attempt_send (self, sfFin);
971 }
972
973 // Check if it's time to retransmit a segment
974 if (priv->rto_base &&
975 (time_diff(priv->rto_base + priv->rx_rto, now) <= 0)) {
976 if (g_queue_get_length (&priv->slist) == 0) {
977 g_assert_not_reached ();
978 } else {
979 // Note: (priv->slist.front().xmit == 0)) {
980 // retransmit segments
981 guint32 nInFlight;
982 guint32 rto_limit;
983 int transmit_status;
984
985 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "timeout retransmit (rto: %u) "
986 "(rto_base: %u) (now: %u) (dup_acks: %u)",
987 priv->rx_rto, priv->rto_base, now, (guint) priv->dup_acks);
988
989 transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
990 if (transmit_status != 0) {
991 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
992 "Error transmitting segment. Closing down.");
993 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
994 return;
995 }
996
997 nInFlight = priv->snd_nxt - priv->snd_una;
998 priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
999 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "ssthresh: %u = (nInFlight: %u / 2) + "
1000 "2 * mss: %u", priv->ssthresh, nInFlight, priv->mss);
1001 //LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
1002 priv->cwnd = priv->mss;
1003
1004 // Back off retransmit timer. Note: the limit is lower when connecting.
1005 rto_limit = (priv->state < PSEUDO_TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
1006 priv->rx_rto = min(rto_limit, priv->rx_rto * 2);
1007 priv->rto_base = now;
1008
1009 priv->recover = priv->snd_nxt;
1010 if (priv->dup_acks >= 3) {
1011 priv->dup_acks = 0;
1012 priv->fast_recovery = FALSE;
1013 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery on timeout");
1014 }
1015 }
1016 }
1017
1018 // Check if it's time to probe closed windows
1019 if ((priv->snd_wnd == 0)
1020 && (time_diff(priv->lastsend + priv->rx_rto, now) <= 0)) {
1021 if (time_diff(now, priv->lastrecv) >= 15000) {
1022 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Receive window closed. Closing down.");
1023 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1024 return;
1025 }
1026
1027 // probe the window
1028 packet(self, priv->snd_nxt - 1, 0, 0, 0, now);
1029 priv->lastsend = now;
1030
1031 // back off retransmit timer
1032 priv->rx_rto = min(MAX_RTO, priv->rx_rto * 2);
1033 }
1034
1035 // Check if it's time to send delayed acks
1036 if (priv->t_ack && (time_diff(priv->t_ack + priv->ack_delay, now) <= 0)) {
1037 packet(self, priv->snd_nxt, 0, 0, 0, now);
1038 }
1039
1040 }
1041
1042 gboolean
pseudo_tcp_socket_notify_packet(PseudoTcpSocket * self,const gchar * buffer,guint32 len)1043 pseudo_tcp_socket_notify_packet(PseudoTcpSocket *self,
1044 const gchar * buffer, guint32 len)
1045 {
1046 gboolean retval;
1047
1048 if (len > MAX_PACKET) {
1049 //LOG_F(WARNING) << "packet too large";
1050 self->priv->error = EMSGSIZE;
1051 return FALSE;
1052 } else if (len < HEADER_SIZE) {
1053 //LOG_F(WARNING) << "packet too small";
1054 self->priv->error = EINVAL;
1055 return FALSE;
1056 }
1057
1058 /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1059 * closed from within a callback. */
1060 g_object_ref (self);
1061 retval = parse (self, (guint8 *) buffer, HEADER_SIZE,
1062 (guint8 *) buffer + HEADER_SIZE, len - HEADER_SIZE);
1063 g_object_unref (self);
1064
1065 return retval;
1066 }
1067
1068 /* Assume there are two buffers in the given #NiceInputMessage: a 24-byte one
1069 * containing the header, and a bigger one for the data. */
1070 gboolean
pseudo_tcp_socket_notify_message(PseudoTcpSocket * self,NiceInputMessage * message)1071 pseudo_tcp_socket_notify_message (PseudoTcpSocket *self,
1072 NiceInputMessage *message)
1073 {
1074 gboolean retval;
1075
1076 g_assert_cmpuint (message->n_buffers, >, 0);
1077
1078 if (message->n_buffers == 1)
1079 return pseudo_tcp_socket_notify_packet (self, message->buffers[0].buffer,
1080 message->buffers[0].size);
1081
1082 g_assert_cmpuint (message->n_buffers, ==, 2);
1083 g_assert_cmpuint (message->buffers[0].size, ==, HEADER_SIZE);
1084
1085 if (message->length > MAX_PACKET) {
1086 //LOG_F(WARNING) << "packet too large";
1087 return FALSE;
1088 } else if (message->length < HEADER_SIZE) {
1089 //LOG_F(WARNING) << "packet too small";
1090 return FALSE;
1091 }
1092
1093 /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1094 * closed from within a callback. */
1095 g_object_ref (self);
1096 retval = parse (self, message->buffers[0].buffer, message->buffers[0].size,
1097 message->buffers[1].buffer, message->length - message->buffers[0].size);
1098 g_object_unref (self);
1099
1100 return retval;
1101 }
1102
1103 gboolean
pseudo_tcp_socket_get_next_clock(PseudoTcpSocket * self,guint64 * timeout)1104 pseudo_tcp_socket_get_next_clock(PseudoTcpSocket *self, guint64 *timeout)
1105 {
1106 PseudoTcpSocketPrivate *priv = self->priv;
1107 guint32 now = get_current_time (self);
1108 gsize snd_buffered;
1109 guint32 closed_timeout;
1110
1111 if (priv->shutdown == SD_FORCEFUL) {
1112 if (priv->support_fin_ack) {
1113 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1114 "‘Forceful’ shutdown used when FIN-ACK support is enabled");
1115 }
1116
1117 /* Transition to the CLOSED state. */
1118 closedown (self, 0, CLOSEDOWN_REMOTE);
1119
1120 return FALSE;
1121 }
1122
1123 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1124 if ((priv->shutdown == SD_GRACEFUL)
1125 && ((priv->state != PSEUDO_TCP_ESTABLISHED)
1126 || ((snd_buffered == 0) && (priv->t_ack == 0)))) {
1127 if (priv->support_fin_ack) {
1128 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1129 "‘Graceful’ shutdown used when FIN-ACK support is enabled");
1130 }
1131
1132 /* Transition to the CLOSED state. */
1133 closedown (self, 0, CLOSEDOWN_REMOTE);
1134
1135 return FALSE;
1136 }
1137
1138 /* FIN-ACK support. The timeout for closing the socket if nothing is received
1139 * varies depending on whether the socket is waiting in the TIME-WAIT state
1140 * for delayed segments to pass.
1141 *
1142 * See: http://vincent.bernat.im/en/blog/2014-tcp-time-wait-state-linux.html
1143 */
1144 closed_timeout = CLOSED_TIMEOUT;
1145 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT)
1146 closed_timeout = TIME_WAIT_TIMEOUT;
1147
1148 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_CLOSED) {
1149 return FALSE;
1150 }
1151
1152 if (*timeout == 0 || *timeout < now)
1153 *timeout = now + closed_timeout;
1154
1155 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
1156 *timeout = min (*timeout, now + TIME_WAIT_TIMEOUT);
1157 return TRUE;
1158 }
1159
1160 if (priv->state == PSEUDO_TCP_CLOSED && !priv->support_fin_ack) {
1161 *timeout = min (*timeout, now + CLOSED_TIMEOUT);
1162 return TRUE;
1163 }
1164
1165 *timeout = min (*timeout, now + DEFAULT_TIMEOUT);
1166
1167 if (priv->t_ack) {
1168 *timeout = min(*timeout, priv->t_ack + priv->ack_delay);
1169 }
1170 if (priv->rto_base) {
1171 *timeout = min(*timeout, priv->rto_base + priv->rx_rto);
1172 }
1173 if (priv->snd_wnd == 0) {
1174 *timeout = min(*timeout, priv->lastsend + priv->rx_rto);
1175 }
1176
1177 return TRUE;
1178 }
1179
1180
1181 gint
pseudo_tcp_socket_recv(PseudoTcpSocket * self,char * buffer,size_t len)1182 pseudo_tcp_socket_recv(PseudoTcpSocket *self, char * buffer, size_t len)
1183 {
1184 PseudoTcpSocketPrivate *priv = self->priv;
1185 gsize bytesread;
1186 gsize available_space;
1187
1188 /* Received a FIN from the peer, so return 0. RFC 793, §3.5, Case 2. */
1189 if (priv->support_fin_ack && priv->shutdown_reads) {
1190 return 0;
1191 }
1192
1193 /* Return 0 if FIN-ACK is not supported but the socket has been closed. */
1194 if (!priv->support_fin_ack && pseudo_tcp_socket_is_closed (self)) {
1195 return 0;
1196 }
1197
1198 /* Return ENOTCONN if FIN-ACK is not supported and the connection is not
1199 * ESTABLISHED. */
1200 if (!priv->support_fin_ack && priv->state != PSEUDO_TCP_ESTABLISHED) {
1201 priv->error = ENOTCONN;
1202 return -1;
1203 }
1204
1205 if (len == 0)
1206 return 0;
1207
1208 bytesread = pseudo_tcp_fifo_read (&priv->rbuf, (guint8 *) buffer, len);
1209
1210 // If there's no data in |m_rbuf|.
1211 if (bytesread == 0 &&
1212 !(pseudo_tcp_state_has_received_fin (priv->state) ||
1213 pseudo_tcp_state_has_received_fin_ack (priv->state))) {
1214 priv->bReadEnable = TRUE;
1215 priv->error = EWOULDBLOCK;
1216 return -1;
1217 }
1218
1219 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
1220
1221 if (available_space - priv->rcv_wnd >=
1222 min (priv->rbuf_len / 2, priv->mss)) {
1223 // !?! Not sure about this was closed business
1224 gboolean bWasClosed = (priv->rcv_wnd == 0);
1225
1226 priv->rcv_wnd = available_space;
1227
1228 if (bWasClosed) {
1229 attempt_send(self, sfImmediateAck);
1230 }
1231 }
1232
1233 return bytesread;
1234 }
1235
1236 gint
pseudo_tcp_socket_send(PseudoTcpSocket * self,const char * buffer,guint32 len)1237 pseudo_tcp_socket_send(PseudoTcpSocket *self, const char * buffer, guint32 len)
1238 {
1239 PseudoTcpSocketPrivate *priv = self->priv;
1240 gint written;
1241 gsize available_space;
1242
1243 if (priv->state != PSEUDO_TCP_ESTABLISHED) {
1244 priv->error = pseudo_tcp_state_has_sent_fin (priv->state) ? EPIPE : ENOTCONN;
1245 return -1;
1246 }
1247
1248 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1249
1250 if (!available_space) {
1251 priv->bWriteEnable = TRUE;
1252 priv->error = EWOULDBLOCK;
1253 return -1;
1254 }
1255
1256 written = queue (self, buffer, len, FLAG_NONE);
1257 attempt_send(self, sfNone);
1258
1259 if (written > 0 && (guint32)written < len) {
1260 priv->bWriteEnable = TRUE;
1261 }
1262
1263 return written;
1264 }
1265
1266 void
pseudo_tcp_socket_close(PseudoTcpSocket * self,gboolean force)1267 pseudo_tcp_socket_close(PseudoTcpSocket *self, gboolean force)
1268 {
1269 PseudoTcpSocketPrivate *priv = self->priv;
1270
1271 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing socket %p %s", self,
1272 force ? "forcefully" : "gracefully");
1273
1274 /* Forced closure by sending an RST segment. RFC 1122, §4.2.2.13. */
1275 if (force && priv->state != PSEUDO_TCP_CLOSED) {
1276 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1277 return;
1278 }
1279
1280 /* Fall back to shutdown(). */
1281 pseudo_tcp_socket_shutdown (self, PSEUDO_TCP_SHUTDOWN_RDWR);
1282 }
1283
1284 void
pseudo_tcp_socket_shutdown(PseudoTcpSocket * self,PseudoTcpShutdown how)1285 pseudo_tcp_socket_shutdown (PseudoTcpSocket *self, PseudoTcpShutdown how)
1286 {
1287 PseudoTcpSocketPrivate *priv = self->priv;
1288
1289 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Shutting down socket %p: %u", self, how);
1290
1291 /* FIN-ACK--only stuff below here. */
1292 if (!priv->support_fin_ack) {
1293 if (priv->shutdown == SD_NONE)
1294 priv->shutdown = SD_GRACEFUL;
1295 return;
1296 }
1297
1298 /* What needs shutting down? */
1299 switch (how) {
1300 case PSEUDO_TCP_SHUTDOWN_RD:
1301 case PSEUDO_TCP_SHUTDOWN_RDWR:
1302 priv->shutdown_reads = TRUE;
1303 break;
1304 case PSEUDO_TCP_SHUTDOWN_WR:
1305 /* Handled below. */
1306 break;
1307 default:
1308 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid shutdown method: %u.", how);
1309 break;
1310 }
1311
1312 if (how == PSEUDO_TCP_SHUTDOWN_RD) {
1313 return;
1314 }
1315
1316 /* Unforced write closure. */
1317 switch (priv->state) {
1318 case PSEUDO_TCP_LISTEN:
1319 case PSEUDO_TCP_SYN_SENT:
1320 /* Just abort the connection without completing the handshake. */
1321 set_state_closed (self, 0);
1322 break;
1323 case PSEUDO_TCP_SYN_RECEIVED:
1324 case PSEUDO_TCP_ESTABLISHED:
1325 /* Local user initiating the close: RFC 793, §3.5, Cases 1 and 3.
1326 * If there is pending receive data, send RST instead of FIN;
1327 * see RFC 1122, §4.2.2.13. */
1328 if (pseudo_tcp_socket_get_available_bytes (self) > 0) {
1329 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1330 } else {
1331 queue_fin_message (self);
1332 attempt_send (self, sfFin);
1333 set_state (self, PSEUDO_TCP_FIN_WAIT_1);
1334 }
1335 break;
1336 case PSEUDO_TCP_CLOSE_WAIT:
1337 /* Remote user initiating the close: RFC 793, §3.5, Case 2.
1338 * We’ve previously received a FIN from the peer; now the user is closing
1339 * the local end of the connection. */
1340 queue_fin_message (self);
1341 attempt_send (self, sfFin);
1342 set_state (self, PSEUDO_TCP_LAST_ACK);
1343 break;
1344 case PSEUDO_TCP_CLOSING:
1345 case PSEUDO_TCP_CLOSED:
1346 /* Already closed on both sides. */
1347 break;
1348 case PSEUDO_TCP_FIN_WAIT_1:
1349 case PSEUDO_TCP_FIN_WAIT_2:
1350 case PSEUDO_TCP_TIME_WAIT:
1351 case PSEUDO_TCP_LAST_ACK:
1352 /* Already closed locally. */
1353 break;
1354 default:
1355 /* Do nothing. */
1356 break;
1357 }
1358 }
1359
1360 int
pseudo_tcp_socket_get_error(PseudoTcpSocket * self)1361 pseudo_tcp_socket_get_error(PseudoTcpSocket *self)
1362 {
1363 PseudoTcpSocketPrivate *priv = self->priv;
1364 return priv->error;
1365 }
1366
1367 //
1368 // Internal Implementation
1369 //
1370
1371 static guint32
queue(PseudoTcpSocket * self,const gchar * data,guint32 len,TcpFlags flags)1372 queue (PseudoTcpSocket *self, const gchar * data, guint32 len, TcpFlags flags)
1373 {
1374 PseudoTcpSocketPrivate *priv = self->priv;
1375 gsize available_space;
1376
1377 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1378 if (len > available_space) {
1379 g_assert_cmpint (flags, ==, FLAG_NONE);
1380 len = available_space;
1381 }
1382
1383 // We can concatenate data if the last segment is the same type
1384 // (control v. regular data), and has not been transmitted yet
1385 if (g_queue_get_length (&priv->slist) &&
1386 (((SSegment *)g_queue_peek_tail (&priv->slist))->flags == flags) &&
1387 (((SSegment *)g_queue_peek_tail (&priv->slist))->xmit == 0)) {
1388 ((SSegment *)g_queue_peek_tail (&priv->slist))->len += len;
1389 } else {
1390 SSegment *sseg = g_slice_new0 (SSegment);
1391 gsize snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1392
1393 sseg->seq = priv->snd_una + snd_buffered;
1394 sseg->len = len;
1395 sseg->flags = flags;
1396 g_queue_push_tail (&priv->slist, sseg);
1397 g_queue_push_tail (&priv->unsent_slist, sseg);
1398 }
1399
1400 //LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
1401 return pseudo_tcp_fifo_write (&priv->sbuf, (guint8*) data, len);;
1402 }
1403
1404 // Creates a packet and submits it to the network. This method can either
1405 // send payload or just an ACK packet.
1406 //
1407 // |seq| is the sequence number of this packet.
1408 // |flags| is the flags for sending this packet.
1409 // |offset| is the offset to read from |m_sbuf|.
1410 // |len| is the number of bytes to read from |m_sbuf| as payload. If this
1411 // value is 0 then this is an ACK packet, otherwise this packet has payload.
1412
1413 static PseudoTcpWriteResult
packet(PseudoTcpSocket * self,guint32 seq,TcpFlags flags,guint32 offset,guint32 len,guint32 now)1414 packet(PseudoTcpSocket *self, guint32 seq, TcpFlags flags,
1415 guint32 offset, guint32 len, guint32 now)
1416 {
1417 PseudoTcpSocketPrivate *priv = self->priv;
1418 union {
1419 guint8 u8[MAX_PACKET];
1420 guint16 u16[MAX_PACKET / 2];
1421 guint32 u32[MAX_PACKET / 4];
1422 } buffer;
1423 PseudoTcpWriteResult wres = WR_SUCCESS;
1424
1425 g_assert_cmpuint (HEADER_SIZE + len, <=, MAX_PACKET);
1426
1427 *buffer.u32 = htonl(priv->conv);
1428 *(buffer.u32 + 1) = htonl(seq);
1429 *(buffer.u32 + 2) = htonl(priv->rcv_nxt);
1430 buffer.u8[12] = 0;
1431 buffer.u8[13] = flags;
1432 *(buffer.u16 + 7) = htons((guint16)(priv->rcv_wnd >> priv->rwnd_scale));
1433
1434 // Timestamp computations
1435 *(buffer.u32 + 4) = htonl(now);
1436 *(buffer.u32 + 5) = htonl(priv->ts_recent);
1437 priv->ts_lastack = priv->rcv_nxt;
1438
1439 if (len) {
1440 gsize bytes_read;
1441
1442 bytes_read = pseudo_tcp_fifo_read_offset (&priv->sbuf, buffer.u8 + HEADER_SIZE,
1443 len, offset);
1444 g_assert_cmpint (bytes_read, ==, len);
1445 }
1446
1447 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Sending <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1448 "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1449 priv->conv, (unsigned)flags, seq, seq + len, priv->rcv_nxt, priv->rcv_wnd,
1450 now % 10000, priv->ts_recent % 10000, len);
1451
1452 wres = priv->callbacks.WritePacket(self, (gchar *) buffer.u8, len + HEADER_SIZE,
1453 priv->callbacks.user_data);
1454 /* Note: When len is 0, this is an ACK packet. We don't read the
1455 return value for those, and thus we won't retry. So go ahead and treat
1456 the packet as a success (basically simulate as if it were dropped),
1457 which will prevent our timers from being messed up. */
1458 if ((wres != WR_SUCCESS) && (0 != len))
1459 return wres;
1460
1461 priv->t_ack = 0;
1462 if (len > 0) {
1463 priv->lastsend = now;
1464 }
1465 priv->last_traffic = now;
1466 priv->bOutgoing = TRUE;
1467
1468 return WR_SUCCESS;
1469 }
1470
1471 static gboolean
parse(PseudoTcpSocket * self,const guint8 * _header_buf,gsize header_buf_len,const guint8 * data_buf,gsize data_buf_len)1472 parse (PseudoTcpSocket *self, const guint8 *_header_buf, gsize header_buf_len,
1473 const guint8 *data_buf, gsize data_buf_len)
1474 {
1475 Segment seg;
1476
1477 union {
1478 const guint8 *u8;
1479 const guint16 *u16;
1480 const guint32 *u32;
1481 } header_buf;
1482
1483 header_buf.u8 = _header_buf;
1484
1485 if (header_buf_len != 24)
1486 return FALSE;
1487
1488 seg.conv = ntohl(*header_buf.u32);
1489 seg.seq = ntohl(*(header_buf.u32 + 1));
1490 seg.ack = ntohl(*(header_buf.u32 + 2));
1491 seg.flags = header_buf.u8[13];
1492 seg.wnd = ntohs(*(header_buf.u16 + 7));
1493
1494 seg.tsval = ntohl(*(header_buf.u32 + 4));
1495 seg.tsecr = ntohl(*(header_buf.u32 + 5));
1496
1497 seg.data = (const gchar *) data_buf;
1498 seg.len = data_buf_len;
1499
1500 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1501 "Received <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1502 "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1503 seg.conv, (unsigned)seg.flags, seg.seq, seg.seq + seg.len, seg.ack,
1504 seg.wnd, seg.tsval % 10000, seg.tsecr % 10000, seg.len);
1505
1506 return process(self, &seg);
1507 }
1508
1509 /* True iff the @state requires that a FIN has already been sent by this
1510 * host. */
1511 static gboolean
pseudo_tcp_state_has_sent_fin(PseudoTcpState state)1512 pseudo_tcp_state_has_sent_fin (PseudoTcpState state)
1513 {
1514 switch (state) {
1515 case PSEUDO_TCP_LISTEN:
1516 case PSEUDO_TCP_SYN_SENT:
1517 case PSEUDO_TCP_SYN_RECEIVED:
1518 case PSEUDO_TCP_ESTABLISHED:
1519 case PSEUDO_TCP_CLOSE_WAIT:
1520 return FALSE;
1521 case PSEUDO_TCP_CLOSED:
1522 case PSEUDO_TCP_FIN_WAIT_1:
1523 case PSEUDO_TCP_FIN_WAIT_2:
1524 case PSEUDO_TCP_CLOSING:
1525 case PSEUDO_TCP_TIME_WAIT:
1526 case PSEUDO_TCP_LAST_ACK:
1527 return TRUE;
1528 default:
1529 return FALSE;
1530 }
1531 }
1532
1533 /* True iff the @state requires that a FIN has already been received from the
1534 * peer. */
1535 static gboolean
pseudo_tcp_state_has_received_fin(PseudoTcpState state)1536 pseudo_tcp_state_has_received_fin (PseudoTcpState state)
1537 {
1538 switch (state) {
1539 case PSEUDO_TCP_LISTEN:
1540 case PSEUDO_TCP_SYN_SENT:
1541 case PSEUDO_TCP_SYN_RECEIVED:
1542 case PSEUDO_TCP_ESTABLISHED:
1543 case PSEUDO_TCP_FIN_WAIT_1:
1544 case PSEUDO_TCP_FIN_WAIT_2:
1545 return FALSE;
1546 case PSEUDO_TCP_CLOSED:
1547 case PSEUDO_TCP_CLOSING:
1548 case PSEUDO_TCP_TIME_WAIT:
1549 case PSEUDO_TCP_CLOSE_WAIT:
1550 case PSEUDO_TCP_LAST_ACK:
1551 return TRUE;
1552 default:
1553 return FALSE;
1554 }
1555 }
1556
1557 /* True iff the @state requires that a FIN-ACK has already been received from
1558 * the peer. */
1559 static gboolean
pseudo_tcp_state_has_received_fin_ack(PseudoTcpState state)1560 pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state)
1561 {
1562 switch (state) {
1563 case PSEUDO_TCP_LISTEN:
1564 case PSEUDO_TCP_SYN_SENT:
1565 case PSEUDO_TCP_SYN_RECEIVED:
1566 case PSEUDO_TCP_ESTABLISHED:
1567 case PSEUDO_TCP_FIN_WAIT_1:
1568 case PSEUDO_TCP_FIN_WAIT_2:
1569 case PSEUDO_TCP_CLOSING:
1570 case PSEUDO_TCP_CLOSE_WAIT:
1571 case PSEUDO_TCP_LAST_ACK:
1572 return FALSE;
1573 case PSEUDO_TCP_CLOSED:
1574 case PSEUDO_TCP_TIME_WAIT:
1575 return TRUE;
1576 default:
1577 return FALSE;
1578 }
1579 }
1580
1581 static gboolean
process(PseudoTcpSocket * self,Segment * seg)1582 process(PseudoTcpSocket *self, Segment *seg)
1583 {
1584 PseudoTcpSocketPrivate *priv = self->priv;
1585 guint32 now;
1586 SendFlags sflags = sfNone;
1587 gboolean bIgnoreData;
1588 gboolean bNewData;
1589 gboolean bConnect = FALSE;
1590 gsize snd_buffered;
1591 gsize available_space;
1592 guint32 kIdealRefillSize;
1593 gboolean is_valuable_ack, is_duplicate_ack, is_fin_ack = FALSE;
1594 gboolean received_fin = FALSE;
1595
1596 /* If this is the wrong conversation, send a reset!?!
1597 (with the correct conversation?) */
1598 if (seg->conv != priv->conv) {
1599 //if ((seg->flags & FLAG_RST) == 0) {
1600 // packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
1601 //}
1602 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "wrong conversation");
1603 return FALSE;
1604 }
1605
1606 now = get_current_time (self);
1607 priv->last_traffic = priv->lastrecv = now;
1608 priv->bOutgoing = FALSE;
1609
1610 if (priv->state == PSEUDO_TCP_CLOSED ||
1611 (pseudo_tcp_state_has_received_fin_ack (priv->state) && seg->len > 0)) {
1612 /* Send an RST segment. See: RFC 1122, §4.2.2.13; RFC 793, §3.4, point 3,
1613 * page 37. We can only send RST if we know the peer knows we’re closed;
1614 * otherwise this could be a timeout retransmit from them, due to our
1615 * packets from data through to FIN being dropped. */
1616 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1617 "Segment received while closed; sending RST.");
1618 if ((seg->flags & FLAG_RST) == 0) {
1619 closedown (self, 0, CLOSEDOWN_LOCAL);
1620 }
1621
1622 return FALSE;
1623 }
1624
1625 // Check if this is a reset segment
1626 if (seg->flags & FLAG_RST) {
1627 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Received RST segment; closing down.");
1628 closedown (self, ECONNRESET, CLOSEDOWN_REMOTE);
1629 return FALSE;
1630 }
1631
1632 // Check for control data
1633 bConnect = FALSE;
1634 if (seg->flags & FLAG_CTL) {
1635 if (seg->len == 0) {
1636 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Missing control code");
1637 return FALSE;
1638 } else if (seg->data[0] == CTL_CONNECT) {
1639 bConnect = TRUE;
1640
1641 parse_options (self, (guint8 *) &seg->data[1], seg->len - 1);
1642
1643 if (priv->state == PSEUDO_TCP_LISTEN) {
1644 set_state (self, PSEUDO_TCP_SYN_RECEIVED);
1645 queue_connect_message (self);
1646 } else if (priv->state == PSEUDO_TCP_SYN_SENT) {
1647 set_state_established (self);
1648 }
1649 } else {
1650 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Unknown control code: %u", seg->data[0]);
1651 return FALSE;
1652 }
1653 }
1654
1655 // Update timestamp
1656 if (SMALLER_OR_EQUAL (seg->seq, priv->ts_lastack) &&
1657 SMALLER (priv->ts_lastack, seg->seq + seg->len)) {
1658 priv->ts_recent = seg->tsval;
1659 }
1660
1661 // Check if this is a valuable ack
1662 is_valuable_ack = (LARGER(seg->ack, priv->snd_una) &&
1663 SMALLER_OR_EQUAL(seg->ack, priv->snd_nxt));
1664 is_duplicate_ack = (seg->ack == priv->snd_una);
1665
1666 if (is_valuable_ack) {
1667 guint32 nAcked;
1668 guint32 nFree;
1669
1670 // Calculate round-trip time
1671 if (seg->tsecr) {
1672 long rtt = time_diff(now, seg->tsecr);
1673 if (rtt >= 0) {
1674 if (priv->rx_srtt == 0) {
1675 priv->rx_srtt = rtt;
1676 priv->rx_rttvar = rtt / 2;
1677 } else {
1678 priv->rx_rttvar = (3 * priv->rx_rttvar +
1679 labs((long)(rtt - priv->rx_srtt))) / 4;
1680 priv->rx_srtt = (7 * priv->rx_srtt + rtt) / 8;
1681 }
1682 priv->rx_rto = bound(MIN_RTO,
1683 priv->rx_srtt + max(1LU, 4 * priv->rx_rttvar), MAX_RTO);
1684
1685 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "rtt: %ld srtt: %u rttvar: %u rto: %u",
1686 rtt, priv->rx_srtt, priv->rx_rttvar, priv->rx_rto);
1687 } else {
1688 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid RTT: %ld", rtt);
1689 return FALSE;
1690 }
1691
1692 priv->last_acked_ts = seg->tsecr;
1693 }
1694
1695 priv->snd_wnd = seg->wnd << priv->swnd_scale;
1696
1697 nAcked = seg->ack - priv->snd_una;
1698 priv->snd_una = seg->ack;
1699
1700 priv->rto_base = (priv->snd_una == priv->snd_nxt) ? 0 : now;
1701
1702 /* ACKs for FIN segments give an increment on nAcked, but there is no
1703 * corresponding byte to read because the FIN segment is empty (it just has
1704 * a sequence number). */
1705 if (nAcked == priv->sbuf.data_length + 1 &&
1706 pseudo_tcp_state_has_sent_fin (priv->state)) {
1707 is_fin_ack = TRUE;
1708 nAcked--;
1709 }
1710
1711 pseudo_tcp_fifo_consume_read_data (&priv->sbuf, nAcked);
1712
1713 for (nFree = nAcked; nFree > 0; ) {
1714 SSegment *data;
1715
1716 g_assert_cmpuint (g_queue_get_length (&priv->slist), !=, 0);
1717 data = (SSegment *) g_queue_peek_head (&priv->slist);
1718
1719 if (nFree < data->len) {
1720 data->len -= nFree;
1721 data->seq += nFree;
1722 nFree = 0;
1723 } else {
1724 if (data->len > priv->largest) {
1725 priv->largest = data->len;
1726 }
1727 nFree -= data->len;
1728 g_slice_free (SSegment, data);
1729 g_queue_pop_head (&priv->slist);
1730 }
1731 }
1732
1733 if (priv->dup_acks >= 3) {
1734 if (LARGER_OR_EQUAL (priv->snd_una, priv->recover)) { // NewReno
1735 guint32 nInFlight = priv->snd_nxt - priv->snd_una;
1736 // (Fast Retransmit)
1737 priv->cwnd = min(priv->ssthresh,
1738 max (nInFlight, priv->mss) + priv->mss);
1739 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery cwnd=%d ssthresh=%d nInFlight=%d mss: %d", priv->cwnd, priv->ssthresh, nInFlight, priv->mss);
1740 priv->fast_recovery = FALSE;
1741 priv->dup_acks = 0;
1742 } else {
1743 int transmit_status;
1744
1745 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1746 transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
1747 if (transmit_status != 0) {
1748 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1749 "Error transmitting recovery retransmit segment. Closing down.");
1750 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1751 return FALSE;
1752 }
1753 priv->cwnd += (nAcked > priv->mss ? priv->mss : 0) -
1754 min(nAcked, priv->cwnd);
1755 }
1756 } else {
1757 priv->dup_acks = 0;
1758 // Slow start, congestion avoidance
1759 if (priv->cwnd < priv->ssthresh) {
1760 priv->cwnd += priv->mss;
1761 } else {
1762 priv->cwnd += max(1LU, priv->mss * priv->mss / priv->cwnd);
1763 }
1764 }
1765 } else if (is_duplicate_ack) {
1766 /* !?! Note, tcp says don't do this... but otherwise how does a
1767 closed window become open? */
1768 priv->snd_wnd = seg->wnd << priv->swnd_scale;
1769
1770 // Check duplicate acks
1771 if (seg->len > 0) {
1772 // it's a dup ack, but with a data payload, so don't modify priv->dup_acks
1773 } else if (priv->snd_una != priv->snd_nxt) {
1774 guint32 nInFlight;
1775
1776 priv->dup_acks += 1;
1777 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Received dup ack (dups: %u)",
1778 priv->dup_acks);
1779 if (priv->dup_acks == 3) { // (Fast Retransmit)
1780 int transmit_status;
1781
1782
1783 if (LARGER_OR_EQUAL (priv->snd_una, priv->recover) ||
1784 seg->tsecr == priv->last_acked_ts) { /* NewReno */
1785 /* Invoke fast retransmit RFC3782 section 3 step 1A*/
1786 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "enter recovery");
1787 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1788
1789 transmit_status = transmit(self, g_queue_peek_head (&priv->slist),
1790 now);
1791 if (transmit_status != 0) {
1792 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1793 "Error transmitting recovery retransmit segment. Closing down.");
1794
1795 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1796 return FALSE;
1797 }
1798 priv->recover = priv->snd_nxt;
1799 nInFlight = priv->snd_nxt - priv->snd_una;
1800 priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
1801 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1802 "ssthresh: %u = max((nInFlight: %u / 2), 2 * mss: %u)",
1803 priv->ssthresh, nInFlight, priv->mss);
1804 priv->cwnd = priv->ssthresh + 3 * priv->mss;
1805 priv->fast_recovery = TRUE;
1806 } else {
1807 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1808 "Skipping fast recovery: recover: %u snd_una: %u", priv->recover,
1809 priv->snd_una);
1810 }
1811 } else if (priv->dup_acks > 3) {
1812 if (priv->fast_recovery)
1813 priv->cwnd += priv->mss;
1814 }
1815 } else {
1816 priv->dup_acks = 0;
1817 }
1818 }
1819
1820 // !?! A bit hacky
1821 if ((priv->state == PSEUDO_TCP_SYN_RECEIVED) && !bConnect) {
1822 set_state_established (self);
1823 }
1824
1825 /* Check for connection closure. Only pay attention to FIN segments if they
1826 * are in sequence; otherwise we’ve missed a packet earlier in the stream and
1827 * need to request retransmission first. */
1828 if (priv->support_fin_ack) {
1829 /* @received_fin is set when, and only when, all segments preceding the FIN
1830 * have been acknowledged. This is to handle the case where the FIN arrives
1831 * out of order with a preceding data segment. */
1832 if (seg->flags & FLAG_FIN) {
1833 priv->rcv_fin = seg->seq;
1834 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting rcv_fin = %u", priv->rcv_fin);
1835 }
1836
1837 /* For the moment, FIN segments must not contain data. */
1838 if (seg->flags & FLAG_FIN && seg->len != 0) {
1839 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN segment contained data; ignored");
1840 return FALSE;
1841 }
1842
1843 received_fin = (priv->rcv_nxt != 0 && priv->rcv_nxt + seg->len == priv->rcv_fin);
1844
1845 /* Update the state machine, implementing all transitions on ‘rcv FIN’ or
1846 * ‘rcv ACK of FIN’ from RFC 793, Figure 6; and RFC 1122, §4.2.2.8. */
1847 switch (priv->state) {
1848 case PSEUDO_TCP_ESTABLISHED:
1849 if (received_fin) {
1850 /* Received a FIN from the network, RFC 793, §3.5, Case 2.
1851 * The code below will send an ACK for the FIN. */
1852 set_state (self, PSEUDO_TCP_CLOSE_WAIT);
1853 }
1854 break;
1855 case PSEUDO_TCP_CLOSING:
1856 if (is_fin_ack) {
1857 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 3. */
1858 set_state (self, PSEUDO_TCP_TIME_WAIT);
1859 }
1860 break;
1861 case PSEUDO_TCP_LAST_ACK:
1862 if (is_fin_ack) {
1863 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 2. */
1864 set_state_closed (self, 0);
1865 }
1866 break;
1867 case PSEUDO_TCP_FIN_WAIT_1:
1868 if (is_fin_ack && received_fin) {
1869 /* Simultaneous close with an ACK for a FIN previously sent,
1870 * RFC 793, §3.5, Case 3. */
1871 set_state (self, PSEUDO_TCP_TIME_WAIT);
1872 } else if (is_fin_ack) {
1873 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 1. */
1874 set_state (self, PSEUDO_TCP_FIN_WAIT_2);
1875 } else if (received_fin) {
1876 /* Simultaneous close, RFC 793, §3.5, Case 3. */
1877 set_state (self, PSEUDO_TCP_CLOSING);
1878 }
1879 break;
1880 case PSEUDO_TCP_FIN_WAIT_2:
1881 if (received_fin) {
1882 /* Local user closed the connection, RFC 793, §3.5, Case 1. */
1883 set_state (self, PSEUDO_TCP_TIME_WAIT);
1884 }
1885 break;
1886 case PSEUDO_TCP_LISTEN:
1887 case PSEUDO_TCP_SYN_SENT:
1888 case PSEUDO_TCP_SYN_RECEIVED:
1889 case PSEUDO_TCP_TIME_WAIT:
1890 case PSEUDO_TCP_CLOSED:
1891 case PSEUDO_TCP_CLOSE_WAIT:
1892 /* Shouldn’t ever hit these cases. */
1893 if (received_fin) {
1894 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1895 "Unexpected state %u when FIN received", priv->state);
1896 } else if (is_fin_ack) {
1897 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1898 "Unexpected state %u when FIN-ACK received", priv->state);
1899 }
1900 break;
1901 default:
1902 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid state %u when FIN received",
1903 priv->state);
1904 return FALSE;
1905 }
1906 } else if (seg->flags & FLAG_FIN) {
1907 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1908 "Invalid FIN received when FIN-ACK support is disabled");
1909 } else if (is_fin_ack) {
1910 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1911 "Invalid FIN-ACK received when FIN-ACK support is disabled");
1912 }
1913
1914 // If we make room in the send queue, notify the user
1915 // The goal it to make sure we always have at least enough data to fill the
1916 // window. We'd like to notify the app when we are halfway to that point.
1917 kIdealRefillSize = (priv->sbuf_len + priv->rbuf_len) / 2;
1918
1919 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1920 if (priv->bWriteEnable && snd_buffered < kIdealRefillSize) {
1921 priv->bWriteEnable = FALSE;
1922 if (priv->callbacks.PseudoTcpWritable)
1923 priv->callbacks.PseudoTcpWritable(self, priv->callbacks.user_data);
1924 }
1925
1926 /* Conditions where acks must be sent:
1927 * 1) Segment is too old (they missed an ACK) (immediately)
1928 * 2) Segment is too new (we missed a segment) (immediately)
1929 * 3) Segment has data (so we need to ACK!) (delayed)
1930 * ... so the only time we don't need to ACK, is an empty segment
1931 * that points to rcv_nxt!
1932 * 4) Segment has the FIN flag set (immediately) — note that the FIN flag
1933 * itself has to be included in the ACK as a numbered byte;
1934 * see RFC 793, §3.3. Also see: RFC 793, §3.5.
1935 */
1936 if (seg->seq != priv->rcv_nxt) {
1937 sflags = sfDuplicateAck; // (Fast Recovery)
1938 } else if (seg->len != 0) {
1939 if (priv->ack_delay == 0) {
1940 sflags = sfImmediateAck;
1941 } else {
1942 sflags = sfDelayedAck;
1943 }
1944 } else if (received_fin) {
1945 /* FIN flags have a sequence number. Only acknowledge them after all
1946 * preceding octets have been acknowledged. */
1947 sflags = sfImmediateAck;
1948 }
1949
1950 if (sflags == sfDuplicateAck) {
1951 if (seg->seq > priv->rcv_nxt) {
1952 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too new");
1953 } else if (SMALLER_OR_EQUAL(seg->seq + seg->len, priv->rcv_nxt)) {
1954 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too old");
1955 }
1956 }
1957
1958 // Adjust the incoming segment to fit our receive buffer
1959 if (SMALLER(seg->seq, priv->rcv_nxt)) {
1960 guint32 nAdjust = priv->rcv_nxt - seg->seq;
1961 if (nAdjust < seg->len) {
1962 seg->seq += nAdjust;
1963 seg->data += nAdjust;
1964 seg->len -= nAdjust;
1965 } else {
1966 seg->len = 0;
1967 }
1968 }
1969
1970 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
1971
1972 if ((seg->seq + seg->len - priv->rcv_nxt) > available_space) {
1973 guint32 nAdjust = seg->seq + seg->len - priv->rcv_nxt - available_space;
1974 if (nAdjust < seg->len) {
1975 seg->len -= nAdjust;
1976 } else {
1977 seg->len = 0;
1978 }
1979 }
1980
1981 bIgnoreData = (seg->flags & FLAG_CTL);
1982 if (!priv->support_fin_ack)
1983 bIgnoreData |= (priv->shutdown != SD_NONE);
1984
1985 bNewData = FALSE;
1986
1987 if (seg->len > 0) {
1988 if (bIgnoreData) {
1989 if (seg->seq == priv->rcv_nxt) {
1990 priv->rcv_nxt += seg->len;
1991 }
1992 } else {
1993 guint32 nOffset = seg->seq - priv->rcv_nxt;
1994 gsize res;
1995
1996 res = pseudo_tcp_fifo_write_offset (&priv->rbuf, (guint8 *) seg->data,
1997 seg->len, nOffset);
1998 g_assert_cmpint (res, ==, seg->len);
1999
2000 if (seg->seq == priv->rcv_nxt) {
2001 GList *iter = NULL;
2002
2003 pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, seg->len);
2004 priv->rcv_nxt += seg->len;
2005 priv->rcv_wnd -= seg->len;
2006 bNewData = TRUE;
2007
2008 iter = priv->rlist;
2009 while (iter &&
2010 SMALLER_OR_EQUAL(((RSegment *)iter->data)->seq, priv->rcv_nxt)) {
2011 RSegment *data = (RSegment *)(iter->data);
2012 if (LARGER (data->seq + data->len, priv->rcv_nxt)) {
2013 guint32 nAdjust = (data->seq + data->len) - priv->rcv_nxt;
2014 sflags = sfImmediateAck; // (Fast Recovery)
2015 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Recovered %u bytes (%u -> %u)",
2016 nAdjust, priv->rcv_nxt, priv->rcv_nxt + nAdjust);
2017 pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, nAdjust);
2018 priv->rcv_nxt += nAdjust;
2019 priv->rcv_wnd -= nAdjust;
2020 }
2021 g_slice_free (RSegment, priv->rlist->data);
2022 priv->rlist = g_list_delete_link (priv->rlist, priv->rlist);
2023 iter = priv->rlist;
2024 }
2025 } else {
2026 GList *iter = NULL;
2027 RSegment *rseg = g_slice_new0 (RSegment);
2028
2029 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Saving %u bytes (%u -> %u)",
2030 seg->len, seg->seq, seg->seq + seg->len);
2031 rseg->seq = seg->seq;
2032 rseg->len = seg->len;
2033 iter = priv->rlist;
2034 while (iter && SMALLER (((RSegment*)iter->data)->seq, rseg->seq)) {
2035 iter = g_list_next (iter);
2036 }
2037 priv->rlist = g_list_insert_before(priv->rlist, iter, rseg);
2038 }
2039 }
2040 }
2041
2042 if (received_fin) {
2043 /* FIN flags have a sequence number. */
2044 priv->rcv_nxt++;
2045 }
2046
2047
2048 attempt_send(self, sflags);
2049
2050 // If we have new data, notify the user
2051 if (bNewData && priv->bReadEnable) {
2052 /* priv->bReadEnable = FALSE; — removed so that we’re always notified of
2053 * incoming pseudo-TCP data, rather than having to read the entire buffer
2054 * on each readable() callback before the next callback is enabled.
2055 * (When client-provided buffers are small, this is not possible.) */
2056 if (priv->callbacks.PseudoTcpReadable)
2057 priv->callbacks.PseudoTcpReadable(self, priv->callbacks.user_data);
2058 }
2059
2060 return TRUE;
2061 }
2062
2063 static gboolean
transmit(PseudoTcpSocket * self,SSegment * segment,guint32 now)2064 transmit(PseudoTcpSocket *self, SSegment *segment, guint32 now)
2065 {
2066 PseudoTcpSocketPrivate *priv = self->priv;
2067 guint32 nTransmit = min(segment->len, priv->mss);
2068
2069 if (segment->xmit >= ((priv->state == PSEUDO_TCP_ESTABLISHED) ? 15 : 30)) {
2070 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too many retransmits");
2071 return ETIMEDOUT;
2072 }
2073
2074 while (TRUE) {
2075 guint32 seq = segment->seq;
2076 guint8 flags = segment->flags;
2077 PseudoTcpWriteResult wres;
2078
2079 /* The packet must not have already been acknowledged. */
2080 g_assert_cmpuint (segment->seq - priv->snd_una, <=, 1024 * 1024 * 64);
2081
2082 /* Write out the packet. */
2083 wres = packet(self, seq, flags,
2084 segment->seq - priv->snd_una, nTransmit, now);
2085
2086 if (wres == WR_SUCCESS)
2087 break;
2088
2089 if (wres == WR_FAIL) {
2090 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "packet failed");
2091 return ECONNABORTED; /* FIXME: This error code doesn’t quite seem right */
2092 }
2093
2094 g_assert_cmpint (wres, ==, WR_TOO_LARGE);
2095
2096 while (TRUE) {
2097 if (PACKET_MAXIMUMS[priv->msslevel + 1] == 0) {
2098 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "MTU too small");
2099 return EMSGSIZE;
2100 }
2101 /* !?! We need to break up all outstanding and pending packets
2102 and then retransmit!?! */
2103
2104 priv->mss = PACKET_MAXIMUMS[++priv->msslevel] - PACKET_OVERHEAD;
2105 // I added this... haven't researched actual formula
2106 priv->cwnd = 2 * priv->mss;
2107
2108 if (priv->mss < nTransmit) {
2109 nTransmit = priv->mss;
2110 break;
2111 }
2112 }
2113 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes ", priv->mss);
2114 }
2115
2116 if (nTransmit < segment->len) {
2117 SSegment *subseg = g_slice_new0 (SSegment);
2118 subseg->seq = segment->seq + nTransmit;
2119 subseg->len = segment->len - nTransmit;
2120 subseg->flags = segment->flags;
2121 subseg->xmit = segment->xmit;
2122
2123 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "mss reduced to %u", priv->mss);
2124
2125 segment->len = nTransmit;
2126 g_queue_insert_after (&priv->slist,
2127 g_queue_find (&priv->slist, segment), subseg);
2128 if (subseg->xmit == 0)
2129 g_queue_insert_after (&priv->unsent_slist,
2130 g_queue_find (&priv->unsent_slist, segment), subseg);
2131 }
2132
2133 if (segment->xmit == 0) {
2134 g_assert (g_queue_peek_head (&priv->unsent_slist) == segment);
2135 g_queue_pop_head (&priv->unsent_slist);
2136 priv->snd_nxt += segment->len;
2137
2138 /* FIN flags require acknowledgement. */
2139 if (segment->len == 0 && segment->flags & FLAG_FIN)
2140 priv->snd_nxt++;
2141 }
2142 segment->xmit += 1;
2143
2144 if (priv->rto_base == 0) {
2145 priv->rto_base = now;
2146 }
2147
2148 return 0;
2149 }
2150
2151 static void
attempt_send(PseudoTcpSocket * self,SendFlags sflags)2152 attempt_send(PseudoTcpSocket *self, SendFlags sflags)
2153 {
2154 PseudoTcpSocketPrivate *priv = self->priv;
2155 guint32 now = get_current_time (self);
2156 gboolean bFirst = TRUE;
2157
2158 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Attempting send with flags %u.", sflags);
2159
2160 if (time_diff(now, priv->lastsend) > (long) priv->rx_rto) {
2161 priv->cwnd = priv->mss;
2162 }
2163
2164
2165 while (TRUE) {
2166 guint32 cwnd;
2167 guint32 nWindow;
2168 guint32 nInFlight;
2169 guint32 nUseable;
2170 guint32 nAvailable;
2171 gsize snd_buffered;
2172 GList *iter;
2173 SSegment *sseg;
2174 int transmit_status;
2175
2176 cwnd = priv->cwnd;
2177 if ((priv->dup_acks == 1) || (priv->dup_acks == 2)) { // Limited Transmit
2178 cwnd += priv->dup_acks * priv->mss;
2179 }
2180 nWindow = min(priv->snd_wnd, cwnd);
2181 nInFlight = priv->snd_nxt - priv->snd_una;
2182 nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
2183 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
2184 if (snd_buffered < nInFlight) /* iff a FIN has been sent */
2185 nAvailable = 0;
2186 else
2187 nAvailable = min(snd_buffered - nInFlight, priv->mss);
2188
2189 if (nAvailable > nUseable) {
2190 if (nUseable * 4 < nWindow) {
2191 // RFC 813 - avoid SWS
2192 nAvailable = 0;
2193 } else {
2194 nAvailable = nUseable;
2195 }
2196 }
2197
2198 if (bFirst) {
2199 gsize available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
2200
2201 bFirst = FALSE;
2202 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "[cwnd: %u nWindow: %u nInFlight: %u "
2203 "nAvailable: %u nQueued: %" G_GSIZE_FORMAT " nEmpty: %" G_GSIZE_FORMAT
2204 " nWaiting: %zu ssthresh: %u]",
2205 priv->cwnd, nWindow, nInFlight, nAvailable, snd_buffered,
2206 available_space, snd_buffered - nInFlight, priv->ssthresh);
2207 }
2208
2209 if (sflags == sfDuplicateAck) {
2210 packet(self, priv->snd_nxt, 0, 0, 0, now);
2211 sflags = sfNone;
2212 continue;
2213 }
2214
2215 if (nAvailable == 0 && sflags != sfFin && sflags != sfRst) {
2216 if (sflags == sfNone)
2217 return;
2218
2219 // If this is an immediate ack, or the second delayed ack
2220 if ((sflags == sfImmediateAck || sflags == sfDuplicateAck) ||
2221 priv->t_ack) {
2222 packet(self, priv->snd_nxt, 0, 0, 0, now);
2223 } else {
2224 priv->t_ack = now;
2225 }
2226 return;
2227 }
2228
2229 // Nagle algorithm
2230 // If there is data already in-flight, and we haven't a full segment of
2231 // data ready to send then hold off until we get more to send, or the
2232 // in-flight data is acknowledged.
2233 if (priv->use_nagling && sflags != sfFin && sflags != sfRst &&
2234 (priv->snd_nxt > priv->snd_una) &&
2235 (nAvailable < priv->mss)) {
2236 return;
2237 }
2238
2239 // Find the next segment to transmit
2240 iter = g_queue_peek_head_link (&priv->unsent_slist);
2241 if (iter == NULL)
2242 return;
2243 sseg = iter->data;
2244
2245 // If the segment is too large, break it into two
2246 if (sseg->len > nAvailable && sflags != sfFin && sflags != sfRst) {
2247 SSegment *subseg = g_slice_new0 (SSegment);
2248 subseg->seq = sseg->seq + nAvailable;
2249 subseg->len = sseg->len - nAvailable;
2250 subseg->flags = sseg->flags;
2251
2252 sseg->len = nAvailable;
2253 g_queue_insert_after (&priv->unsent_slist, iter, subseg);
2254 g_queue_insert_after (&priv->slist, g_queue_find (&priv->slist, sseg),
2255 subseg);
2256 }
2257
2258 transmit_status = transmit(self, sseg, now);
2259 if (transmit_status != 0) {
2260 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "transmit failed");
2261
2262 // TODO: Is this the right thing ?
2263 closedown (self, transmit_status, CLOSEDOWN_REMOTE);
2264 return;
2265 }
2266
2267 if (sflags == sfImmediateAck || sflags == sfDelayedAck)
2268 sflags = sfNone;
2269 }
2270 }
2271
2272 /* If @source is %CLOSEDOWN_REMOTE, don’t send an RST packet, since closedown()
2273 * has been called as a result of an RST segment being received.
2274 * See: RFC 1122, §4.2.2.13. */
2275 static void
closedown(PseudoTcpSocket * self,guint32 err,ClosedownSource source)2276 closedown (PseudoTcpSocket *self, guint32 err, ClosedownSource source)
2277 {
2278 PseudoTcpSocketPrivate *priv = self->priv;
2279
2280 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing down socket %p with %s error %u.",
2281 self, (source == CLOSEDOWN_LOCAL) ? "local" : "remote", err);
2282
2283 if (source == CLOSEDOWN_LOCAL && priv->support_fin_ack) {
2284 queue_rst_message (self);
2285 attempt_send (self, sfRst);
2286 } else if (source == CLOSEDOWN_LOCAL) {
2287 priv->shutdown = SD_FORCEFUL;
2288 }
2289
2290 /* ‘Cute’ little navigation through the state machine to avoid breaking the
2291 * invariant that CLOSED can only be reached from TIME-WAIT or LAST-ACK. */
2292 switch (priv->state) {
2293 case PSEUDO_TCP_LISTEN:
2294 case PSEUDO_TCP_SYN_SENT:
2295 break;
2296 case PSEUDO_TCP_SYN_RECEIVED:
2297 case PSEUDO_TCP_ESTABLISHED:
2298 set_state (self, PSEUDO_TCP_FIN_WAIT_1);
2299 /* Fall through. */
2300 case PSEUDO_TCP_FIN_WAIT_1:
2301 set_state (self, PSEUDO_TCP_FIN_WAIT_2);
2302 /* Fall through. */
2303 case PSEUDO_TCP_FIN_WAIT_2:
2304 case PSEUDO_TCP_CLOSING:
2305 set_state (self, PSEUDO_TCP_TIME_WAIT);
2306 break;
2307 case PSEUDO_TCP_CLOSE_WAIT:
2308 set_state (self, PSEUDO_TCP_LAST_ACK);
2309 break;
2310 case PSEUDO_TCP_LAST_ACK:
2311 case PSEUDO_TCP_TIME_WAIT:
2312 case PSEUDO_TCP_CLOSED:
2313 default:
2314 break;
2315 }
2316
2317 set_state_closed (self, err);
2318 }
2319
2320 static void
adjustMTU(PseudoTcpSocket * self)2321 adjustMTU(PseudoTcpSocket *self)
2322 {
2323 PseudoTcpSocketPrivate *priv = self->priv;
2324
2325 // Determine our current mss level, so that we can adjust appropriately later
2326 for (priv->msslevel = 0;
2327 PACKET_MAXIMUMS[priv->msslevel + 1] > 0;
2328 ++priv->msslevel) {
2329 if (((guint16)PACKET_MAXIMUMS[priv->msslevel]) <= priv->mtu_advise) {
2330 break;
2331 }
2332 }
2333 priv->mss = priv->mtu_advise - PACKET_OVERHEAD;
2334 // !?! Should we reset priv->largest here?
2335 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes", priv->mss);
2336 // Enforce minimums on ssthresh and cwnd
2337 priv->ssthresh = max(priv->ssthresh, 2 * priv->mss);
2338 priv->cwnd = max(priv->cwnd, priv->mss);
2339 }
2340
2341 static void
apply_window_scale_option(PseudoTcpSocket * self,guint8 scale_factor)2342 apply_window_scale_option (PseudoTcpSocket *self, guint8 scale_factor)
2343 {
2344 PseudoTcpSocketPrivate *priv = self->priv;
2345
2346 priv->swnd_scale = scale_factor;
2347 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting scale factor to %u", scale_factor);
2348 }
2349
2350 static void
apply_fin_ack_option(PseudoTcpSocket * self)2351 apply_fin_ack_option (PseudoTcpSocket *self)
2352 {
2353 PseudoTcpSocketPrivate *priv = self->priv;
2354
2355 priv->support_fin_ack = TRUE;
2356 }
2357
2358 static void
apply_option(PseudoTcpSocket * self,guint8 kind,const guint8 * data,guint32 len)2359 apply_option (PseudoTcpSocket *self, guint8 kind, const guint8 *data,
2360 guint32 len)
2361 {
2362 switch (kind) {
2363 case TCP_OPT_MSS:
2364 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
2365 "Peer specified MSS option which is not supported.");
2366 // TODO: Implement.
2367 break;
2368 case TCP_OPT_WND_SCALE:
2369 // Window scale factor.
2370 // http://www.ietf.org/rfc/rfc1323.txt
2371 if (len != 1) {
2372 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid window scale option received.");
2373 return;
2374 }
2375 apply_window_scale_option(self, data[0]);
2376 break;
2377 case TCP_OPT_FIN_ACK:
2378 // FIN-ACK support.
2379 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN-ACK support enabled.");
2380 apply_fin_ack_option (self);
2381 break;
2382 case TCP_OPT_EOL:
2383 case TCP_OPT_NOOP:
2384 /* Nothing to do. */
2385 break;
2386 default:
2387 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid TCP option %u", kind);
2388 break;
2389 }
2390 }
2391
2392
2393 static void
parse_options(PseudoTcpSocket * self,const guint8 * data,guint32 len)2394 parse_options (PseudoTcpSocket *self, const guint8 *data, guint32 len)
2395 {
2396 PseudoTcpSocketPrivate *priv = self->priv;
2397 gboolean has_window_scaling_option = FALSE;
2398 gboolean has_fin_ack_option = FALSE;
2399 guint32 pos = 0;
2400
2401 // See http://www.freesoft.org/CIE/Course/Section4/8.htm for
2402 // parsing the options list.
2403 while (pos < len) {
2404 guint8 kind = TCP_OPT_EOL;
2405 guint8 opt_len;
2406
2407 if (len < pos + 1)
2408 return;
2409
2410 kind = data[pos];
2411 pos++;
2412
2413 if (kind == TCP_OPT_EOL) {
2414 // End of option list.
2415 break;
2416 } else if (kind == TCP_OPT_NOOP) {
2417 // No op.
2418 continue;
2419 }
2420
2421 if (len < pos + 1)
2422 return;
2423
2424 // Length of this option.
2425 opt_len = data[pos];
2426 pos++;
2427
2428 if (len < pos + opt_len)
2429 return;
2430
2431 // Content of this option.
2432 if (opt_len <= len - pos) {
2433 apply_option (self, kind, data + pos, opt_len);
2434 pos += opt_len;
2435 } else {
2436 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid option length received.");
2437 return;
2438 }
2439
2440 if (kind == TCP_OPT_WND_SCALE)
2441 has_window_scaling_option = TRUE;
2442 else if (kind == TCP_OPT_FIN_ACK)
2443 has_fin_ack_option = TRUE;
2444 }
2445
2446 if (!has_window_scaling_option) {
2447 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support window scaling");
2448 if (priv->rwnd_scale > 0) {
2449 // Peer doesn't support TCP options and window scaling.
2450 // Revert receive buffer size to default value.
2451 resize_receive_buffer (self, DEFAULT_RCV_BUF_SIZE);
2452 priv->swnd_scale = 0;
2453 }
2454 }
2455
2456 if (!has_fin_ack_option) {
2457 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support FIN-ACK");
2458 priv->support_fin_ack = FALSE;
2459 }
2460 }
2461
2462 static void
resize_send_buffer(PseudoTcpSocket * self,guint32 new_size)2463 resize_send_buffer (PseudoTcpSocket *self, guint32 new_size)
2464 {
2465 PseudoTcpSocketPrivate *priv = self->priv;
2466
2467 priv->sbuf_len = new_size;
2468 pseudo_tcp_fifo_set_capacity (&priv->sbuf, new_size);
2469 }
2470
2471
2472 static void
resize_receive_buffer(PseudoTcpSocket * self,guint32 new_size)2473 resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size)
2474 {
2475 PseudoTcpSocketPrivate *priv = self->priv;
2476 guint8 scale_factor = 0;
2477 gboolean result;
2478 gsize available_space;
2479
2480 if (priv->rbuf_len == new_size)
2481 return;
2482
2483 // Determine the scale factor such that the scaled window size can fit
2484 // in a 16-bit unsigned integer.
2485 while (new_size > 0xFFFF) {
2486 ++scale_factor;
2487 new_size >>= 1;
2488 }
2489
2490 // Determine the proper size of the buffer.
2491 new_size <<= scale_factor;
2492 result = pseudo_tcp_fifo_set_capacity (&priv->rbuf, new_size);
2493
2494 // Make sure the new buffer is large enough to contain data in the old
2495 // buffer. This should always be true because this method is called either
2496 // before connection is established or when peers are exchanging connect
2497 // messages.
2498 g_assert (result);
2499 priv->rbuf_len = new_size;
2500 priv->rwnd_scale = scale_factor;
2501 priv->ssthresh = new_size;
2502
2503 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
2504 priv->rcv_wnd = available_space;
2505 }
2506
2507 gint
pseudo_tcp_socket_get_available_bytes(PseudoTcpSocket * self)2508 pseudo_tcp_socket_get_available_bytes (PseudoTcpSocket *self)
2509 {
2510 PseudoTcpSocketPrivate *priv = self->priv;
2511
2512 return pseudo_tcp_fifo_get_buffered (&priv->rbuf);
2513 }
2514
2515 gboolean
pseudo_tcp_socket_can_send(PseudoTcpSocket * self)2516 pseudo_tcp_socket_can_send (PseudoTcpSocket *self)
2517 {
2518 return (pseudo_tcp_socket_get_available_send_space (self) > 0);
2519 }
2520
2521 gsize
pseudo_tcp_socket_get_available_send_space(PseudoTcpSocket * self)2522 pseudo_tcp_socket_get_available_send_space (PseudoTcpSocket *self)
2523 {
2524 PseudoTcpSocketPrivate *priv = self->priv;
2525 gsize ret;
2526
2527 if (!pseudo_tcp_state_has_sent_fin (priv->state)) {
2528 ret = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
2529 } else {
2530 ret = 0;
2531 }
2532
2533 if (ret == 0)
2534 priv->bWriteEnable = TRUE;
2535
2536 return ret;
2537 }
2538
2539 /* State names are capitalised and formatted as in RFC 793. */
2540 static const gchar *
pseudo_tcp_state_get_name(PseudoTcpState state)2541 pseudo_tcp_state_get_name (PseudoTcpState state)
2542 {
2543 switch (state) {
2544 case PSEUDO_TCP_LISTEN: return "LISTEN";
2545 case PSEUDO_TCP_SYN_SENT: return "SYN-SENT";
2546 case PSEUDO_TCP_SYN_RECEIVED: return "SYN-RECEIVED";
2547 case PSEUDO_TCP_ESTABLISHED: return "ESTABLISHED";
2548 case PSEUDO_TCP_CLOSED: return "CLOSED";
2549 case PSEUDO_TCP_FIN_WAIT_1: return "FIN-WAIT-1";
2550 case PSEUDO_TCP_FIN_WAIT_2: return "FIN-WAIT-2";
2551 case PSEUDO_TCP_CLOSING: return "CLOSING";
2552 case PSEUDO_TCP_TIME_WAIT: return "TIME-WAIT";
2553 case PSEUDO_TCP_CLOSE_WAIT: return "CLOSE-WAIT";
2554 case PSEUDO_TCP_LAST_ACK: return "LAST-ACK";
2555 default: return "UNKNOWN";
2556 }
2557 }
2558
2559 static void
set_state(PseudoTcpSocket * self,PseudoTcpState new_state)2560 set_state (PseudoTcpSocket *self, PseudoTcpState new_state)
2561 {
2562 PseudoTcpSocketPrivate *priv = self->priv;
2563 PseudoTcpState old_state = priv->state;
2564
2565 if (new_state == old_state)
2566 return;
2567
2568 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State %s → %s.",
2569 pseudo_tcp_state_get_name (old_state),
2570 pseudo_tcp_state_get_name (new_state));
2571
2572 /* Check whether it’s a valid state transition. */
2573 #define TRANSITION(OLD, NEW) \
2574 (old_state == PSEUDO_TCP_##OLD && \
2575 new_state == PSEUDO_TCP_##NEW)
2576
2577 /* Valid transitions. See: RFC 793, p23; RFC 1122, §4.2.2.8. */
2578 g_assert (/* RFC 793, p23. */
2579 TRANSITION (CLOSED, SYN_SENT) ||
2580 TRANSITION (SYN_SENT, CLOSED) ||
2581 TRANSITION (CLOSED, LISTEN) ||
2582 TRANSITION (LISTEN, CLOSED) ||
2583 TRANSITION (LISTEN, SYN_SENT) ||
2584 TRANSITION (LISTEN, SYN_RECEIVED) ||
2585 TRANSITION (SYN_SENT, SYN_RECEIVED) ||
2586 TRANSITION (SYN_RECEIVED, ESTABLISHED) ||
2587 TRANSITION (SYN_SENT, ESTABLISHED) ||
2588 TRANSITION (SYN_RECEIVED, FIN_WAIT_1) ||
2589 TRANSITION (ESTABLISHED, FIN_WAIT_1) ||
2590 TRANSITION (ESTABLISHED, CLOSE_WAIT) ||
2591 TRANSITION (FIN_WAIT_1, FIN_WAIT_2) ||
2592 TRANSITION (FIN_WAIT_1, CLOSING) ||
2593 TRANSITION (CLOSE_WAIT, LAST_ACK) ||
2594 TRANSITION (FIN_WAIT_2, TIME_WAIT) ||
2595 TRANSITION (CLOSING, TIME_WAIT) ||
2596 TRANSITION (LAST_ACK, CLOSED) ||
2597 TRANSITION (TIME_WAIT, CLOSED) ||
2598 /* RFC 1122, §4.2.2.8. */
2599 TRANSITION (SYN_RECEIVED, LISTEN) ||
2600 TRANSITION (FIN_WAIT_1, TIME_WAIT));
2601
2602 #undef TRANSITION
2603
2604 priv->state = new_state;
2605 }
2606
2607 static void
set_state_established(PseudoTcpSocket * self)2608 set_state_established (PseudoTcpSocket *self)
2609 {
2610 PseudoTcpSocketPrivate *priv = self->priv;
2611
2612 set_state (self, PSEUDO_TCP_ESTABLISHED);
2613
2614 adjustMTU (self);
2615 if (priv->callbacks.PseudoTcpOpened)
2616 priv->callbacks.PseudoTcpOpened (self, priv->callbacks.user_data);
2617 }
2618
2619 /* (err == 0) means no error. */
2620 static void
set_state_closed(PseudoTcpSocket * self,guint32 err)2621 set_state_closed (PseudoTcpSocket *self, guint32 err)
2622 {
2623 PseudoTcpSocketPrivate *priv = self->priv;
2624
2625 set_state (self, PSEUDO_TCP_CLOSED);
2626
2627 /* Only call the callback if there was an error. */
2628 if (priv->callbacks.PseudoTcpClosed && err != 0)
2629 priv->callbacks.PseudoTcpClosed (self, err, priv->callbacks.user_data);
2630 }
2631
2632 gboolean
pseudo_tcp_socket_is_closed(PseudoTcpSocket * self)2633 pseudo_tcp_socket_is_closed (PseudoTcpSocket *self)
2634 {
2635 PseudoTcpSocketPrivate *priv = self->priv;
2636
2637 return (priv->state == PSEUDO_TCP_CLOSED);
2638 }
2639
2640 gboolean
pseudo_tcp_socket_is_closed_remotely(PseudoTcpSocket * self)2641 pseudo_tcp_socket_is_closed_remotely (PseudoTcpSocket *self)
2642 {
2643 PseudoTcpSocketPrivate *priv = self->priv;
2644
2645 return pseudo_tcp_state_has_received_fin (priv->state);
2646 }
2647