1 /* $KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $ */
2 /* $NetBSD: dccp_tcplike.c,v 1.3 2016/04/26 08:44:44 ozaki-r Exp $ */
3
4 /*
5 * Copyright (c) 2003 Magnus Erixzon
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31 /*
32 * TCP-like congestion control for DCCP
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.3 2016/04/26 08:44:44 ozaki-r Exp $");
37
38 #ifdef _KERNEL_OPT
39 #include "opt_dccp.h"
40 #endif
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/domain.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/proc.h>
50 #include <sys/protosw.h>
51 #include <sys/signalvar.h>
52 #include <sys/socket.h>
53 #include <sys/socketvar.h>
54 #include <sys/mutex.h>
55 #include <sys/sysctl.h>
56 #include <sys/syslog.h>
57
58 #include <net/if.h>
59
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/ip.h>
63 #include <netinet/in_pcb.h>
64 #include <netinet/in_var.h>
65
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/ip_var.h>
69
70 #include <netinet/dccp.h>
71 #include <netinet/dccp_var.h>
72 #include <netinet/dccp_tcplike.h>
73
/*
 * Debug logging hooks.
 *
 * Each hook takes ONE argument that is itself a complete, parenthesised
 * argument list, so call sites are written with double parentheses, e.g.
 *     TCPLIKE_DEBUG((LOG_INFO, "fmt\n", arg));
 * The hook simply pastes that list after the logging function's name.
 * MALLOC_DEBUG forwards to log; every other hook forwards to dccp_log.
 *
 * NOTE(review): this file also uses ACK_DEBUG and DCCP_DEBUG, which are not
 * defined here -- presumably they come from one of the included DCCP
 * headers; confirm.
 */
74 #define TCPLIKE_DEBUG(args) dccp_log args
75 #define MALLOC_DEBUG(args) log args
76 #define CWND_DEBUG(args) dccp_log args
77 #define ACKRATIO_DEBUG(args) dccp_log args
78 #define LOSS_DEBUG(args) dccp_log args
79 #define TIMEOUT_DEBUG(args) dccp_log args
80
/*
 * inpcb locking shims.
 *
 * Unless building for FreeBSD with __FreeBSD_version >= 500000, every
 * INP_* locking macro below expands to nothing.  Because the expansion is
 * empty the macro arguments are never evaluated; tcplike_rto_timeout()
 * relies on this when it passes an identifier (inp) whose declaration is
 * commented out.
 */
81 #if !defined(__FreeBSD__) || __FreeBSD_version < 500000
82 #define INP_INFO_LOCK_INIT(x,y)
83 #define INP_INFO_WLOCK(x)
84 #define INP_INFO_WUNLOCK(x)
85 #define INP_INFO_RLOCK(x)
86 #define INP_INFO_RUNLOCK(x)
87 #define INP_LOCK(x)
88 #define INP_UNLOCK(x)
89 #endif
90
91 /* Sender side */
92
93 void tcplike_rto_timeout(void *);
94 void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
95 void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
96 void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
97 int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
98 int _cwndvector_size(struct tcplike_send_ccb *);
99 u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);
100
101 void tcplike_send_term(void *);
102 void tcplike_recv_term(void *);
103
104 void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
105 u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);
106
107 /* extern Ack Vector functions */
108 extern void dccp_use_ackvector(struct dccpcb *);
109 extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
110 extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
111 extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
112 extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);
113
114 extern int dccp_get_option(char *, int, int, char *, int);
115 extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
116
117 /*
118 * RTO timer activated
119 */
120 void
tcplike_rto_timeout(void * ccb)121 tcplike_rto_timeout(void *ccb)
122 {
123 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
124 /*struct inpcb *inp;*/
125 int s;
126
127 mutex_enter(&(cb->mutex));
128
129 cb->ssthresh = cb->cwnd >>1;
130 cb->cwnd = 1; /* allowing 1 packet to be sent */
131 cb->outstanding = 0; /* is this correct? */
132 cb->rto_timer_callout = 0;
133 cb->rto = cb->rto << 1;
134 TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
135
136 cb->sample_rtt = 0;
137
138 cb->ack_last = 0;
139 cb->ack_miss = 0;
140
141 cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
142 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
143 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
144 (char *) &cb->rcvr_ackratio, 1);
145 cb->acked_in_win = 0;
146 cb->acked_windows = 0;
147 cb->oldcwnd_ts = cb->pcb->seq_snd;
148
149 LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
150 cb->cwnd, cb->outstanding));
151 mutex_exit(&(cb->mutex));
152
153 /* lock'n run dccp_output */
154 s = splnet();
155 INP_INFO_RLOCK(&dccpbinfo);
156 /*inp = cb->pcb->d_inpcb;*/
157 INP_LOCK(inp);
158 INP_INFO_RUNLOCK(&dccpbinfo);
159
160 dccp_output(cb->pcb, 1);
161
162 INP_UNLOCK(inp);
163 splx(s);
164 }
165
tcplike_rtt_sample(struct tcplike_send_ccb * cb,u_int16_t sample)166 void tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
167 {
168 u_int16_t err;
169
170 if (cb->rtt == 0xffff) {
171 /* hmmmmm. */
172 cb->rtt = sample;
173 cb->rto = cb->rtt << 1;
174 return;
175 }
176
177 /* This is how the Linux implementation is doing it.. */
178 if (sample >= cb->rtt) {
179 err = sample - cb->rtt;
180 cb->rtt = cb->rtt + (err >> 3);
181 } else {
182 err = cb->rtt - sample;
183 cb->rtt = cb->rtt - (err >> 3);
184 }
185 cb->rtt_d = cb->rtt_d + ((err - cb->rtt_d) >> 2);
186 if (cb->rtt < TCPLIKE_MIN_RTT)
187 cb->rtt = TCPLIKE_MIN_RTT;
188 cb->rto = cb->rtt + (cb->rtt_d << 2);
189
190
191 /* 5 million ways to calculate RTT ...*/
192 #if 0
193 cb->srtt = ( 0.8 * cb->srtt ) + (0.2 * sample);
194 if (cb->srtt < TCPLIKE_MIN_RTT)
195 cb->srtt = TCPLIKE_MIN_RTT;
196 cb->rto = cb->srtt << 1;
197 #endif
198
199 LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
200 }
201
202 /* Functions declared in struct dccp_cc_sw */
203
204 /*
205 * Initialises the sender side
206 * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
207 */
208 void *
tcplike_send_init(struct dccpcb * pcb)209 tcplike_send_init(struct dccpcb* pcb)
210 {
211 struct tcplike_send_ccb *cb;
212
213 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
214
215 cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
216 if (cb == 0) {
217 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
218 dccpstat.tcplikes_send_memerr++;
219 return 0;
220 }
221 memset(cb, 0, sizeof (struct tcplike_send_ccb));
222
223 /* init sender */
224 cb->pcb = pcb;
225
226 cb->cwnd = TCPLIKE_INITIAL_CWND;
227 cb->ssthresh = 0xafff; /* lim-> infinity */
228 cb->oldcwnd_ts = 0;
229 cb->outstanding = 0;
230 cb->rcvr_ackratio = 2; /* Ack Ratio */
231 cb->acked_in_win = 0;
232 cb->acked_windows = 0;
233
234 CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
235 cb->cwnd, cb->outstanding));
236 cb->rtt = 0xffff;
237 cb->rto = TIMEOUT_UBOUND;
238 callout_init(&cb->rto_timer, 0);
239 callout_init(&cb->free_timer, 0);
240 cb->rto_timer_callout = 0;
241 cb->rtt_d = 0;
242 cb->timestamp = 0;
243
244 cb->sample_rtt = 1;
245
246 cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
247 /* 1 bit per entry */
248 cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
249 if (cb->cwndvector == NULL) {
250 MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
251 /* What to do now? */
252 cb->cv_size = 0;
253 dccpstat.tcplikes_send_memerr++;
254 return 0;
255 }
256 memset(cb->cwndvector, 0, cb->cv_size / 8);
257 cb->cv_hs = cb->cv_ts = 0;
258 cb->cv_hp = cb->cwndvector;
259
260 cb->ack_last = 0;
261 cb->ack_miss = 0;
262
263 mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
264
265 TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
266 dccpstat.tcplikes_send_conn++;
267 return cb;
268 }
269
tcplike_send_term(void * ccb)270 void tcplike_send_term(void *ccb)
271 {
272 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
273 if (ccb == 0)
274 return;
275
276 mutex_destroy(&(cb->mutex));
277
278 free(cb, M_PCB);
279 TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
280 }
281
282 /*
283 * Free the sender side
284 * args: ccb - ccb of sender
285 */
286 void
tcplike_send_free(void * ccb)287 tcplike_send_free(void *ccb)
288 {
289 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
290
291 LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
292
293 if (ccb == 0)
294 return;
295
296 mutex_enter(&(cb->mutex));
297
298 free(cb->cwndvector, M_PCB);
299 cb->cv_hs = cb->cv_ts = 0;
300
301 /* untimeout any active timer */
302 if (cb->rto_timer_callout) {
303 TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
304 callout_stop(&cb->rto_timer);
305 cb->rto_timer_callout = 0;
306 }
307
308 mutex_exit(&(cb->mutex));
309
310 callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
311 }
312
313 /*
314 * Ask TCPlike wheter one can send a packet or not
315 * args: ccb - ccb block for current connection
316 * returns: 0 if ok, else <> 0.
317 */
318 int
tcplike_send_packet(void * ccb,long datasize)319 tcplike_send_packet(void *ccb, long datasize)
320 {
321 /* check if one can send here */
322 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
323 long ticks;
324 char feature[1];
325
326 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
327
328 if (datasize == 0) {
329 TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
330 return 1;
331 }
332
333 mutex_enter(&(cb->mutex));
334
335 if (cb->cwnd <= cb->outstanding) {
336 /* May not send. trigger RTO */
337 DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
338 if (!cb->rto_timer_callout) {
339 LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
340 ticks = (long)cb->rto;
341 callout_reset(&cb->rto_timer, ticks,
342 tcplike_rto_timeout, (void *)cb);
343 cb->rto_timer_callout = 1;
344 }
345 mutex_exit(&(cb->mutex));
346 return 0;
347 }
348
349 /* We're allowed to send */
350
351 feature[0] = 1;
352 if (cb->pcb->remote_ackvector == 0) {
353 ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
354 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
355 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
356 }
357
358 /* untimeout any active timer */
359 if (cb->rto_timer_callout) {
360 LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
361 callout_stop(&cb->rto_timer);
362 cb->rto_timer_callout = 0;
363 }
364
365 if (!cb->sample_rtt) {
366 struct timeval stamp;
367 microtime(&stamp);
368 cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
369 dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
370 /*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
371 cb->sample_rtt = 1;
372 }
373
374 mutex_exit(&(cb->mutex));
375 return 1;
376
377 }
378
379 /*
380 * Notify sender that a packet has been sent
381 * args: ccb - ccb block for current connection
382 * moreToSend - if there exists more packets to send
383 */
384 void
tcplike_send_packet_sent(void * ccb,int moreToSend,long datasize)385 tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
386 {
387 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
388
389 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
390
391 if (datasize == 0) {
392 TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
393 return;
394 }
395
396 mutex_enter(&(cb->mutex));
397
398 cb->outstanding++;
399 TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
400
401 /* stash the seqnr in cwndvector */
402 /* Dont do this if we're only sending an ACK ! */
403 _add_to_cwndvector(cb, cb->pcb->seq_snd);
404 CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
405
406 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
407 mutex_exit(&(cb->mutex));
408 }
409
410 /*
411 * Notify that an ack package was received
412 * args: ccb - ccb block for current connection
413 */
414 void
tcplike_send_packet_recv(void * ccb,char * options,int optlen)415 tcplike_send_packet_recv(void *ccb, char *options, int optlen)
416 {
417 dccp_seq acknum, lastok;
418 u_int16_t numlostpackets, avsize, i, prev_size;
419 u_int8_t length, state, numokpackets, ackratiocnt;
420 u_char av[10];
421 struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
422
423 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
424 mutex_enter(&(cb->mutex));
425
426 if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
427 u_int32_t echo, elapsed;
428
429 TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
430 bcopy(av, &echo, 4);
431 bcopy(av + 4, &elapsed, 4);
432
433 if (echo == cb->timestamp) {
434 struct timeval time;
435 u_int32_t c_stamp;
436 u_int16_t diff;
437
438 microtime(&time);
439 c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
440
441 diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
442 diff = (u_int16_t)(diff / 1000);
443 TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
444 echo, elapsed, diff));
445 tcplike_rtt_sample(cb, diff);
446 }
447 }
448
449 if (cb->pcb->ack_rcv == 0) {
450 /* There was no Ack. There is no spoon */
451
452 /* We'll clear the missingacks data here, since the other host
453 * is also sending data.
454 * I guess we could deal with this, using the NDP field in the
455 * header. Let's stick a *TODO* mark here for now.
456 * The missingacks mechanism will activate if other host goes to
457 * only sending DCCP-Ack packets.
458 */
459 cb->ack_last = 0;
460 cb->ack_miss = 0;
461 ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
462 mutex_exit(&(cb->mutex));
463 return;
464 }
465
466 cb->sample_rtt = 0;
467
468 /* check ackVector for lost packets. cmp with cv_list */
469 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
470 if (avsize == 0)
471 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
472
473 if (avsize > 0)
474 dccpstat.tcplikes_send_ackrecv++;
475
476 acknum = cb->pcb->ack_rcv;
477 numlostpackets = 0;
478 numokpackets = 0;
479 lastok = 0;
480 prev_size = _cwndvector_size(cb);
481
482 TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
483 if (avsize == 0)
484 _remove_from_cwndvector(cb, acknum);
485
486 for (i=0; i < avsize; i++) {
487 state = (av[i] & 0xc0) >> 6;
488 length = (av[i] & 0x3f) +1;
489 while (length > 0) {
490 if (state == 0) {
491 CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
492 numokpackets++;
493 lastok = acknum;
494 _remove_from_cwndvector(cb, acknum);
495 } else {
496 if (acknum > cb->oldcwnd_ts) {
497 LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
498 numlostpackets++;
499 dccpstat.tcplikes_send_reploss++;
500 }
501 }
502 acknum--;
503 length--;
504 }
505 }
506 if (lastok)
507 if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
508 LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
509 if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
510 numlostpackets++;
511 dccpstat.tcplikes_send_assloss++;
512 }
513 }
514
515 lastok = cb->cv_hs;
516 while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
517 lastok++;
518 if (lastok != cb->cv_hs)
519 _chop_cwndvector(cb, lastok);
520
521 cb->outstanding = _cwndvector_size(cb);
522 CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
523 if (prev_size == cb->outstanding) {
524 /* Nothing dropped from cwndvector */
525 mutex_exit(&(cb->mutex));
526 return;
527 }
528
529 cb->acked_in_win += numokpackets;
530
531 if (cb->cwnd < cb->ssthresh) {
532 /* Slow start */
533
534 if (numlostpackets > 0) {
535 /* Packet loss */
536 LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
537 cb->cwnd = cb->cwnd>>1;
538 if (cb->cwnd < 1)
539 cb->cwnd = 1;
540 cb->ssthresh = cb->cwnd;
541 cb->acked_in_win = 0;
542 cb->acked_windows = 0;
543 cb->oldcwnd_ts = cb->pcb->seq_snd;
544
545 } else {
546 cb->cwnd++;
547 }
548
549 } else if (cb->cwnd >= cb->ssthresh) {
550
551 if (numlostpackets > 0) {
552 /* Packet loss */
553 LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
554 cb->cwnd = cb->cwnd>>1;
555 if (cb->cwnd < 1)
556 cb->cwnd = 1;
557 cb->ssthresh = cb->cwnd;
558 cb->acked_in_win = 0;
559 cb->acked_windows = 0;
560 cb->oldcwnd_ts = cb->pcb->seq_snd;
561
562 } else if (cb->acked_in_win > cb->cwnd) {
563 cb->cwnd++;
564 }
565 }
566
567 /* Ok let's check if there are missing Ack packets */
568 ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
569 cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
570
571 if (cb->ack_last == 0) {
572 /* First received ack (or first after Data packet). Yey */
573 cb->ack_last = cb->pcb->seq_rcv;
574 cb->ack_miss = 0;
575 } else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
576 /* This is correct, non-congestion, in-order behaviour */
577 cb->ack_last = cb->pcb->seq_rcv;
578
579 } else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
580 /* Might be an Ack we've been missing */
581 /* This code has a flaw; If we miss 2 Ack packets, we only care
582 * about the older one. This means that the next-to-oldest one could
583 * be lost without any action beeing taken.
584 * Time will tell if that is going to be a Giant Problem(r)
585 */
586 if (cb->pcb->seq_rcv == cb->ack_miss) {
587 /* Yea it was. great */
588 cb->ack_miss = 0;
589 }
590
591 } else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
592 /* There is a jump in Ack seqnums.. */
593 cb->ack_miss = cb->ack_last + 1;
594 cb->ack_last = cb->pcb->seq_rcv;
595 }
596
597 if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
598 /* Alert! Alert! Ack packets are MIA.
599 * Decrease Ack Ratio
600 */
601 cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
602 if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
603 /* Constraint 2 */
604 cb->rcvr_ackratio = cb->cwnd>>1;
605 }
606 if (cb->rcvr_ackratio == 0)
607 cb->rcvr_ackratio = 1;
608 ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
609 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
610 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
611 (char *) &cb->rcvr_ackratio, 1);
612
613 cb->ack_miss = 0;
614 cb->acked_windows = 0;
615 cb->acked_in_win = 0;
616 dccpstat.tcplikes_send_missack++;
617
618 } else if (cb->acked_in_win > cb->cwnd) {
619 cb->acked_in_win = 0;
620 cb->acked_windows++;
621 if (cb->rcvr_ackratio == 1) {
622 /* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
623 * heavy congestion so we can increase it
624 */
625 cb->acked_windows = 0;
626 }
627 }
628
629 if (cb->acked_windows >= 1) {
630 ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
631 if (cb->acked_windows >= ackratiocnt) {
632 if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
633 /* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
634 cb->rcvr_ackratio--;
635 ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
636 dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
637 dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
638 (char *) &cb->rcvr_ackratio, 1);
639 }
640 cb->acked_in_win = 0;
641 cb->acked_windows = 0;
642 }
643 }
644
645 CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
646 cb->cwnd, cb->outstanding));
647
648 if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
649 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
650 cb->cwnd, cb->outstanding));
651 callout_stop(&cb->rto_timer);
652 cb->rto_timer_callout = 0;
653
654 mutex_exit(&(cb->mutex));
655 dccp_output(cb->pcb, 1);
656 return;
657 }
658 mutex_exit(&(cb->mutex));
659 }
660
661 int
_cwndvector_size(struct tcplike_send_ccb * cb)662 _cwndvector_size(struct tcplike_send_ccb *cb)
663 {
664 u_int64_t gap, offset, seqnr;
665 u_int32_t cnt;
666 u_char *t;
667
668 TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
669 cnt = 0;
670 for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
671 gap = seqnr - cb->cv_hs;
672
673 offset = gap % 8;
674 t = cb->cv_hp + (gap/8);
675 if (t >= (cb->cwndvector + (cb->cv_size/8)))
676 t -= (cb->cv_size / 8); /* wrapped */
677
678 if (((*t & (0x01 << offset)) >> offset) == 0x01)
679 cnt++;
680 }
681 return cnt;
682 }
683
684 u_char
_cwndvector_state(struct tcplike_send_ccb * cb,u_int64_t seqnr)685 _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
686 {
687 u_int64_t gap, offset;
688 u_char *t;
689
690 /* Check for wrapping */
691 if (seqnr >= cb->cv_hs) {
692 /* Not wrapped */
693 gap = seqnr - cb->cv_hs;
694 } else {
695 /* Wrapped XXXXX */
696 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
697 }
698
699 if (gap >= cb->cv_size) {
700 /* gap is bigger than cwndvector size? baaad */
701 return 0x01;
702 }
703
704 offset = gap % 8;
705 t = cb->cv_hp + (gap/8);
706 if (t >= (cb->cwndvector + (cb->cv_size/8)))
707 t -= (cb->cv_size / 8); /* wrapped */
708
709 return ((*t & (0x01 << offset)) >> offset);
710 }
711
712 void
_add_to_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)713 _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
714 {
715 u_int64_t offset, dc, gap;
716 u_char *t, *n;
717
718 TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
719
720 if (cb->cv_hs == cb->cv_ts) {
721 /* Empty cwndvector */
722 cb->cv_hs = cb->cv_ts = seqnr;
723 }
724
725 /* Check for wrapping */
726 if (seqnr >= cb->cv_hs) {
727 /* Not wrapped */
728 gap = seqnr - cb->cv_hs;
729 } else {
730 /* Wrapped */
731 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
732 }
733
734 if (gap >= cb->cv_size) {
735 /* gap is bigger than cwndvector size? baaad */
736 /* maybe we should increase the cwndvector here */
737 CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
738 gap, cb->cv_size, seqnr));
739 dccpstat.tcplikes_send_badseq++;
740 return;
741 }
742
743 offset = gap % 8; /* bit to mark */
744 t = cb->cv_hp + (gap/8);
745 if (t >= (cb->cwndvector + (cb->cv_size/8)))
746 t -= (cb->cv_size / 8); /* cwndvector wrapped */
747
748 *t = *t | (0x01 << offset); /* turn on bit */
749
750 cb->cv_ts = seqnr+1;
751 if (cb->cv_ts == 0x1000000000000LL)
752 cb->cv_ts = 0;
753
754 if (gap > (cb->cv_size - 128)) {
755 MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
756 n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
757 if (n == NULL) {
758 MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
759 dccpstat.tcplikes_send_memerr++;
760 return;
761 }
762 memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
763 dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
764 memcpy (n,cb->cv_hp, dc); /* tail to end */
765 memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
766 cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
767 free (cb->cwndvector, M_PCB);
768 cb->cv_hp = cb->cwndvector = n;
769 }
770 }
771
772 void
_remove_from_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)773 _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
774 {
775 u_int64_t offset;
776 int64_t gap;
777 u_char *t;
778
779 DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
780
781 if (cb->cv_hs == cb->cv_ts) {
782 /* Empty cwndvector */
783 return;
784 }
785
786 /* Check for wrapping */
787 if (seqnr >= cb->cv_hs) {
788 /* Not wrapped */
789 gap = seqnr - cb->cv_hs;
790 } else {
791 /* Wrapped */
792 gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
793 }
794
795 if (gap >= cb->cv_size) {
796 /* gap is bigger than cwndvector size. has already been chopped */
797 return;
798 }
799
800 offset = gap % 8; /* hi or low 2 bits to mark */
801 t = cb->cv_hp + (gap/8);
802 if (t >= (cb->cwndvector + (cb->cv_size/8)))
803 t -= (cb->cv_size / 8); /* cwndvector wrapped */
804
805 *t = *t & (~(0x01 << offset)); /* turn off bits */
806 }
807
808 int
_chop_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)809 _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
810 {
811 int64_t gap, bytegap;
812 u_char *t;
813
814 CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
815
816 if (cb->cv_hs == cb->cv_ts)
817 return 0;
818
819 if (seqnr > cb->cv_hs) {
820 gap = seqnr - cb->cv_hs;
821 } else {
822 /* We received obsolete information */
823 return 0;
824 }
825
826 bytegap = gap/8;
827 if (bytegap == 0)
828 return 0;
829
830 t = cb->cv_hp + bytegap;
831 if (t >= (cb->cwndvector + (cb->cv_size/8)))
832 t -= (cb->cv_size / 8); /* ackvector wrapped */
833 cb->cv_hp = t;
834 cb->cv_hs += bytegap*8;
835 return 1;
836 }
837
838
839 /* Receiver side */
840
841
842 /* Functions declared in struct dccp_cc_sw */
843
844 /* Initialises the receiver side
845 * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
846 */
847 void *
tcplike_recv_init(struct dccpcb * pcb)848 tcplike_recv_init(struct dccpcb *pcb)
849 {
850 struct tcplike_recv_ccb *ccb;
851
852 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
853
854 ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
855 if (ccb == 0) {
856 TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
857 dccpstat.tcplikes_recv_memerr++;
858 return 0;
859 }
860
861 memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
862
863 ccb->pcb = pcb;
864 ccb->unacked = 0;
865 ccb->pcb->ack_ratio = 2;
866
867 ccb->pcb->remote_ackvector = 1;
868 dccp_use_ackvector(ccb->pcb);
869
870 callout_init(&ccb->free_timer, 0);
871
872 mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
873
874 TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
875 dccpstat.tcplikes_recv_conn++;
876 return ccb;
877 }
878
tcplike_recv_term(void * ccb)879 void tcplike_recv_term(void *ccb)
880 {
881 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
882 if (ccb == 0)
883 return;
884
885 mutex_destroy(&(cb->mutex));
886 free(cb, M_PCB);
887 TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
888 }
889
890 /* Free the receiver side
891 * args: ccb - ccb of recevier
892 */
893 void
tcplike_recv_free(void * ccb)894 tcplike_recv_free(void *ccb)
895 {
896 struct ack_list *a;
897 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
898
899 LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
900
901 if (ccb == 0)
902 return;
903
904 mutex_enter(&(cb->mutex));
905
906 a = cb->av_list;
907 while (a) {
908 cb->av_list = a->next;
909 free(a, M_TEMP);
910 a = cb->av_list;
911 }
912
913 cb->pcb->av_size = 0;
914 free(cb->pcb->ackvector, M_PCB);
915
916 mutex_exit(&(cb->mutex));
917 callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
918 }
919
920 /*
921 * Tell TCPlike that a packet has been received
922 * args: ccb - ccb block for current connection
923 */
924 void
tcplike_recv_packet_recv(void * ccb,char * options,int optlen)925 tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
926 {
927 struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
928 u_char ackvector[16];
929 u_int16_t avsize;
930 u_char av_rcv[10];
931
932 TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
933
934 mutex_enter(&(cb->mutex));
935
936 if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
937 cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
938 dccpstat.tcplikes_recv_datarecv++;
939
940 /* Grab Ack Vector 0 or 1 */
941 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
942 if (avsize == 0)
943 avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
944
945 /* We are only interested in acks-on-acks here.
946 * The "real" ack handling is done be the sender */
947 if (avsize == 0 && cb->pcb->ack_rcv) {
948 u_int64_t ackthru;
949 /* We got an Ack without an ackvector.
950 * This would mean it's an ack on an ack.
951 */
952 ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
953 ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
954 if (ackthru) {
955 dccp_update_ackvector(cb->pcb, ackthru);
956 dccpstat.tcplikes_recv_ackack++;
957 }
958 } else if (avsize > 0 && cb->pcb->ack_rcv) {
959 /* We received an AckVector */
960 u_int32_t acknum, ackthru;
961 int i;
962 ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
963 /* gotta loop through the ackvector */
964 acknum = cb->pcb->ack_rcv;
965 for (i=0; i<avsize; i++) {
966 u_int8_t state, len;
967 state = (av_rcv[i] & 0xc0) >> 6;
968 len = (av_rcv[i] & 0x2f) + 1;
969 if (state != 0) {
970 /* Drops in ackvector! Will be noted and taken care of by the sender part */
971 ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
972 continue;
973 }
974
975 while (len>0) {
976 ackthru = _avlist_get(cb, acknum);
977 ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
978 if (ackthru) {
979 dccp_update_ackvector(cb->pcb, ackthru);
980 dccpstat.tcplikes_recv_ackack++;
981 }
982 acknum--;
983 len--;
984 }
985 }
986 }
987
988 ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
989 dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
990 cb->unacked++;
991
992 if (cb->unacked >= cb->pcb->ack_ratio) {
993 /* Time to send an Ack */
994
995 avsize = dccp_generate_ackvector(cb->pcb, ackvector);
996 TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
997 cb->unacked = 0;
998 if (avsize > 0) {
999 dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
1000 cb->pcb->ack_snd = cb->pcb->seq_rcv;
1001 _avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1002 ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1003 dccpstat.tcplikes_recv_acksent++;
1004 dccp_output(cb->pcb, 1);
1005 }
1006 }
1007 mutex_exit(&(cb->mutex));
1008 }
1009
1010 void
_avlist_add(struct tcplike_recv_ccb * cb,u_int64_t localseq,u_int64_t ackthru)1011 _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1012 {
1013 struct ack_list *a;
1014 ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1015 /*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1016 a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1017 if (a == NULL) {
1018 MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1019 dccpstat.tcplikes_recv_memerr++;
1020 return;
1021 }
1022 memset(a, 0, sizeof(struct ack_list));
1023 a->localseq = localseq;
1024 a->ackthru = ackthru;
1025 a->next = cb->av_list;
1026 cb->av_list = a;
1027 }
1028
1029 /*
1030 * Searches the av_list. if 'localseq' found, drop it from list and return
1031 * ackthru
1032 */
1033 u_int64_t
_avlist_get(struct tcplike_recv_ccb * cb,u_int64_t localseq)1034 _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1035 {
1036 struct ack_list *a, *n, *p;
1037 u_int64_t ackthru;
1038
1039 ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1040 a = cb->av_list;
1041 p = 0;
1042 while (a) {
1043 n = a->next;
1044 if (a->localseq == localseq) {
1045 if (p)
1046 p->next = n;
1047 else
1048 cb->av_list = n;
1049 ackthru = a->ackthru;
1050 /*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1051 free(a, M_TEMP);
1052 return ackthru;
1053 }
1054 p = a;
1055 a = n;
1056 }
1057 /* Not found. return 0 */
1058 return 0;
1059 }
1060
1061 /*
1062 int tcplike_option_recv(void);
1063 */
1064