xref: /netbsd/sys/netinet/dccp_tcplike.c (revision 2d52435a)
1abc2ac13Srjs /*	$KAME: dccp_tcplike.c,v 1.19 2005/07/27 06:27:25 nishida Exp $	*/
2*2d52435aSandvar /*	$NetBSD: dccp_tcplike.c,v 1.5 2022/05/22 11:27:36 andvar Exp $ */
3abc2ac13Srjs 
4abc2ac13Srjs /*
5abc2ac13Srjs  * Copyright (c) 2003 Magnus Erixzon
6abc2ac13Srjs  * All rights reserved.
7abc2ac13Srjs  *
8abc2ac13Srjs  * Redistribution and use in source and binary forms, with or without
9abc2ac13Srjs  * modification, are permitted provided that the following conditions
10abc2ac13Srjs  * are met:
11abc2ac13Srjs  *
12abc2ac13Srjs  * 1. Redistributions of source code must retain the above copyright
13abc2ac13Srjs  *    notice, this list of conditions and the following disclaimer.
14abc2ac13Srjs  * 2. Redistributions in binary form must reproduce the above copyright
15abc2ac13Srjs  *    notice, this list of conditions and the following disclaimer in the
16abc2ac13Srjs  *    documentation and/or other materials provided with the distribution.
17abc2ac13Srjs  * 3. The name of the author may not be used to endorse or promote products
18abc2ac13Srjs  *    derived from this software without specific prior written permission.
19abc2ac13Srjs  *
20abc2ac13Srjs  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21abc2ac13Srjs  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22abc2ac13Srjs  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23abc2ac13Srjs  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24abc2ac13Srjs  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25abc2ac13Srjs  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26abc2ac13Srjs  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27abc2ac13Srjs  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28abc2ac13Srjs  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29abc2ac13Srjs  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30abc2ac13Srjs  */
31abc2ac13Srjs /*
32abc2ac13Srjs  * TCP-like congestion control for DCCP
33abc2ac13Srjs  */
34abc2ac13Srjs 
35abc2ac13Srjs #include <sys/cdefs.h>
36*2d52435aSandvar __KERNEL_RCSID(0, "$NetBSD: dccp_tcplike.c,v 1.5 2022/05/22 11:27:36 andvar Exp $");
37abc2ac13Srjs 
38082aeae6Spooka #ifdef _KERNEL_OPT
39abc2ac13Srjs #include "opt_dccp.h"
40082aeae6Spooka #endif
41abc2ac13Srjs 
42abc2ac13Srjs #include <sys/param.h>
43abc2ac13Srjs #include <sys/systm.h>
44abc2ac13Srjs #include <sys/domain.h>
45abc2ac13Srjs #include <sys/kernel.h>
46abc2ac13Srjs #include <sys/lock.h>
47abc2ac13Srjs #include <sys/malloc.h>
48abc2ac13Srjs #include <sys/mbuf.h>
49abc2ac13Srjs #include <sys/proc.h>
50abc2ac13Srjs #include <sys/protosw.h>
51abc2ac13Srjs #include <sys/signalvar.h>
52abc2ac13Srjs #include <sys/socket.h>
53abc2ac13Srjs #include <sys/socketvar.h>
54abc2ac13Srjs #include <sys/mutex.h>
55abc2ac13Srjs #include <sys/sysctl.h>
56abc2ac13Srjs #include <sys/syslog.h>
57abc2ac13Srjs 
58abc2ac13Srjs #include <net/if.h>
59abc2ac13Srjs 
60abc2ac13Srjs #include <netinet/in.h>
61abc2ac13Srjs #include <netinet/in_systm.h>
62abc2ac13Srjs #include <netinet/ip.h>
63abc2ac13Srjs #include <netinet/in_pcb.h>
64abc2ac13Srjs #include <netinet/in_var.h>
65abc2ac13Srjs 
66abc2ac13Srjs #include <netinet/ip_icmp.h>
67abc2ac13Srjs #include <netinet/icmp_var.h>
68abc2ac13Srjs #include <netinet/ip_var.h>
69abc2ac13Srjs 
70abc2ac13Srjs #include <netinet/dccp.h>
71abc2ac13Srjs #include <netinet/dccp_var.h>
72abc2ac13Srjs #include <netinet/dccp_tcplike.h>
73abc2ac13Srjs 
/*
 * Debug logging hooks, one per subsystem of this file.
 * Each macro takes a single, already parenthesised argument list, e.g.
 * TCPLIKE_DEBUG((LOG_INFO, "fmt\n", ...)), and pastes it after the logging
 * function name.  All of them forward to dccp_log except MALLOC_DEBUG,
 * which forwards to log.
 */
74abc2ac13Srjs #define TCPLIKE_DEBUG(args) dccp_log args
75abc2ac13Srjs #define MALLOC_DEBUG(args) log args
76abc2ac13Srjs #define CWND_DEBUG(args) dccp_log args
77abc2ac13Srjs #define ACKRATIO_DEBUG(args) dccp_log args
78abc2ac13Srjs #define LOSS_DEBUG(args) dccp_log args
79abc2ac13Srjs #define TIMEOUT_DEBUG(args) dccp_log args
80abc2ac13Srjs 
/*
 * When not building for FreeBSD, or when __FreeBSD_version is below 500000,
 * the inpcb locking macros are defined here with empty expansions, so every
 * INP_* invocation in this file compiles to nothing (its argument is never
 * evaluated).
 */
81abc2ac13Srjs #if !defined(__FreeBSD__) || __FreeBSD_version < 500000
82abc2ac13Srjs #define	INP_INFO_LOCK_INIT(x,y)
83abc2ac13Srjs #define	INP_INFO_WLOCK(x)
84abc2ac13Srjs #define INP_INFO_WUNLOCK(x)
85abc2ac13Srjs #define	INP_INFO_RLOCK(x)
86abc2ac13Srjs #define INP_INFO_RUNLOCK(x)
87abc2ac13Srjs #define	INP_LOCK(x)
88abc2ac13Srjs #define INP_UNLOCK(x)
89abc2ac13Srjs #endif
90abc2ac13Srjs 
91abc2ac13Srjs /* Sender side */
92abc2ac13Srjs 
/*
 * Forward declarations: the RTO timer callback, the RTT estimator and the
 * cwndvector helpers.  The cwndvector is the bitmap in which the sender
 * marks sequence numbers of data packets it has sent.
 */
93abc2ac13Srjs void tcplike_rto_timeout(void *);
94abc2ac13Srjs void tcplike_rtt_sample(struct tcplike_send_ccb *, u_int16_t);
95abc2ac13Srjs void _add_to_cwndvector(struct tcplike_send_ccb *, u_int64_t);
96abc2ac13Srjs void _remove_from_cwndvector(struct tcplike_send_ccb *, u_int64_t);
97abc2ac13Srjs int _chop_cwndvector(struct tcplike_send_ccb *, u_int64_t);
98abc2ac13Srjs int _cwndvector_size(struct tcplike_send_ccb *);
99abc2ac13Srjs u_char _cwndvector_state(struct tcplike_send_ccb *, u_int64_t);
100abc2ac13Srjs 
/* Final teardown routines; both take the ccb as an untyped pointer. */
101abc2ac13Srjs void tcplike_send_term(void *);
102abc2ac13Srjs void tcplike_recv_term(void *);
103abc2ac13Srjs 
/* Helpers operating on the receiver-side ccb. */
104abc2ac13Srjs void _avlist_add(struct tcplike_recv_ccb *, u_int64_t, u_int64_t);
105abc2ac13Srjs u_int64_t _avlist_get(struct tcplike_recv_ccb *, u_int64_t);
106abc2ac13Srjs 
107abc2ac13Srjs /* extern Ack Vector functions */
108abc2ac13Srjs extern void dccp_use_ackvector(struct dccpcb *);
109abc2ac13Srjs extern void dccp_update_ackvector(struct dccpcb *, u_int64_t);
110abc2ac13Srjs extern void dccp_increment_ackvector(struct dccpcb *, u_int64_t);
111abc2ac13Srjs extern u_int16_t dccp_generate_ackvector(struct dccpcb *, u_char *);
112abc2ac13Srjs extern u_char dccp_ackvector_state(struct dccpcb *, u_int32_t);
113abc2ac13Srjs 
/* Option and feature helpers implemented outside this file. */
114abc2ac13Srjs extern int dccp_get_option(char *, int, int, char *, int);
115abc2ac13Srjs extern int dccp_remove_feature(struct dccpcb *, u_int8_t, u_int8_t);
116abc2ac13Srjs 
117abc2ac13Srjs /*
118abc2ac13Srjs  * RTO timer activated
119abc2ac13Srjs  */
120abc2ac13Srjs void
tcplike_rto_timeout(void * ccb)121abc2ac13Srjs tcplike_rto_timeout(void *ccb)
122abc2ac13Srjs {
123abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
124abc2ac13Srjs 	/*struct inpcb *inp;*/
125abc2ac13Srjs 	int s;
126abc2ac13Srjs 
127abc2ac13Srjs 	mutex_enter(&(cb->mutex));
128abc2ac13Srjs 
129abc2ac13Srjs 	cb->ssthresh = cb->cwnd >>1;
130abc2ac13Srjs 	cb->cwnd = 1; /* allowing 1 packet to be sent */
131abc2ac13Srjs 	cb->outstanding = 0; /* is this correct? */
132abc2ac13Srjs 	cb->rto_timer_callout = 0;
133abc2ac13Srjs 	cb->rto = cb->rto << 1;
134abc2ac13Srjs 	TIMEOUT_DEBUG((LOG_INFO, "RTO Timeout. New RTO = %u\n", cb->rto));
135abc2ac13Srjs 
136abc2ac13Srjs 	cb->sample_rtt = 0;
137abc2ac13Srjs 
138abc2ac13Srjs 	cb->ack_last = 0;
139abc2ac13Srjs 	cb->ack_miss = 0;
140abc2ac13Srjs 
141abc2ac13Srjs 	cb->rcvr_ackratio = 1; /* Constraint 2 & 3. We need ACKs asap */
142abc2ac13Srjs 	dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
143abc2ac13Srjs 	dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
144abc2ac13Srjs 				 (char *) &cb->rcvr_ackratio, 1);
145abc2ac13Srjs 	cb->acked_in_win = 0;
146abc2ac13Srjs 	cb->acked_windows = 0;
147abc2ac13Srjs 	cb->oldcwnd_ts = cb->pcb->seq_snd;
148abc2ac13Srjs 
149abc2ac13Srjs 	LOSS_DEBUG((LOG_INFO, "Timeout. CWND value: %u , OUTSTANDING value: %u\n",
150abc2ac13Srjs 	    cb->cwnd, cb->outstanding));
151abc2ac13Srjs 	mutex_exit(&(cb->mutex));
152abc2ac13Srjs 
153abc2ac13Srjs 	/* lock'n run dccp_output */
154abc2ac13Srjs 	s = splnet();
155abc2ac13Srjs 	INP_INFO_RLOCK(&dccpbinfo);
156abc2ac13Srjs 	/*inp = cb->pcb->d_inpcb;*/
157abc2ac13Srjs 	INP_LOCK(inp);
158abc2ac13Srjs 	INP_INFO_RUNLOCK(&dccpbinfo);
159abc2ac13Srjs 
160abc2ac13Srjs 	dccp_output(cb->pcb, 1);
161abc2ac13Srjs 
162abc2ac13Srjs 	INP_UNLOCK(inp);
163abc2ac13Srjs 	splx(s);
164abc2ac13Srjs }
165abc2ac13Srjs 
/*
 * Feed one round-trip-time measurement into the sender's estimator.
 *
 * The first sample (recognised by rtt still holding the 0xffff marker)
 * seeds rtt directly and sets rto to twice that value.  Later samples move
 * rtt one eighth of the way towards the sample, move rtt_d one quarter of
 * the way towards the absolute error, clamp rtt to TCPLIKE_MIN_RTT and set
 * rto = rtt + 4 * rtt_d.
 *
 * args: cb     - sender ccb whose rtt, rtt_d and rto are updated
 *       sample - measured round trip time
 */
void
tcplike_rtt_sample(struct tcplike_send_ccb *cb, u_int16_t sample)
{
	u_int16_t deviation;

	if (cb->rtt == 0xffff) {
		/* No estimate yet: take the sample as is. */
		cb->rtt = sample;
		cb->rto = cb->rtt << 1;
		return;
	}

	/* This is how the Linux implementation is doing it.. */
	if (sample < cb->rtt) {
		deviation = cb->rtt - sample;
		cb->rtt -= deviation >> 3;
	} else {
		deviation = sample - cb->rtt;
		cb->rtt += deviation >> 3;
	}

	cb->rtt_d += (deviation - cb->rtt_d) >> 2;

	if (cb->rtt < TCPLIKE_MIN_RTT)
		cb->rtt = TCPLIKE_MIN_RTT;

	cb->rto = cb->rtt + (cb->rtt_d << 2);

	LOSS_DEBUG((LOG_INFO, "RTT Sample: %u , New RTO: %u\n", sample, cb->rto));
}
201abc2ac13Srjs 
202abc2ac13Srjs /* Functions declared in struct dccp_cc_sw */
203abc2ac13Srjs 
204abc2ac13Srjs /*
205abc2ac13Srjs  * Initialises the sender side
206abc2ac13Srjs  * returns: pointer to a tfrc_send_ccb struct on success, otherwise 0
207abc2ac13Srjs  */
208abc2ac13Srjs void *
tcplike_send_init(struct dccpcb * pcb)209abc2ac13Srjs tcplike_send_init(struct dccpcb* pcb)
210abc2ac13Srjs {
211abc2ac13Srjs 	struct tcplike_send_ccb *cb;
212abc2ac13Srjs 
213abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_init()\n"));
214abc2ac13Srjs 
215abc2ac13Srjs 	cb = malloc(sizeof (struct tcplike_send_ccb), M_PCB, M_NOWAIT | M_ZERO);
216abc2ac13Srjs 	if (cb == 0) {
217abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_send_ccb!\n"));
218abc2ac13Srjs 		dccpstat.tcplikes_send_memerr++;
219abc2ac13Srjs 		return 0;
220abc2ac13Srjs 	}
221abc2ac13Srjs 	memset(cb, 0, sizeof (struct tcplike_send_ccb));
222abc2ac13Srjs 
223abc2ac13Srjs 	/* init sender */
224abc2ac13Srjs 	cb->pcb = pcb;
225abc2ac13Srjs 
226abc2ac13Srjs 	cb->cwnd = TCPLIKE_INITIAL_CWND;
227abc2ac13Srjs 	cb->ssthresh = 0xafff; /* lim-> infinity */
228abc2ac13Srjs 	cb->oldcwnd_ts = 0;
229abc2ac13Srjs 	cb->outstanding = 0;
230abc2ac13Srjs 	cb->rcvr_ackratio = 2; /* Ack Ratio */
231abc2ac13Srjs 	cb->acked_in_win = 0;
232abc2ac13Srjs 	cb->acked_windows = 0;
233abc2ac13Srjs 
234abc2ac13Srjs 	CWND_DEBUG((LOG_INFO, "Init. CWND value: %u , OUTSTANDING value: %u\n",
235abc2ac13Srjs 		    cb->cwnd, cb->outstanding));
236abc2ac13Srjs 	cb->rtt = 0xffff;
237abc2ac13Srjs 	cb->rto = TIMEOUT_UBOUND;
238abc2ac13Srjs 	callout_init(&cb->rto_timer, 0);
239abc2ac13Srjs 	callout_init(&cb->free_timer, 0);
240abc2ac13Srjs 	cb->rto_timer_callout = 0;
241abc2ac13Srjs 	cb->rtt_d = 0;
242abc2ac13Srjs 	cb->timestamp = 0;
243abc2ac13Srjs 
244abc2ac13Srjs 	cb->sample_rtt = 1;
245abc2ac13Srjs 
246abc2ac13Srjs 	cb->cv_size = TCPLIKE_INITIAL_CWNDVECTOR;
247abc2ac13Srjs 	/* 1 bit per entry */
248abc2ac13Srjs 	cb->cwndvector = malloc(cb->cv_size / 8, M_PCB, M_NOWAIT | M_ZERO);
249abc2ac13Srjs 	if (cb->cwndvector == NULL) {
250abc2ac13Srjs 		MALLOC_DEBUG((LOG_INFO, "Unable to allocate memory for cwndvector\n"));
251abc2ac13Srjs 		/* What to do now? */
252abc2ac13Srjs 		cb->cv_size = 0;
253abc2ac13Srjs 		dccpstat.tcplikes_send_memerr++;
254abc2ac13Srjs 		return 0;
255abc2ac13Srjs 	}
256abc2ac13Srjs 	memset(cb->cwndvector, 0, cb->cv_size / 8);
257abc2ac13Srjs 	cb->cv_hs = cb->cv_ts = 0;
258abc2ac13Srjs 	cb->cv_hp = cb->cwndvector;
259abc2ac13Srjs 
260abc2ac13Srjs 	cb->ack_last = 0;
261abc2ac13Srjs 	cb->ack_miss = 0;
262abc2ac13Srjs 
263abc2ac13Srjs 	mutex_init(&(cb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
264abc2ac13Srjs 
265abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "TCPlike sender initialised!\n"));
266abc2ac13Srjs 	dccpstat.tcplikes_send_conn++;
267abc2ac13Srjs 	return cb;
268abc2ac13Srjs }
269abc2ac13Srjs 
tcplike_send_term(void * ccb)270abc2ac13Srjs void tcplike_send_term(void *ccb)
271abc2ac13Srjs {
272abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
273abc2ac13Srjs 	if (ccb == 0)
274abc2ac13Srjs 		return;
275abc2ac13Srjs 
276abc2ac13Srjs 	mutex_destroy(&(cb->mutex));
277abc2ac13Srjs 
278abc2ac13Srjs 	free(cb, M_PCB);
279abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "TCP-like sender is destroyed\n"));
280abc2ac13Srjs }
281abc2ac13Srjs 
282abc2ac13Srjs /*
283abc2ac13Srjs  * Free the sender side
284abc2ac13Srjs  * args: ccb - ccb of sender
285abc2ac13Srjs  */
286abc2ac13Srjs void
tcplike_send_free(void * ccb)287abc2ac13Srjs tcplike_send_free(void *ccb)
288abc2ac13Srjs {
289abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
290abc2ac13Srjs 
291abc2ac13Srjs 	LOSS_DEBUG((LOG_INFO, "Entering tcplike_send_free()\n"));
292abc2ac13Srjs 
293abc2ac13Srjs 	if (ccb == 0)
294abc2ac13Srjs 		return;
295abc2ac13Srjs 
296abc2ac13Srjs 	mutex_enter(&(cb->mutex));
297abc2ac13Srjs 
298abc2ac13Srjs 	free(cb->cwndvector, M_PCB);
299abc2ac13Srjs 	cb->cv_hs = cb->cv_ts = 0;
300abc2ac13Srjs 
301abc2ac13Srjs 	/* untimeout any active timer */
302abc2ac13Srjs 	if (cb->rto_timer_callout) {
303abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
304abc2ac13Srjs 		callout_stop(&cb->rto_timer);
305abc2ac13Srjs 		cb->rto_timer_callout = 0;
306abc2ac13Srjs 	}
307abc2ac13Srjs 
308abc2ac13Srjs 	mutex_exit(&(cb->mutex));
309abc2ac13Srjs 
310abc2ac13Srjs 	callout_reset(&cb->free_timer, 10 * hz, tcplike_send_term, (void *)cb);
311abc2ac13Srjs }
312abc2ac13Srjs 
313abc2ac13Srjs /*
314abc2ac13Srjs  * Ask TCPlike wheter one can send a packet or not
315abc2ac13Srjs  * args: ccb  -  ccb block for current connection
316abc2ac13Srjs  * returns: 0 if ok, else <> 0.
317abc2ac13Srjs  */
318abc2ac13Srjs int
tcplike_send_packet(void * ccb,long datasize)319abc2ac13Srjs tcplike_send_packet(void *ccb, long datasize)
320abc2ac13Srjs {
321abc2ac13Srjs 	/* check if one can send here */
322abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
323abc2ac13Srjs 	long ticks;
324abc2ac13Srjs 	char feature[1];
325abc2ac13Srjs 
326abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet()\n"));
327abc2ac13Srjs 
328abc2ac13Srjs 	if (datasize == 0) {
329abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Sending pure ACK. Dont care about CC right now\n"));
330abc2ac13Srjs 		return 1;
331abc2ac13Srjs 	}
332abc2ac13Srjs 
333abc2ac13Srjs 	mutex_enter(&(cb->mutex));
334abc2ac13Srjs 
335abc2ac13Srjs 	if (cb->cwnd <= cb->outstanding) {
336abc2ac13Srjs 		/* May not send. trigger RTO */
337abc2ac13Srjs 		DCCP_DEBUG((LOG_INFO, "cwnd (%d) < outstanding (%d)\n", cb->cwnd, cb->outstanding));
338abc2ac13Srjs 		if (!cb->rto_timer_callout) {
339abc2ac13Srjs 			LOSS_DEBUG((LOG_INFO, "Trigger TCPlike RTO timeout timer. Ticks = %u\n", cb->rto));
340abc2ac13Srjs 			ticks = (long)cb->rto;
341abc2ac13Srjs 			callout_reset(&cb->rto_timer, ticks,
342abc2ac13Srjs 			    tcplike_rto_timeout, (void *)cb);
343abc2ac13Srjs 			cb->rto_timer_callout = 1;
344abc2ac13Srjs 		}
345abc2ac13Srjs 		mutex_exit(&(cb->mutex));
346abc2ac13Srjs 		return 0;
347abc2ac13Srjs 	}
348abc2ac13Srjs 
349abc2ac13Srjs 	/* We're allowed to send */
350abc2ac13Srjs 
351abc2ac13Srjs 	feature[0] = 1;
352abc2ac13Srjs 	if (cb->pcb->remote_ackvector == 0) {
353abc2ac13Srjs 		ACK_DEBUG((LOG_INFO, "Adding Change(Use Ack Vector, 1) to outgoing packet\n"));
354abc2ac13Srjs 		dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR);
355abc2ac13Srjs 		dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKVECTOR, feature, 1);
356abc2ac13Srjs 	}
357abc2ac13Srjs 
358abc2ac13Srjs 	/* untimeout any active timer */
359abc2ac13Srjs 	if (cb->rto_timer_callout) {
360abc2ac13Srjs 		LOSS_DEBUG((LOG_INFO, "Untimeout RTO Timer\n"));
361abc2ac13Srjs 		callout_stop(&cb->rto_timer);
362abc2ac13Srjs 		cb->rto_timer_callout = 0;
363abc2ac13Srjs 	}
364abc2ac13Srjs 
365abc2ac13Srjs 	if (!cb->sample_rtt) {
366abc2ac13Srjs 		struct timeval stamp;
367abc2ac13Srjs 		microtime(&stamp);
368abc2ac13Srjs 		cb->timestamp = ((stamp.tv_sec & 0x00000FFF) * 1000000) + stamp.tv_usec;
369abc2ac13Srjs 		dccp_add_option(cb->pcb, DCCP_OPT_TIMESTAMP, (char*) &(cb->timestamp), 4);
370abc2ac13Srjs 		/*LOSS_DEBUG((LOG_INFO, "Adding timestamp %u\n", cb->timestamp));*/
371abc2ac13Srjs 		cb->sample_rtt = 1;
372abc2ac13Srjs 	}
373abc2ac13Srjs 
374abc2ac13Srjs 	mutex_exit(&(cb->mutex));
375abc2ac13Srjs 	return 1;
376abc2ac13Srjs 
377abc2ac13Srjs }
378abc2ac13Srjs 
379abc2ac13Srjs /*
380abc2ac13Srjs  * Notify sender that a packet has been sent
381abc2ac13Srjs  * args: ccb - ccb block for current connection
382abc2ac13Srjs  *	 moreToSend - if there exists more packets to send
383abc2ac13Srjs  */
384abc2ac13Srjs void
tcplike_send_packet_sent(void * ccb,int moreToSend,long datasize)385abc2ac13Srjs tcplike_send_packet_sent(void *ccb, int moreToSend, long datasize)
386abc2ac13Srjs {
387abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
388abc2ac13Srjs 
389abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_packet_sent(,%i,%i)\n",moreToSend,(int) datasize));
390abc2ac13Srjs 
391abc2ac13Srjs 	if (datasize == 0) {
392abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Sent pure ACK. Dont care about cwnd-storing\n"));
393abc2ac13Srjs 		return;
394abc2ac13Srjs 	}
395abc2ac13Srjs 
396abc2ac13Srjs 	mutex_enter(&(cb->mutex));
397abc2ac13Srjs 
398abc2ac13Srjs 	cb->outstanding++;
399abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "SENT. cwnd: %d, outstanding: %d\n",cb->cwnd, cb->outstanding));
400abc2ac13Srjs 
401abc2ac13Srjs 	/* stash the seqnr in cwndvector */
402abc2ac13Srjs 	/* Dont do this if we're only sending an ACK ! */
403abc2ac13Srjs 	_add_to_cwndvector(cb, cb->pcb->seq_snd);
404abc2ac13Srjs 	CWND_DEBUG((LOG_INFO, "Sent. CWND value: %u , OUTSTANDING value: %u\n",cb->cwnd, cb->outstanding));
405abc2ac13Srjs 
406abc2ac13Srjs 	dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
407abc2ac13Srjs 	mutex_exit(&(cb->mutex));
408abc2ac13Srjs }
409abc2ac13Srjs 
410abc2ac13Srjs /*
411abc2ac13Srjs  * Notify that an ack package was received
412abc2ac13Srjs  * args: ccb  -  ccb block for current connection
413abc2ac13Srjs  */
414abc2ac13Srjs void
tcplike_send_packet_recv(void * ccb,char * options,int optlen)415abc2ac13Srjs tcplike_send_packet_recv(void *ccb, char *options, int optlen)
416abc2ac13Srjs {
417abc2ac13Srjs 	dccp_seq acknum, lastok;
418abc2ac13Srjs 	u_int16_t numlostpackets, avsize, i, prev_size;
419abc2ac13Srjs 	u_int8_t length, state, numokpackets, ackratiocnt;
420abc2ac13Srjs 	u_char av[10];
421abc2ac13Srjs 	struct tcplike_send_ccb *cb = (struct tcplike_send_ccb *) ccb;
422abc2ac13Srjs 
423abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_send_ack_recv()\n"));
424abc2ac13Srjs 	mutex_enter(&(cb->mutex));
425abc2ac13Srjs 
426abc2ac13Srjs 	if (dccp_get_option(options, optlen, DCCP_OPT_TIMESTAMP_ECHO, av,10) > 0) {
427abc2ac13Srjs 		u_int32_t echo, elapsed;
428abc2ac13Srjs 
429abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Received TIMESTAMP ECHO\n"));
430abc2ac13Srjs 		bcopy(av, &echo, 4);
431abc2ac13Srjs 		bcopy(av + 4, &elapsed, 4);
432abc2ac13Srjs 
433abc2ac13Srjs 		if (echo == cb->timestamp) {
434abc2ac13Srjs 			struct timeval time;
435abc2ac13Srjs 			u_int32_t c_stamp;
436abc2ac13Srjs 			u_int16_t diff;
437abc2ac13Srjs 
438abc2ac13Srjs 			microtime(&time);
439abc2ac13Srjs 			c_stamp = ((time.tv_sec & 0x00000FFF) * 1000000) + time.tv_usec;
440abc2ac13Srjs 
441abc2ac13Srjs 			diff = (u_int16_t) c_stamp - cb->timestamp - elapsed;
442abc2ac13Srjs 			diff = (u_int16_t)(diff / 1000);
443abc2ac13Srjs 			TCPLIKE_DEBUG((LOG_INFO, "Got Timestamp Echo; Echo = %u, Elapsed = %u. DIFF = %u\n",
444abc2ac13Srjs 				       echo, elapsed, diff));
445abc2ac13Srjs 			tcplike_rtt_sample(cb, diff);
446abc2ac13Srjs 		}
447abc2ac13Srjs 	}
448abc2ac13Srjs 
449abc2ac13Srjs 	if (cb->pcb->ack_rcv == 0) {
450abc2ac13Srjs 		/* There was no Ack. There is no spoon */
451abc2ac13Srjs 
452abc2ac13Srjs 		/* We'll clear the missingacks data here, since the other host
453abc2ac13Srjs 		 * is also sending data.
454abc2ac13Srjs 		 * I guess we could deal with this, using the NDP field in the
455abc2ac13Srjs 		 * header. Let's stick a *TODO* mark here for now.
456abc2ac13Srjs 		 * The missingacks mechanism will activate if other host goes to
457abc2ac13Srjs 		 * only sending DCCP-Ack packets.
458abc2ac13Srjs 		 */
459abc2ac13Srjs 		cb->ack_last = 0;
460abc2ac13Srjs 		cb->ack_miss = 0;
461abc2ac13Srjs 		ACKRATIO_DEBUG((LOG_INFO, "Clear Missing Acks state!\n"));
462abc2ac13Srjs 		mutex_exit(&(cb->mutex));
463abc2ac13Srjs 		return;
464abc2ac13Srjs 	}
465abc2ac13Srjs 
466abc2ac13Srjs 	cb->sample_rtt = 0;
467abc2ac13Srjs 
468abc2ac13Srjs 	/* check ackVector for lost packets. cmp with cv_list */
469abc2ac13Srjs 	avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av,10);
470abc2ac13Srjs 	if (avsize == 0)
471abc2ac13Srjs 		avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av,10);
472abc2ac13Srjs 
473abc2ac13Srjs 	if (avsize > 0)
474abc2ac13Srjs 		dccpstat.tcplikes_send_ackrecv++;
475abc2ac13Srjs 
476abc2ac13Srjs 	acknum = cb->pcb->ack_rcv;
477abc2ac13Srjs 	numlostpackets = 0;
478abc2ac13Srjs 	numokpackets = 0;
479abc2ac13Srjs 	lastok = 0;
480abc2ac13Srjs 	prev_size = _cwndvector_size(cb);
481abc2ac13Srjs 
482abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Start removing from cwndvector %d\n", avsize));
483abc2ac13Srjs 	if (avsize == 0)
484abc2ac13Srjs 		_remove_from_cwndvector(cb, acknum);
485abc2ac13Srjs 
486abc2ac13Srjs 	for (i=0; i < avsize; i++) {
487abc2ac13Srjs 		state = (av[i] & 0xc0) >> 6;
488abc2ac13Srjs 		length = (av[i] & 0x3f) +1;
489abc2ac13Srjs 		while (length > 0) {
490abc2ac13Srjs 			if (state == 0) {
491abc2ac13Srjs 				CWND_DEBUG((LOG_INFO, "Packet %llu was OK\n", acknum));
492abc2ac13Srjs 				numokpackets++;
493abc2ac13Srjs 				lastok = acknum;
494abc2ac13Srjs 				_remove_from_cwndvector(cb, acknum);
495abc2ac13Srjs 			} else {
496abc2ac13Srjs 				if (acknum > cb->oldcwnd_ts) {
497abc2ac13Srjs 					LOSS_DEBUG((LOG_INFO, "Packet %llu was lost %llu state %d\n", acknum, cb->oldcwnd_ts, state));
498abc2ac13Srjs 					numlostpackets++;
499abc2ac13Srjs 					dccpstat.tcplikes_send_reploss++;
500abc2ac13Srjs 				}
501abc2ac13Srjs 			}
502abc2ac13Srjs 			acknum--;
503abc2ac13Srjs 			length--;
504abc2ac13Srjs 		}
505abc2ac13Srjs 	}
506abc2ac13Srjs 	if (lastok)
507abc2ac13Srjs 		if (_chop_cwndvector(cb, lastok-TCPLIKE_NUMDUPACK)) {
508abc2ac13Srjs 			LOSS_DEBUG((LOG_INFO, "Packets were lost\n"));
509abc2ac13Srjs 			if (lastok-TCPLIKE_NUMDUPACK > cb->oldcwnd_ts) {
510abc2ac13Srjs 				numlostpackets++;
511abc2ac13Srjs 				dccpstat.tcplikes_send_assloss++;
512abc2ac13Srjs 			}
513abc2ac13Srjs 		}
514abc2ac13Srjs 
515abc2ac13Srjs 	lastok = cb->cv_hs;
516abc2ac13Srjs 	while (_cwndvector_state(cb, lastok) == 0x00 && lastok < cb->cv_ts)
517abc2ac13Srjs 		lastok++;
518abc2ac13Srjs 	if (lastok != cb->cv_hs)
519abc2ac13Srjs 		_chop_cwndvector(cb, lastok);
520abc2ac13Srjs 
521abc2ac13Srjs 	cb->outstanding = _cwndvector_size(cb);
522abc2ac13Srjs 	CWND_DEBUG((LOG_INFO, "Decrease outstanding. was = %u , now = %u\n", prev_size, cb->outstanding));
523abc2ac13Srjs 	if (prev_size == cb->outstanding) {
524abc2ac13Srjs 		/* Nothing dropped from cwndvector  */
525abc2ac13Srjs 		mutex_exit(&(cb->mutex));
526abc2ac13Srjs 		return;
527abc2ac13Srjs 	}
528abc2ac13Srjs 
529abc2ac13Srjs 	cb->acked_in_win += numokpackets;
530abc2ac13Srjs 
531abc2ac13Srjs 	if (cb->cwnd < cb->ssthresh) {
532abc2ac13Srjs 		/* Slow start */
533abc2ac13Srjs 
534abc2ac13Srjs 		if (numlostpackets > 0) {
535abc2ac13Srjs 			/* Packet loss */
536abc2ac13Srjs 			LOSS_DEBUG((LOG_INFO, "Packet Loss in Slow Start\n"));
537abc2ac13Srjs 			cb->cwnd = cb->cwnd>>1;
538abc2ac13Srjs 			if (cb->cwnd < 1)
539abc2ac13Srjs 				cb->cwnd = 1;
540abc2ac13Srjs 			cb->ssthresh = cb->cwnd;
541abc2ac13Srjs 			cb->acked_in_win = 0;
542abc2ac13Srjs 			cb->acked_windows = 0;
543abc2ac13Srjs 			cb->oldcwnd_ts = cb->pcb->seq_snd;
544abc2ac13Srjs 
545abc2ac13Srjs 		} else {
546abc2ac13Srjs 			cb->cwnd++;
547abc2ac13Srjs 		}
548abc2ac13Srjs 
549abc2ac13Srjs 	} else if (cb->cwnd >= cb->ssthresh) {
550abc2ac13Srjs 
551abc2ac13Srjs 		if (numlostpackets > 0) {
552abc2ac13Srjs 			/* Packet loss */
553abc2ac13Srjs 			LOSS_DEBUG((LOG_INFO, "Packet Loss in action\n"));
554abc2ac13Srjs 			cb->cwnd = cb->cwnd>>1;
555abc2ac13Srjs 			if (cb->cwnd < 1)
556abc2ac13Srjs 				cb->cwnd = 1;
557abc2ac13Srjs 			cb->ssthresh = cb->cwnd;
558abc2ac13Srjs 			cb->acked_in_win = 0;
559abc2ac13Srjs 			cb->acked_windows = 0;
560abc2ac13Srjs 			cb->oldcwnd_ts = cb->pcb->seq_snd;
561abc2ac13Srjs 
562abc2ac13Srjs 		} else if (cb->acked_in_win > cb->cwnd) {
563abc2ac13Srjs 			cb->cwnd++;
564abc2ac13Srjs 		}
565abc2ac13Srjs 	}
566abc2ac13Srjs 
567abc2ac13Srjs 	/* Ok let's check if there are missing Ack packets */
568abc2ac13Srjs 	ACKRATIO_DEBUG((LOG_INFO, "Check Ack. seq_rcv: %u ,ack_last: %u ,ack_miss: %u\n",
569abc2ac13Srjs 			cb->pcb->seq_rcv, cb->ack_last, cb->ack_miss));
570abc2ac13Srjs 
571abc2ac13Srjs 	if (cb->ack_last == 0) {
572abc2ac13Srjs 		/* First received ack (or first after Data packet). Yey */
573abc2ac13Srjs 		cb->ack_last = cb->pcb->seq_rcv;
574abc2ac13Srjs 		cb->ack_miss = 0;
575abc2ac13Srjs 	} else if (cb->pcb->seq_rcv == (cb->ack_last + 1)) {
576abc2ac13Srjs 		/* This is correct, non-congestion, in-order behaviour */
577abc2ac13Srjs 		cb->ack_last = cb->pcb->seq_rcv;
578abc2ac13Srjs 
579abc2ac13Srjs 	} else if (cb->pcb->seq_rcv < (cb->ack_last + 1)) {
580abc2ac13Srjs 		/* Might be an Ack we've been missing */
581abc2ac13Srjs 		/* This code has a flaw; If we miss 2 Ack packets, we only care
582abc2ac13Srjs 		 * about the older one. This means that the next-to-oldest one could
583*2d52435aSandvar 		 * be lost without any action being taken.
584abc2ac13Srjs 		 * Time will tell if that is going to be a Giant Problem(r)
585abc2ac13Srjs 		 */
586abc2ac13Srjs 		if (cb->pcb->seq_rcv == cb->ack_miss) {
587abc2ac13Srjs 			/* Yea it was. great */
588abc2ac13Srjs 			cb->ack_miss = 0;
589abc2ac13Srjs 		}
590abc2ac13Srjs 
591abc2ac13Srjs 	} else if (cb->pcb->seq_rcv > (cb->ack_last + 1)) {
592abc2ac13Srjs 		/* There is a jump in Ack seqnums.. */
593abc2ac13Srjs 		cb->ack_miss = cb->ack_last + 1;
594abc2ac13Srjs 		cb->ack_last = cb->pcb->seq_rcv;
595abc2ac13Srjs 	}
596abc2ac13Srjs 
597abc2ac13Srjs 	if (cb->ack_miss && ((cb->ack_miss + TCPLIKE_NUMDUPACK) < cb->ack_last)) {
598abc2ac13Srjs 		/* Alert! Alert! Ack packets are MIA.
599abc2ac13Srjs 		 * Decrease Ack Ratio
600abc2ac13Srjs 		 */
601abc2ac13Srjs 		cb->rcvr_ackratio = cb->rcvr_ackratio<<1;
602abc2ac13Srjs 		if (cb->rcvr_ackratio > (cb->cwnd>>1)) {
603abc2ac13Srjs 			/* Constraint 2 */
604abc2ac13Srjs 			cb->rcvr_ackratio = cb->cwnd>>1;
605abc2ac13Srjs 		}
606abc2ac13Srjs 		if (cb->rcvr_ackratio == 0)
607abc2ac13Srjs 			cb->rcvr_ackratio = 1;
608abc2ac13Srjs 		ACKRATIO_DEBUG((LOG_INFO, "Increase Ack Ratio. Now = %u. (cwnd = %u)\n", cb->rcvr_ackratio, cb->cwnd));
609abc2ac13Srjs 		dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
610abc2ac13Srjs 		dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
611abc2ac13Srjs 				 (char *) &cb->rcvr_ackratio, 1);
612abc2ac13Srjs 
613abc2ac13Srjs 		cb->ack_miss = 0;
614abc2ac13Srjs 		cb->acked_windows = 0;
615abc2ac13Srjs 		cb->acked_in_win = 0;
616abc2ac13Srjs 		dccpstat.tcplikes_send_missack++;
617abc2ac13Srjs 
618abc2ac13Srjs 	} else if (cb->acked_in_win > cb->cwnd) {
619abc2ac13Srjs 		cb->acked_in_win = 0;
620abc2ac13Srjs 		cb->acked_windows++;
621abc2ac13Srjs 		if (cb->rcvr_ackratio == 1) {
622abc2ac13Srjs 			/* Ack Ratio is 1. We cant decrease it more.. Lets wait for some
623abc2ac13Srjs 			 * heavy congestion so we can increase it
624abc2ac13Srjs 			 */
625abc2ac13Srjs 			cb->acked_windows = 0;
626abc2ac13Srjs 		}
627abc2ac13Srjs 	}
628abc2ac13Srjs 
629abc2ac13Srjs 	if (cb->acked_windows >= 1) {
630abc2ac13Srjs 		ackratiocnt = (cb->cwnd / ((cb->rcvr_ackratio*cb->rcvr_ackratio) - cb->rcvr_ackratio));
631abc2ac13Srjs 		if (cb->acked_windows >= ackratiocnt) {
632abc2ac13Srjs 			if (cb->rcvr_ackratio > 2 && cb->cwnd >= 4) {
633abc2ac13Srjs 				/* Constraint 3 - AckRatio at least 2 for a cwnd >= 4 */
634abc2ac13Srjs 				cb->rcvr_ackratio--;
635abc2ac13Srjs 				ACKRATIO_DEBUG((LOG_INFO, "Decrease ackratio by 1, now: %u\n", cb->rcvr_ackratio));
636abc2ac13Srjs 				dccp_remove_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO);
637abc2ac13Srjs 				dccp_add_feature(cb->pcb, DCCP_OPT_CHANGE_R, DCCP_FEATURE_ACKRATIO,
638abc2ac13Srjs 						 (char *) &cb->rcvr_ackratio, 1);
639abc2ac13Srjs 			}
640abc2ac13Srjs 			cb->acked_in_win = 0;
641abc2ac13Srjs 			cb->acked_windows = 0;
642abc2ac13Srjs 		}
643abc2ac13Srjs 	}
644abc2ac13Srjs 
645abc2ac13Srjs 	CWND_DEBUG((LOG_INFO, "Recvd. CWND value: %u , OUTSTANDING value: %u\n",
646abc2ac13Srjs 		    cb->cwnd, cb->outstanding));
647abc2ac13Srjs 
648abc2ac13Srjs 	if (cb->cwnd > cb->outstanding && cb->rto_timer_callout) {
649abc2ac13Srjs                 LOSS_DEBUG((LOG_INFO, "Force DCCP_OUTPUT, CWND = %u Outstanding = %u\n",
650abc2ac13Srjs                             cb->cwnd, cb->outstanding));
651abc2ac13Srjs 		callout_stop(&cb->rto_timer);
652abc2ac13Srjs 		cb->rto_timer_callout = 0;
653abc2ac13Srjs 
654abc2ac13Srjs 		mutex_exit(&(cb->mutex));
655abc2ac13Srjs                 dccp_output(cb->pcb, 1);
656abc2ac13Srjs 		return;
657abc2ac13Srjs         }
658abc2ac13Srjs 	mutex_exit(&(cb->mutex));
659abc2ac13Srjs }
660abc2ac13Srjs 
661abc2ac13Srjs int
_cwndvector_size(struct tcplike_send_ccb * cb)662abc2ac13Srjs _cwndvector_size(struct tcplike_send_ccb *cb)
663abc2ac13Srjs {
664abc2ac13Srjs 	u_int64_t gap, offset, seqnr;
665abc2ac13Srjs 	u_int32_t cnt;
666abc2ac13Srjs 	u_char *t;
667abc2ac13Srjs 
668abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Enter cwndvector_size\n"));
669abc2ac13Srjs 	cnt = 0;
670abc2ac13Srjs 	for (seqnr = cb->cv_hs; seqnr < cb->cv_ts; seqnr++) {
671abc2ac13Srjs 		gap = seqnr - cb->cv_hs;
672abc2ac13Srjs 
673abc2ac13Srjs 		offset = gap % 8;
674abc2ac13Srjs 		t = cb->cv_hp + (gap/8);
675abc2ac13Srjs 		if (t >= (cb->cwndvector + (cb->cv_size/8)))
676abc2ac13Srjs 			t -= (cb->cv_size / 8); /* wrapped */
677abc2ac13Srjs 
678abc2ac13Srjs 		if (((*t & (0x01 << offset)) >> offset) == 0x01)
679abc2ac13Srjs 			cnt++;
680abc2ac13Srjs 	}
681abc2ac13Srjs 	return cnt;
682abc2ac13Srjs }
683abc2ac13Srjs 
684abc2ac13Srjs u_char
_cwndvector_state(struct tcplike_send_ccb * cb,u_int64_t seqnr)685abc2ac13Srjs _cwndvector_state(struct tcplike_send_ccb *cb, u_int64_t seqnr)
686abc2ac13Srjs {
687abc2ac13Srjs 	u_int64_t gap, offset;
688abc2ac13Srjs 	u_char *t;
689abc2ac13Srjs 
690abc2ac13Srjs 	/* Check for wrapping */
691abc2ac13Srjs 	if (seqnr >= cb->cv_hs) {
692abc2ac13Srjs 		/* Not wrapped */
693abc2ac13Srjs 		gap = seqnr - cb->cv_hs;
694abc2ac13Srjs 	} else {
695abc2ac13Srjs 		/* Wrapped XXXXX */
696abc2ac13Srjs 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
697abc2ac13Srjs 	}
698abc2ac13Srjs 
699abc2ac13Srjs 	if (gap >= cb->cv_size) {
700abc2ac13Srjs 		/* gap is bigger than cwndvector size? baaad */
701abc2ac13Srjs 		return 0x01;
702abc2ac13Srjs 	}
703abc2ac13Srjs 
704abc2ac13Srjs 	offset = gap % 8;
705abc2ac13Srjs 	t = cb->cv_hp + (gap/8);
706abc2ac13Srjs 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
707abc2ac13Srjs 		t -= (cb->cv_size / 8); /* wrapped */
708abc2ac13Srjs 
709abc2ac13Srjs 	return ((*t & (0x01 << offset)) >> offset);
710abc2ac13Srjs }
711abc2ac13Srjs 
712abc2ac13Srjs void
_add_to_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)713abc2ac13Srjs _add_to_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
714abc2ac13Srjs {
715abc2ac13Srjs 	u_int64_t offset, dc, gap;
716abc2ac13Srjs 	u_char *t, *n;
717abc2ac13Srjs 
718abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering add_to_cwndvector\n"));
719abc2ac13Srjs 
720abc2ac13Srjs 	if (cb->cv_hs == cb->cv_ts) {
721abc2ac13Srjs 		/* Empty cwndvector */
722abc2ac13Srjs 		cb->cv_hs = cb->cv_ts = seqnr;
723abc2ac13Srjs 	}
724abc2ac13Srjs 
725abc2ac13Srjs 	/* Check for wrapping */
726abc2ac13Srjs 	if (seqnr >= cb->cv_hs) {
727abc2ac13Srjs 		/* Not wrapped */
728abc2ac13Srjs 		gap = seqnr - cb->cv_hs;
729abc2ac13Srjs 	} else {
730abc2ac13Srjs 		/* Wrapped */
731abc2ac13Srjs 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
732abc2ac13Srjs 	}
733abc2ac13Srjs 
734abc2ac13Srjs 	if (gap >= cb->cv_size) {
735abc2ac13Srjs 		/* gap is bigger than cwndvector size? baaad */
736abc2ac13Srjs 		/* maybe we should increase the cwndvector here */
737abc2ac13Srjs 		CWND_DEBUG((LOG_INFO, "add cwndvector error. gap: %d, cv_size: %d, seqnr: %d\n",
738abc2ac13Srjs 			    gap, cb->cv_size, seqnr));
739abc2ac13Srjs 		dccpstat.tcplikes_send_badseq++;
740abc2ac13Srjs 		return;
741abc2ac13Srjs 	}
742abc2ac13Srjs 
743abc2ac13Srjs 	offset = gap % 8; /* bit to mark */
744abc2ac13Srjs 	t = cb->cv_hp + (gap/8);
745abc2ac13Srjs 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
746abc2ac13Srjs 		t -= (cb->cv_size / 8); /* cwndvector wrapped */
747abc2ac13Srjs 
748abc2ac13Srjs 	*t = *t | (0x01 << offset); /* turn on bit */
749abc2ac13Srjs 
750abc2ac13Srjs 	cb->cv_ts = seqnr+1;
751abc2ac13Srjs 	if (cb->cv_ts == 0x1000000000000LL)
752abc2ac13Srjs 		cb->cv_ts = 0;
753abc2ac13Srjs 
754abc2ac13Srjs 	if (gap > (cb->cv_size - 128)) {
755abc2ac13Srjs 		MALLOC_DEBUG((LOG_INFO, "INCREASE cwndVECTOR\n"));
756abc2ac13Srjs 		n = malloc(cb->cv_size/4, M_PCB, M_NOWAIT); /* old size * 2 */
757abc2ac13Srjs 		if (n == NULL) {
758abc2ac13Srjs 			MALLOC_DEBUG((LOG_INFO, "Increase cwndvector FAILED\n"));
759abc2ac13Srjs 			dccpstat.tcplikes_send_memerr++;
760abc2ac13Srjs 			return;
761abc2ac13Srjs 		}
762abc2ac13Srjs 		memset (n+cb->cv_size/8,0x00,cb->cv_size/8); /* new half all missing */
763abc2ac13Srjs 		dc = (cb->cwndvector + (cb->cv_size/8)) - cb->cv_hp;
764abc2ac13Srjs 		memcpy (n,cb->cv_hp, dc); /* tail to end */
765abc2ac13Srjs 		memcpy (n+dc,cb->cwndvector,cb->cv_hp - cb->cwndvector); /* start to tail */
766abc2ac13Srjs 		cb->cv_size = cb->cv_size * 2; /* counted in items, so it';s a doubling */
767abc2ac13Srjs 		free (cb->cwndvector, M_PCB);
768abc2ac13Srjs 		cb->cv_hp = cb->cwndvector = n;
769abc2ac13Srjs 	}
770abc2ac13Srjs }
771abc2ac13Srjs 
772abc2ac13Srjs void
_remove_from_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)773abc2ac13Srjs _remove_from_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
774abc2ac13Srjs {
775abc2ac13Srjs 	u_int64_t offset;
776abc2ac13Srjs 	int64_t gap;
777abc2ac13Srjs 	u_char *t;
778abc2ac13Srjs 
779abc2ac13Srjs 	DCCP_DEBUG((LOG_INFO, "Entering remove_from_cwndvector\n"));
780abc2ac13Srjs 
781abc2ac13Srjs 	if (cb->cv_hs == cb->cv_ts) {
782abc2ac13Srjs 		/* Empty cwndvector */
783abc2ac13Srjs 		return;
784abc2ac13Srjs 	}
785abc2ac13Srjs 
786abc2ac13Srjs 	/* Check for wrapping */
787abc2ac13Srjs 	if (seqnr >= cb->cv_hs) {
788abc2ac13Srjs 		/* Not wrapped */
789abc2ac13Srjs 		gap = seqnr - cb->cv_hs;
790abc2ac13Srjs 	} else {
791abc2ac13Srjs 		/* Wrapped */
792abc2ac13Srjs 		gap = seqnr + 0x1000000000000LL - cb->cv_hs; /* seq nr = 48 bits */
793abc2ac13Srjs 	}
794abc2ac13Srjs 
795abc2ac13Srjs 	if (gap >= cb->cv_size) {
796abc2ac13Srjs 		/* gap is bigger than cwndvector size. has already been chopped */
797abc2ac13Srjs 		return;
798abc2ac13Srjs 	}
799abc2ac13Srjs 
800abc2ac13Srjs 	offset = gap % 8; /* hi or low 2 bits to mark */
801abc2ac13Srjs 	t = cb->cv_hp + (gap/8);
802abc2ac13Srjs 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
803abc2ac13Srjs 		t -= (cb->cv_size / 8); /* cwndvector wrapped */
804abc2ac13Srjs 
805abc2ac13Srjs 	*t = *t & (~(0x01 << offset)); /* turn off bits */
806abc2ac13Srjs }
807abc2ac13Srjs 
808abc2ac13Srjs int
_chop_cwndvector(struct tcplike_send_ccb * cb,u_int64_t seqnr)809abc2ac13Srjs _chop_cwndvector(struct tcplike_send_ccb *cb, u_int64_t seqnr)
810abc2ac13Srjs {
811abc2ac13Srjs 	int64_t gap, bytegap;
812abc2ac13Srjs 	u_char *t;
813abc2ac13Srjs 
814abc2ac13Srjs 	CWND_DEBUG((LOG_INFO,"Chop cwndvector at: %u\n", seqnr));
815abc2ac13Srjs 
816abc2ac13Srjs 	if (cb->cv_hs == cb->cv_ts)
817abc2ac13Srjs 		return 0;
818abc2ac13Srjs 
819abc2ac13Srjs 	if (seqnr > cb->cv_hs) {
820abc2ac13Srjs 		gap = seqnr - cb->cv_hs;
821abc2ac13Srjs 	} else {
822abc2ac13Srjs 		/* We received obsolete information */
823abc2ac13Srjs 		return 0;
824abc2ac13Srjs 	}
825abc2ac13Srjs 
826abc2ac13Srjs 	bytegap = gap/8;
827abc2ac13Srjs 	if (bytegap == 0)
828abc2ac13Srjs 		return 0;
829abc2ac13Srjs 
830abc2ac13Srjs 	t = cb->cv_hp + bytegap;
831abc2ac13Srjs 	if (t >= (cb->cwndvector + (cb->cv_size/8)))
832abc2ac13Srjs 		t -= (cb->cv_size / 8); /* ackvector wrapped */
833abc2ac13Srjs 	cb->cv_hp = t;
834abc2ac13Srjs 	cb->cv_hs += bytegap*8;
835abc2ac13Srjs 	return 1;
836abc2ac13Srjs }
837abc2ac13Srjs 
838abc2ac13Srjs 
839abc2ac13Srjs /* Receiver side */
840abc2ac13Srjs 
841abc2ac13Srjs 
842abc2ac13Srjs /* Functions declared in struct dccp_cc_sw */
843abc2ac13Srjs 
844abc2ac13Srjs /* Initialises the receiver side
845abc2ac13Srjs  * returns: pointer to a tcplike_recv_ccb struct on success, otherwise 0
846abc2ac13Srjs  */
847abc2ac13Srjs void *
tcplike_recv_init(struct dccpcb * pcb)848abc2ac13Srjs tcplike_recv_init(struct dccpcb *pcb)
849abc2ac13Srjs {
850abc2ac13Srjs 	struct tcplike_recv_ccb *ccb;
851abc2ac13Srjs 
852abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_init()\n"));
853abc2ac13Srjs 
854abc2ac13Srjs 	ccb = malloc(sizeof (struct tcplike_recv_ccb), M_PCB, M_NOWAIT | M_ZERO);
855abc2ac13Srjs 	if (ccb == 0) {
856abc2ac13Srjs 		TCPLIKE_DEBUG((LOG_INFO, "Unable to allocate memory for tcplike_recv_ccb!\n"));
857abc2ac13Srjs 		dccpstat.tcplikes_recv_memerr++;
858abc2ac13Srjs 		return 0;
859abc2ac13Srjs 	}
860abc2ac13Srjs 
861abc2ac13Srjs 	memset(ccb, 0, sizeof (struct tcplike_recv_ccb));
862abc2ac13Srjs 
863abc2ac13Srjs 	ccb->pcb = pcb;
864abc2ac13Srjs 	ccb->unacked = 0;
865abc2ac13Srjs 	ccb->pcb->ack_ratio = 2;
866abc2ac13Srjs 
867abc2ac13Srjs 	ccb->pcb->remote_ackvector = 1;
868abc2ac13Srjs 	dccp_use_ackvector(ccb->pcb);
869abc2ac13Srjs 
870abc2ac13Srjs 	callout_init(&ccb->free_timer, 0);
871abc2ac13Srjs 
872abc2ac13Srjs 	mutex_init(&(ccb->mutex), MUTEX_DEFAULT, IPL_SOFTNET);
873abc2ac13Srjs 
874abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "TCPlike receiver initialised!\n"));
875abc2ac13Srjs 	dccpstat.tcplikes_recv_conn++;
876abc2ac13Srjs 	return ccb;
877abc2ac13Srjs }
878abc2ac13Srjs 
tcplike_recv_term(void * ccb)879abc2ac13Srjs void tcplike_recv_term(void *ccb)
880abc2ac13Srjs {
881abc2ac13Srjs 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
882abc2ac13Srjs 	if (ccb == 0)
883abc2ac13Srjs 		return;
884abc2ac13Srjs 
885abc2ac13Srjs 	mutex_destroy(&(cb->mutex));
886abc2ac13Srjs 	free(cb, M_PCB);
887abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "TCP-like receiver is destroyed\n"));
888abc2ac13Srjs }
889abc2ac13Srjs 
890abc2ac13Srjs /* Free the receiver side
89109ae0501Smsaitoh  * args: ccb - ccb of receiver
892abc2ac13Srjs  */
893abc2ac13Srjs void
tcplike_recv_free(void * ccb)894abc2ac13Srjs tcplike_recv_free(void *ccb)
895abc2ac13Srjs {
896abc2ac13Srjs 	struct ack_list *a;
897abc2ac13Srjs 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
898abc2ac13Srjs 
899abc2ac13Srjs 	LOSS_DEBUG((LOG_INFO, "Entering tcplike_recv_free()\n"));
900abc2ac13Srjs 
901abc2ac13Srjs 	if (ccb == 0)
902abc2ac13Srjs 		return;
903abc2ac13Srjs 
904abc2ac13Srjs 	mutex_enter(&(cb->mutex));
905abc2ac13Srjs 
906abc2ac13Srjs 	a = cb->av_list;
907abc2ac13Srjs 	while (a) {
908abc2ac13Srjs 		cb->av_list = a->next;
909abc2ac13Srjs 		free(a, M_TEMP);
910abc2ac13Srjs 		a = cb->av_list;
911abc2ac13Srjs 	}
912abc2ac13Srjs 
913abc2ac13Srjs 	cb->pcb->av_size = 0;
914abc2ac13Srjs 	free(cb->pcb->ackvector, M_PCB);
915abc2ac13Srjs 
916abc2ac13Srjs 	mutex_exit(&(cb->mutex));
917abc2ac13Srjs 	callout_reset(&cb->free_timer, 10 * hz, tcplike_recv_term, (void *)cb);
918abc2ac13Srjs }
919abc2ac13Srjs 
920abc2ac13Srjs /*
921abc2ac13Srjs  * Tell TCPlike that a packet has been received
922abc2ac13Srjs  * args: ccb  -  ccb block for current connection
923abc2ac13Srjs  */
924abc2ac13Srjs void
tcplike_recv_packet_recv(void * ccb,char * options,int optlen)925abc2ac13Srjs tcplike_recv_packet_recv(void *ccb, char *options, int optlen)
926abc2ac13Srjs {
927abc2ac13Srjs 	struct tcplike_recv_ccb *cb = (struct tcplike_recv_ccb *) ccb;
928abc2ac13Srjs 	u_char ackvector[16];
929abc2ac13Srjs 	u_int16_t avsize;
930abc2ac13Srjs 	u_char av_rcv[10];
931abc2ac13Srjs 
932abc2ac13Srjs 	TCPLIKE_DEBUG((LOG_INFO, "Entering tcplike_recv_packet()\n"));
933abc2ac13Srjs 
934abc2ac13Srjs 	mutex_enter(&(cb->mutex));
935abc2ac13Srjs 
936abc2ac13Srjs 	if (cb->pcb->type_rcv == DCCP_TYPE_DATA ||
937abc2ac13Srjs 	    cb->pcb->type_rcv == DCCP_TYPE_DATAACK)
938abc2ac13Srjs 		dccpstat.tcplikes_recv_datarecv++;
939abc2ac13Srjs 
940abc2ac13Srjs 	/* Grab Ack Vector 0 or 1 */
941abc2ac13Srjs 	avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR0, av_rcv,10);
942abc2ac13Srjs 	if (avsize == 0)
943abc2ac13Srjs 		avsize = dccp_get_option(options, optlen, DCCP_OPT_ACK_VECTOR1, av_rcv,10);
944abc2ac13Srjs 
945abc2ac13Srjs 	/* We are only interested in acks-on-acks here.
946abc2ac13Srjs 	 * The "real" ack handling is done be the sender */
947abc2ac13Srjs 	if (avsize == 0 && cb->pcb->ack_rcv) {
948abc2ac13Srjs 		u_int64_t ackthru;
949abc2ac13Srjs 		/* We got an Ack without an ackvector.
950abc2ac13Srjs 		 * This would mean it's an ack on an ack.
951abc2ac13Srjs 		 */
952abc2ac13Srjs 		ackthru = _avlist_get(cb, cb->pcb->ack_rcv);
953abc2ac13Srjs 		ACK_DEBUG((LOG_INFO, "GOT Ack without Ackvector; Ackthru: %u\n", ackthru));
954abc2ac13Srjs 		if (ackthru) {
955abc2ac13Srjs 			dccp_update_ackvector(cb->pcb, ackthru);
956abc2ac13Srjs 			dccpstat.tcplikes_recv_ackack++;
957abc2ac13Srjs 		}
958abc2ac13Srjs 	} else if (avsize > 0 && cb->pcb->ack_rcv) {
959abc2ac13Srjs 		/* We received an AckVector */
960abc2ac13Srjs 		u_int32_t acknum, ackthru;
961abc2ac13Srjs 		int i;
962abc2ac13Srjs 		ACK_DEBUG((LOG_INFO, "GOT Ack with Ackvector\n"));
963abc2ac13Srjs 		/* gotta loop through the ackvector */
964abc2ac13Srjs 		acknum = cb->pcb->ack_rcv;
965abc2ac13Srjs 		for (i=0; i<avsize; i++) {
966abc2ac13Srjs 			u_int8_t state, len;
967abc2ac13Srjs 			state = (av_rcv[i] & 0xc0) >> 6;
968abc2ac13Srjs 			len = (av_rcv[i] & 0x2f) + 1;
969abc2ac13Srjs 			if (state != 0) {
970abc2ac13Srjs 				/* Drops in ackvector! Will be noted and taken care of by the sender part */
971abc2ac13Srjs 				ACK_DEBUG((LOG_INFO, "Packets %u - %u are FUCKED\n",acknum-len, acknum));
972abc2ac13Srjs 				continue;
973abc2ac13Srjs 			}
974abc2ac13Srjs 
975abc2ac13Srjs 			while (len>0) {
976abc2ac13Srjs 				ackthru = _avlist_get(cb, acknum);
977abc2ac13Srjs 				ACK_DEBUG((LOG_INFO, "Ackthru: %u\n", ackthru));
978abc2ac13Srjs 				if (ackthru) {
979abc2ac13Srjs 					dccp_update_ackvector(cb->pcb, ackthru);
980abc2ac13Srjs 					dccpstat.tcplikes_recv_ackack++;
981abc2ac13Srjs 				}
982abc2ac13Srjs 				acknum--;
983abc2ac13Srjs 				len--;
984abc2ac13Srjs 			}
985abc2ac13Srjs 		}
986abc2ac13Srjs 	}
987abc2ac13Srjs 
988abc2ac13Srjs 	ACK_DEBUG((LOG_INFO, "Adding %llu to local ackvector\n", cb->pcb->seq_rcv));
989abc2ac13Srjs 	dccp_increment_ackvector(cb->pcb, cb->pcb->seq_rcv);
990abc2ac13Srjs 	cb->unacked++;
991abc2ac13Srjs 
992abc2ac13Srjs 	if (cb->unacked >= cb->pcb->ack_ratio) {
993abc2ac13Srjs 		/* Time to send an Ack */
994abc2ac13Srjs 
995abc2ac13Srjs 		avsize = dccp_generate_ackvector(cb->pcb, ackvector);
996abc2ac13Srjs TCPLIKE_DEBUG((LOG_INFO, "recv_packet avsize %d ackvector %d\n", avsize, ackvector));
997abc2ac13Srjs 		cb->unacked = 0;
998abc2ac13Srjs 		if (avsize > 0) {
999abc2ac13Srjs 			dccp_add_option(cb->pcb, DCCP_OPT_ACK_VECTOR0, ackvector, avsize);
1000abc2ac13Srjs 			cb->pcb->ack_snd = cb->pcb->seq_rcv;
1001abc2ac13Srjs 			_avlist_add(cb, cb->pcb->seq_snd+1, cb->pcb->ack_snd);
1002abc2ac13Srjs 			ACK_DEBUG((LOG_INFO, "Recvr: Sending Ack (%llu) w/ Ack Vector\n", cb->pcb->ack_snd));
1003abc2ac13Srjs 			dccpstat.tcplikes_recv_acksent++;
1004abc2ac13Srjs 			dccp_output(cb->pcb, 1);
1005abc2ac13Srjs 		}
1006abc2ac13Srjs 	}
1007abc2ac13Srjs 	mutex_exit(&(cb->mutex));
1008abc2ac13Srjs }
1009abc2ac13Srjs 
1010abc2ac13Srjs void
_avlist_add(struct tcplike_recv_ccb * cb,u_int64_t localseq,u_int64_t ackthru)1011abc2ac13Srjs _avlist_add(struct tcplike_recv_ccb *cb, u_int64_t localseq, u_int64_t ackthru)
1012abc2ac13Srjs {
1013abc2ac13Srjs 	struct ack_list *a;
1014abc2ac13Srjs 	ACK_DEBUG((LOG_INFO,"Adding localseq %u - ackthru %u to avlist\n", localseq, ackthru));
1015abc2ac13Srjs 	/*MALLOC_DEBUG((LOG_INFO, "New ack_list, %u\n", sizeof (struct ack_list)));*/
1016abc2ac13Srjs 	a = malloc(sizeof(struct ack_list), M_TEMP, M_NOWAIT);
1017abc2ac13Srjs 	if (a == NULL) {
1018abc2ac13Srjs 		MALLOC_DEBUG((LOG_INFO, "avlist_add: FAILED\n"));
1019abc2ac13Srjs 		dccpstat.tcplikes_recv_memerr++;
1020abc2ac13Srjs 		return;
1021abc2ac13Srjs 	}
1022abc2ac13Srjs 	memset(a, 0, sizeof(struct ack_list));
1023abc2ac13Srjs 	a->localseq = localseq;
1024abc2ac13Srjs 	a->ackthru = ackthru;
1025abc2ac13Srjs 	a->next = cb->av_list;
1026abc2ac13Srjs 	cb->av_list = a;
1027abc2ac13Srjs }
1028abc2ac13Srjs 
1029abc2ac13Srjs /*
1030abc2ac13Srjs  * Searches the av_list. if 'localseq' found, drop it from list and return
1031abc2ac13Srjs  * ackthru
1032abc2ac13Srjs  */
1033abc2ac13Srjs u_int64_t
_avlist_get(struct tcplike_recv_ccb * cb,u_int64_t localseq)1034abc2ac13Srjs _avlist_get(struct tcplike_recv_ccb *cb, u_int64_t localseq)
1035abc2ac13Srjs {
1036abc2ac13Srjs 	struct ack_list *a, *n, *p;
1037abc2ac13Srjs 	u_int64_t ackthru;
1038abc2ac13Srjs 
1039abc2ac13Srjs 	ACK_DEBUG((LOG_INFO,"Getting localseq %u from avlist\n", localseq));
1040abc2ac13Srjs 	a = cb->av_list;
1041abc2ac13Srjs 	p = 0;
1042abc2ac13Srjs 	while (a) {
1043abc2ac13Srjs 		n = a->next;
1044abc2ac13Srjs 		if (a->localseq == localseq) {
1045abc2ac13Srjs 			if (p)
1046abc2ac13Srjs 				p->next = n;
1047abc2ac13Srjs 			else
1048abc2ac13Srjs 				cb->av_list = n;
1049abc2ac13Srjs 			ackthru = a->ackthru;
1050abc2ac13Srjs 			/*MALLOC_DEBUG((LOG_INFO, "Freeing element %u in ack_list\n", a->localseq));*/
1051abc2ac13Srjs 			free(a, M_TEMP);
1052abc2ac13Srjs 			return ackthru;
1053abc2ac13Srjs 		}
1054abc2ac13Srjs 		p = a;
1055abc2ac13Srjs 		a = n;
1056abc2ac13Srjs 	}
1057abc2ac13Srjs 	/* Not found. return 0 */
1058abc2ac13Srjs 	return 0;
1059abc2ac13Srjs }
1060abc2ac13Srjs 
1061abc2ac13Srjs /*
1062abc2ac13Srjs int tcplike_option_recv(void);
1063abc2ac13Srjs */
1064