xref: /freebsd/sys/netinet/sctp_cc_functions.c (revision e28a4053)
/*-
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_uio.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_cc_functions.h>
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	struct sctp_association *assoc;
	uint32_t cwnd_in_mtu;

	assoc = &stcb->asoc;
	/*
	 * We take the minimum of the burst limit and the initial congestion
	 * window. The initial congestion window is at least two times the
	 * MTU.
	 */
	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
	if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
		cwnd_in_mtu = assoc->max_burst;
	net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
	net->ssthresh = assoc->peers_rwnd;

	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}
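
/*
 * Worked example of the computation above (illustrative only; the MTU,
 * sysctl, and max_burst values are assumptions, not values taken from this
 * file): with net->mtu = 1500, sctp_initial_cwnd = 3 and assoc->max_burst
 * = 4, cwnd_in_mtu stays 3 (since 3 <= 4), so
 *
 *	net->cwnd = (1500 - sizeof(struct sctphdr)) * 3
 *	          = (1500 - 12) * 3 = 4464 bytes,
 *
 * and ssthresh starts at the peer's advertised receive window.
 */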

void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*-
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, check whether any
				 * destinations had a fast retransmit sent
				 * to them. If so, adjust their ssthresh
				 * and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				net->ssthresh = net->cwnd / 2;
				if (net->ssthresh < (net->mtu * 2)) {
					net->ssthresh = 2 * net->mtu;
				}
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on the fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn.
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
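
/*
 * Worked example (illustrative assumption: net->mtu = 1500): a fast
 * retransmit on a destination with cwnd = 12000 yields ssthresh =
 * 12000 / 2 = 6000 and cwnd = 6000. Had cwnd been only 2400, ssthresh
 * would be floored at 2 * 1500 = 3000, so on entering fast recovery
 * cwnd can briefly *grow* to 3000 rather than shrink.
 */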

void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination, skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* Now, was it the primary? If so, restore it. */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update.
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if the pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
						net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
							sctp_log_cwnd(stcb, net, net->mtu,
							    SCTP_CWND_LOG_FROM_SS);
						}
					} else {
						net->cwnd += net->net_ack;
						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
							sctp_log_cwnd(stcb, net, net->net_ack,
							    SCTP_CWND_LOG_FROM_SS);
						}
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				/*
				 * Add to pba
				 */
				net->partial_bytes_acked += net->net_ack;

				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity, i.e., all the data ack'd was sent to
		 * more than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
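
/*
 * Worked example of the two growth regimes above (illustrative; assumes
 * net->mtu = 1500 and sctp_L2_abc_variable = 2):
 *
 * Slow start (cwnd <= ssthresh): with net_ack = 4500 bytes newly acked and
 * the window in use, the ABC limit caps the increase at 2 * 1500 = 3000
 * bytes; with net_ack = 1000, cwnd grows by the full 1000.
 *
 * Congestion avoidance: each SACK adds net_ack to partial_bytes_acked, and
 * only once that counter reaches a full cwnd (with the window in use) is
 * cwnd raised by one MTU, giving the classic one-MTU-per-window growth.
 */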

void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
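
/*
 * Example (illustrative, net->mtu = 1500): after a retransmission timeout
 * on a destination with cwnd = 9000, ssthresh = max(9000 / 2, 4 * 1500) =
 * 6000 and cwnd collapses to a single MTU (1500), so the sender re-enters
 * slow start up to the new ssthresh.
 */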

void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	net->ssthresh = net->cwnd / 2;
	if (net->ssthresh < net->mtu) {
		net->ssthresh = net->mtu;
		/* here back off the timer as well, to slow us down */
		net->RTO <<= 1;
	}
	net->cwnd = net->ssthresh;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
	}
}

void
sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
    uint32_t *bottle_bw, uint32_t *on_queue)
{
	uint32_t bw_avail;
	int rtt, incr;
	int old_cwnd = net->cwnd;

	/* need a real RTT for this calculation */
	rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
	/* get the bottleneck bw */
	*bottle_bw = ntohl(cp->bottle_bw);
	/* and what's on the queue */
	*on_queue = ntohl(cp->current_onq);
	/*
	 * Adjust the on-queue figure if our flight size is larger; it may
	 * be that the router has not yet gotten all of the data "in
	 * flight" to it.
	 */
	if (*on_queue < net->flight_size)
		*on_queue = net->flight_size;
	/* calculate the available space */
	bw_avail = (*bottle_bw * rtt) / 1000;
	if (bw_avail > *bottle_bw) {
		/*
		 * Cap the growth to no more than the bottleneck. This can
		 * happen as RTT slides up due to queues. It also means
		 * that if you have more than a 1 second RTT with an empty
		 * queue you will be limited to bottle_bw per second, even
		 * if other points have 1/2 the RTT and you could get more
		 * out...
		 */
		bw_avail = *bottle_bw;
	}
	if (*on_queue > bw_avail) {
		/*
		 * No room for anything else, so don't allow anything else
		 * to be "added to the fire".
		 */
		int seg_inflight, seg_onqueue, my_portion;

		net->partial_bytes_acked = 0;

		/* how much are we over the queue size? */
		incr = *on_queue - bw_avail;
		if (stcb->asoc.seen_a_sack_this_pkt) {
			/*
			 * undo any cwnd adjustment that the sack might
			 * have made
			 */
			net->cwnd = net->prev_cwnd;
		}
		/* Now how much of that is mine? */
		seg_inflight = net->flight_size / net->mtu;
		seg_onqueue = *on_queue / net->mtu;
		my_portion = (incr * seg_inflight) / seg_onqueue;

		/* Have I already made an adjustment? */
		if (net->cwnd > net->flight_size) {
			/*
			 * An adjustment was already made for this flight,
			 * so decrease my portion by a share of the
			 * previous adjustment.
			 */
			int diff_adj;

			diff_adj = net->cwnd - net->flight_size;
			if (diff_adj > my_portion)
				my_portion = 0;
			else
				my_portion -= diff_adj;
		}
		/*
		 * Back down to the previous cwnd (assuming we have had a
		 * sack before this packet), minus whatever portion of the
		 * overage is my fault.
		 */
		net->cwnd -= my_portion;

		/* we will NOT back down below 1 MTU */
		if (net->cwnd <= net->mtu) {
			net->cwnd = net->mtu;
		}
		/* force into CA */
		net->ssthresh = net->cwnd - 1;
	} else {
		/*
		 * Take 1/4 of the space left, or the max burst, whichever
		 * is less.
		 */
		incr = min((bw_avail - *on_queue) >> 2,
		    stcb->asoc.max_burst * net->mtu);
		net->cwnd += incr;
	}
	if (net->cwnd > bw_avail) {
		/* We can't exceed the pipe size */
		net->cwnd = bw_avail;
	}
	if (net->cwnd < net->mtu) {
		/* We always have 1 MTU */
		net->cwnd = net->mtu;
	}
	if (net->cwnd - old_cwnd != 0) {
		/* log only changes */
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
			    SCTP_CWND_LOG_FROM_SAT);
		}
	}
}
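
/*
 * Worked example of the packet-drop math above (numbers are illustrative;
 * units follow the code, which with its "/ 1000" treats bottle_bw as bytes
 * per second and rtt as milliseconds): with bottle_bw = 1,000,000 and rtt
 * = 100, the pipe holds bw_avail = 100,000 bytes. If on_queue = 120,000
 * the link is over-subscribed by incr = 20,000 bytes; a sender with
 * flight_size = 60,000 owns 40 of the 80 queued segments, so my_portion =
 * 20,000 * 40 / 80 = 10,000 bytes, and cwnd backs off by that amount
 * (less any adjustment already made for this flight).
 */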

void
sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
    struct sctp_nets *net, int burst_limit)
{
	int old_cwnd = net->cwnd;

	if (net->ssthresh < net->cwnd)
		net->ssthresh = net->cwnd;
	net->cwnd = (net->flight_size + (burst_limit * net->mtu));

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
	}
}
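
/*
 * Example (illustrative values: flight_size = 3000, burst_limit = 4,
 * net->mtu = 1500): after a burst-limited send, cwnd is clamped to 3000 +
 * 4 * 1500 = 9000 bytes. If the pre-clamp cwnd (say 12000) exceeded
 * ssthresh, ssthresh is first raised to 12000, so the old window is
 * remembered rather than lost to the clamp.
 */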

void
sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	/*
	 * make a small adjustment to cwnd and force it into CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS, move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

struct sctp_hs_raise_drop {
	int32_t cwnd;
	int32_t increase;
	int32_t drop_percent;
};

#define SCTP_HS_TABLE_SIZE 73

struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0   */
	{118, 2, 44},		/* 1   */
	{221, 3, 41},		/* 2   */
	{347, 4, 38},		/* 3   */
	{495, 5, 37},		/* 4   */
	{663, 6, 35},		/* 5   */
	{851, 7, 34},		/* 6   */
	{1058, 8, 33},		/* 7   */
	{1284, 9, 32},		/* 8   */
	{1529, 10, 31},		/* 9   */
	{1793, 11, 30},		/* 10  */
	{2076, 12, 29},		/* 11  */
	{2378, 13, 28},		/* 12  */
	{2699, 14, 28},		/* 13  */
	{3039, 15, 27},		/* 14  */
	{3399, 16, 27},		/* 15  */
	{3778, 17, 26},		/* 16  */
	{4177, 18, 26},		/* 17  */
	{4596, 19, 25},		/* 18  */
	{5036, 20, 25},		/* 19  */
	{5497, 21, 24},		/* 20  */
	{5979, 22, 24},		/* 21  */
	{6483, 23, 23},		/* 22  */
	{7009, 24, 23},		/* 23  */
	{7558, 25, 22},		/* 24  */
	{8130, 26, 22},		/* 25  */
	{8726, 27, 22},		/* 26  */
	{9346, 28, 21},		/* 27  */
	{9991, 29, 21},		/* 28  */
	{10661, 30, 21},	/* 29  */
	{11358, 31, 20},	/* 30  */
	{12082, 32, 20},	/* 31  */
	{12834, 33, 20},	/* 32  */
	{13614, 34, 19},	/* 33  */
	{14424, 35, 19},	/* 34  */
	{15265, 36, 19},	/* 35  */
	{16137, 37, 19},	/* 36  */
	{17042, 38, 18},	/* 37  */
	{17981, 39, 18},	/* 38  */
	{18955, 40, 18},	/* 39  */
	{19965, 41, 17},	/* 40  */
	{21013, 42, 17},	/* 41  */
	{22101, 43, 17},	/* 42  */
	{23230, 44, 17},	/* 43  */
	{24402, 45, 16},	/* 44  */
	{25618, 46, 16},	/* 45  */
	{26881, 47, 16},	/* 46  */
	{28193, 48, 16},	/* 47  */
	{29557, 49, 15},	/* 48  */
	{30975, 50, 15},	/* 49  */
	{32450, 51, 15},	/* 50  */
	{33986, 52, 15},	/* 51  */
	{35586, 53, 14},	/* 52  */
	{37253, 54, 14},	/* 53  */
	{38992, 55, 14},	/* 54  */
	{40808, 56, 14},	/* 55  */
	{42707, 57, 13},	/* 56  */
	{44694, 58, 13},	/* 57  */
	{46776, 59, 13},	/* 58  */
	{48961, 60, 13},	/* 59  */
	{51258, 61, 13},	/* 60  */
	{53677, 62, 12},	/* 61  */
	{56230, 63, 12},	/* 62  */
	{58932, 64, 12},	/* 63  */
	{61799, 65, 12},	/* 64  */
	{64851, 66, 11},	/* 65  */
	{68113, 67, 11},	/* 66  */
	{71617, 68, 11},	/* 67  */
	{75401, 69, 10},	/* 68  */
	{79517, 70, 10},	/* 69  */
	{84035, 71, 10},	/* 70  */
	{89053, 72, 10},	/* 71  */
	{94717, 73, 9}		/* 72  */
};
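
/*
 * How to read the table (the concrete numbers below are an illustration):
 * cwnd is in units of 1024 bytes (the code compares against net->cwnd >>
 * 10), increase is in the same 1 KB units (it is shifted left by 10 before
 * being added to cwnd), and drop_percent is the percentage cut applied on
 * loss. E.g. a cwnd of 5 MB (5120 KB) falls between rows 19 {5036, ...}
 * and 20 {5497, ...}, so the lookup selects row 20: each qualifying SACK
 * grows cwnd by 21 KB, and a loss drops it by 24%.
 */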

static void
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx, incr;

	cur_val = net->cwnd >> 10;
	indx = SCTP_HS_TABLE_SIZE - 1;
#ifdef SCTP_DEBUG
	printf("HS CC called.\n");
#endif
	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		if (net->net_ack > net->mtu) {
			net->cwnd += net->mtu;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
			}
		} else {
			net->cwnd += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
			}
		}
	} else {
		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
				indx = i;
				break;
			}
		}
		net->last_hs_used = indx;
		incr = ((sctp_cwnd_adjust[indx].increase) << 10);
		net->cwnd += incr;
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
		}
	}
}

static void
sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx;
	int old_cwnd = net->cwnd;

	cur_val = net->cwnd >> 10;
	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		net->ssthresh = net->cwnd / 2;
		if (net->ssthresh < (net->mtu * 2)) {
			net->ssthresh = 2 * net->mtu;
		}
		net->cwnd = net->ssthresh;
	} else {
		/* drop by the proper amount */
		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
		net->cwnd = net->ssthresh;
		/* now where are we */
		indx = net->last_hs_used;
		cur_val = net->cwnd >> 10;
		/* reset where we are in the table */
		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
			/* fell out of HS */
			net->last_hs_used = 0;
		} else {
			for (i = indx; i >= 1; i--) {
				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
					break;
				}
			}
			/*
			 * use the index the scan settled on, not the
			 * stale pre-drop value
			 */
			net->last_hs_used = i;
		}
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}
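
/*
 * Worked drop example (illustrative): with cwnd grown to about 8.4 MB
 * (8601 KB), the increase lookup most recently landed on row 26 {8726, 27,
 * 22}, so a loss sets ssthresh = cwnd - (cwnd / 100) * 22, roughly a 22%
 * reduction. The code then rescans downward from that row so that
 * last_hs_used again matches the new, smaller cwnd.
 */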

void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, check whether any
				 * destinations had a fast retransmit sent
				 * to them. If so, adjust their ssthresh
				 * and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;

				sctp_hs_cwnd_decrease(stcb, net);

				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on the fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn.
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination, skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* Now, was it the primary? If so, restore it. */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update.
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if the pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {

					sctp_hs_cwnd_increase(stcb, net);

				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity, i.e., all the data ack'd was sent to
		 * more than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}

/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R.N.Shorten, D.J.Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */

static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;

static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}
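
/*
 * between() is a wraparound-safe range test in unsigned 32-bit modular
 * arithmetic: it returns nonzero when seq1 lies no further beyond seq2
 * than seq3 does. Illustrative case crossing the wrap: between(0x00000002,
 * 0xFFFFFFF0, 0x00000010) computes 0x10 - 0xFFFFFFF0 = 0x20 and 0x2 -
 * 0xFFFFFFF0 = 0x12, and since 0x20 >= 0x12 the value 2 counts as lying
 * between 0xFFFFFFF0 and 0x10 despite the 32-bit rollover.
 */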

static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return sctp_get_tick_count() - ca->last_cong;
}

static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return htcp_cong_time(ca) / ca->minRTT;
}

static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}

#ifdef SCTP_NOT_USED

static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
	net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
	net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
}

#endif

static inline void
measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> 3;

	/* keep track of the minimum RTT seen so far; minRTT is zero at first */
	if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
		net->htcp_ca.minRTT = srtt;

	/* max RTT */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
		if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
			net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
		if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
			net->htcp_ca.maxRTT = srtt;
	}
}

static void
measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
		return;
	}
	net->htcp_ca.bytecount += net->net_ack;

	if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
	    && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
	    && net->htcp_ca.minRTT > 0) {
		uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);

		if (htcp_ccount(&net->htcp_ca) <= 3) {
			/* just after backoff */
			net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
		} else {
			net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
			if (net->htcp_ca.Bi > net->htcp_ca.maxB)
				net->htcp_ca.maxB = net->htcp_ca.Bi;
			if (net->htcp_ca.minB > net->htcp_ca.maxB)
				net->htcp_ca.minB = net->htcp_ca.maxB;
		}
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
	}
}

static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}
	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
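
/*
 * Fixed-point note (the worked numbers are illustrative): beta is kept in
 * <<7 fixed point, i.e. 128 represents 1.0. With minRTT = 50 and maxRTT =
 * 100 (in ticks), beta = (50 << 7) / 100 = 64, i.e. a backoff factor of
 * 0.5; a flatter RTT profile such as minRTT = 90, maxRTT = 100 gives 115
 * (about 0.9), which the code then clamps down to BETA_MAX.
 */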

static inline void
htcp_alpha_update(struct htcp *ca)
{
	uint32_t minRTT = ca->minRTT;
	uint32_t factor = 1;
	uint32_t diff = htcp_cong_time(ca);

	if (diff > (uint32_t) hz) {
		diff -= hz;
		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
	}
	if (use_rtt_scaling && minRTT) {
		uint32_t scale = (hz << 3) / (10 * minRTT);

		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
								 * interval [0.5,10]<<3 */
		factor = (factor << 3) / scale;
		if (!factor)
			factor = 1;
	}
	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
	if (!ca->alpha)
		ca->alpha = ALPHA_BASE;
}
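
/*
 * Worked alpha example (illustrative): alpha is also <<7 fixed point.
 * Shortly after a congestion event (diff <= hz, so factor = 1) with beta =
 * 64 (0.5), alpha = 2 * 1 * (128 - 64) = 128, i.e. 1.0 MTU of growth per
 * congestion-avoidance step. Once more than a second has passed since the
 * last congestion event, the quadratic term in factor starts inflating
 * alpha, which is what makes H-TCP probe faster on long loss-free epochs.
 */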

/* After we have the rtt data to calculate beta, we'd still prefer to wait
 * one rtt before we adjust our beta to ensure we are working from
 * consistent data.
 *
 * This function should be called when we hit a congestion event, since only
 * at that point do we really have a real sense of maxRTT (the queues en
 * route were getting just too full now).
 */
static void
htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t minRTT = net->htcp_ca.minRTT;
	uint32_t maxRTT = net->htcp_ca.maxRTT;

	htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes, etc.
	 */
	if (minRTT > 0 && maxRTT > minRTT)
		net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}

static uint32_t
htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	htcp_param_update(stcb, net);
	return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
}
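
/*
 * Example (illustrative, net->mtu = 1500): with cwnd = 15000 (10 MTUs) and
 * beta = 64 (0.5 in <<7 fixed point), the new ssthresh is
 * ((15000 / 1500 * 64) >> 7) * 1500 = 5 * 1500 = 7500 bytes, with a floor
 * of two MTUs so the window never collapses below 3000.
 */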

static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *		return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(stcb, net);

		/*
		 * In the dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			htcp_alpha_update(&net->htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->htcp_ca.bytes_acked = net->mtu;
	}
}
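
/*
 * Worked CA-step example for the threshold above (illustrative, net->mtu =
 * 1500, alpha = 256, i.e. 2.0 in <<7 fixed point): with cwnd = 15000, the
 * test ((partial_bytes_acked / mtu * alpha) >> 7) * mtu >= cwnd first
 * fires at partial_bytes_acked = 7500 (5 MTUs acked * 2.0 = 10 MTUs >=
 * cwnd), so a larger alpha lets cwnd grow by one MTU after proportionally
 * fewer acked bytes.
 */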

#ifdef SCTP_NOT_USED
/* Lower bound on congestion window. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return net->ssthresh;
}

#endif

static void
htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	memset(&net->htcp_ca, 0, sizeof(struct htcp));
	net->htcp_ca.alpha = ALPHA_BASE;
	net->htcp_ca.beta = BETA_MIN;
	net->htcp_ca.bytes_acked = net->mtu;
	net->htcp_ca.last_cong = sctp_get_tick_count();
}

void
sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*
	 * We take the max of the burst limit times an MTU, or the
	 * INITIAL_CWND. We then limit this to 4 MTUs of sending.
	 */
	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	net->ssthresh = stcb->asoc.peers_rwnd;
	htcp_init(stcb, net);

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}

void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination, skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* Now, was it the primary? If so, restore it. */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update.
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if the pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(stcb, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity, i.e., all the data ack'd was sent to
		 * more than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}

void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, check whether any
				 * destinations had a fast retransmit sent
				 * to them. If so, adjust their ssthresh
				 * and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if the state were changed */
				htcp_reset(&net->htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(stcb, net);
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on the fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn.
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	/* JRS - reset as if the state were being changed to timeout */
	htcp_reset(&net->htcp_ca);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}

void
sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	net->htcp_ca.last_cong = sctp_get_tick_count();
	/*
	 * make a small adjustment to cwnd and force it into CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS, move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

void
sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	/* JRS - reset htcp as if the state changed */
	htcp_reset(&net->htcp_ca);
	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	if (net->ssthresh < net->mtu) {
		net->ssthresh = net->mtu;
		/* here back off the timer as well, to slow us down */
		net->RTO <<= 1;
	}
	net->cwnd = net->ssthresh;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
	}
}
1564