xref: /freebsd/sys/netinet/tcp_stacks/tcp_bbr.h (revision 95ee2897)
/*-
 * Copyright (c) 2016-2020 Netflix, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef _NETINET_TCP_BBR_H_
#define _NETINET_TCP_BBR_H_

#define BBR_INITIAL_RTO		1000000	/* Initial RTO: 1 second, in microseconds */

/*
 * Send map flags (stored in bbr_sendmap.r_flags).
 */
#define BBR_ACKED		0x0001	/* The remote endpoint acked this */
#define BBR_WAS_RENEGED		0x0002	/* The peer reneged the ack */
#define BBR_RXT_CLEARED		0x0004	/* ACK cleared by the RXT timer */
#define BBR_OVERMAX		0x0008	/* More retransmissions than we can fit */
#define BBR_SACK_PASSED		0x0010	/* A sack was done above this block */
#define BBR_WAS_SACKPASS	0x0020	/* We retransmitted due to SACK pass */
#define BBR_HAS_FIN		0x0040	/* Segment is sent with FIN */
#define BBR_TLP			0x0080	/* Segment sent as tail-loss-probe */
#define BBR_HAS_SYN		0x0100	/* Segment has the SYN */
#define BBR_MARKED_LOST		0x0200	/* Segment is lost and totaled into
					 * bbr->rc_ctl.rc_lost */
#define BBR_RWND_COLLAPSED	0x0400	/* The peer collapsed the rwnd on the
					 * segment */

/* Number of slots in bbr_sendmap.r_tim_lastsent[] */
#define BBR_NUM_OF_RETRANS	7

/*
 * Values for the socket option that sets which pacing overheads
 * are included.
 *
 * NOTE(review): ENET (0x01) and IP (0x02) look like independent bits,
 * but TCP is 0x03, i.e. both of those bits, rather than 0x04.  Confirm
 * how the consumer tests these before treating them as a bitmask; the
 * values are left as-is because they are visible to callers.
 */
#define BBR_INCL_ENET_OH	0x01
#define BBR_INCL_IP_OH		0x02
#define BBR_INCL_TCP_OH		0x03

5335c7bb34SRandall Stewart /*
5435c7bb34SRandall Stewart  * With the addition of both measurement algorithms
5535c7bb34SRandall Stewart  * I had to move over the size of a
5635c7bb34SRandall Stewart  * cache line (unfortunately). For now there is
5735c7bb34SRandall Stewart  * no way around this. We may be able to cut back
5835c7bb34SRandall Stewart  * at some point I hope.
5935c7bb34SRandall Stewart  */
6035c7bb34SRandall Stewart struct bbr_sendmap {
6135c7bb34SRandall Stewart 	TAILQ_ENTRY(bbr_sendmap) r_next;	/* seq number arrayed next */
6235c7bb34SRandall Stewart 	TAILQ_ENTRY(bbr_sendmap) r_tnext;	/* Time of tmit based next */
6335c7bb34SRandall Stewart 	uint32_t r_start;	/* Sequence number of the segment */
6435c7bb34SRandall Stewart 	uint32_t r_end;		/* End seq, this is 1 beyond actually */
6535c7bb34SRandall Stewart 
6635c7bb34SRandall Stewart 	uint32_t r_rtr_bytes;	/* How many bytes have been retransmitted */
6735c7bb34SRandall Stewart 	uint32_t r_delivered;	/* Delivered amount at send */
6835c7bb34SRandall Stewart 
6935c7bb34SRandall Stewart 	uint32_t r_del_time;	/* The time of the last delivery update */
7035c7bb34SRandall Stewart 	uint8_t r_rtr_cnt:4,	/* Retran count, index this -1 to get time
7135c7bb34SRandall Stewart 				 * sent */
725d8fd932SRandall Stewart 		r_rtt_not_allowed:1,	/* No rtt measurement allowed */
7335c7bb34SRandall Stewart 	        r_is_drain:1,	/* In a draining cycle */
7435c7bb34SRandall Stewart 		r_app_limited:1,/* We went app limited */
7535c7bb34SRandall Stewart 	        r_ts_valid:1;	/* Timestamp field is valid (r_del_ack_ts) */
7635c7bb34SRandall Stewart 	uint8_t r_dupack;	/* Dup ack count */
7735c7bb34SRandall Stewart 	uint8_t r_in_tmap:1,	/* Flag to see if its in the r_tnext array */
7835c7bb34SRandall Stewart 	        r_is_smallmap:1,/* Was logged as a small-map send-map item */
7935c7bb34SRandall Stewart 		r_is_gain:1,	/* Was in gain cycle */
8035c7bb34SRandall Stewart 		r_bbr_state:5;  /* The BBR state at send */
8135c7bb34SRandall Stewart 	uint8_t r_limit_type;	/* is this entry counted against a limit? */
8235c7bb34SRandall Stewart 
8335c7bb34SRandall Stewart 	uint16_t r_flags;	/* Flags as defined above */
8435c7bb34SRandall Stewart 	uint16_t r_spare16;
8535c7bb34SRandall Stewart 	uint32_t r_del_ack_ts;  /* At send what timestamp of peer was (if r_ts_valid set) */
8635c7bb34SRandall Stewart 	/****************Cache line*****************/
8735c7bb34SRandall Stewart 	uint32_t r_tim_lastsent[BBR_NUM_OF_RETRANS];
8835c7bb34SRandall Stewart 	/*
8935c7bb34SRandall Stewart 	 * Question, should we instead just grab the sending b/w
9035c7bb34SRandall Stewart 	 * from the filter with the gain and store it in a
9135c7bb34SRandall Stewart 	 * uint64_t instead?
9235c7bb34SRandall Stewart 	 */
9335c7bb34SRandall Stewart 	uint32_t r_first_sent_time; /* Time of first pkt in flight sent */
9435c7bb34SRandall Stewart 	uint32_t r_pacing_delay;	/* pacing delay of this send */
9535c7bb34SRandall Stewart 	uint32_t r_flight_at_send;	/* flight at the time of the send */
9635c7bb34SRandall Stewart #ifdef _KERNEL
9735c7bb34SRandall Stewart }           __aligned(CACHE_LINE_SIZE);
9835c7bb34SRandall Stewart #else
9935c7bb34SRandall Stewart };
10035c7bb34SRandall Stewart #endif
/* NOTE(review): presumably a value for bbr_sendmap.r_limit_type -- confirm */
#define BBR_LIMIT_TYPE_SPLIT	1

/* Tail-queue head type for lists of bbr_sendmap entries. */
TAILQ_HEAD(bbr_head, bbr_sendmap);

#define BBR_SEGMENT_TIME_SIZE	1500	/* How many bytes in time_between */

#define BBR_MIN_SEG		1460	/* MSS size */
#define BBR_MAX_GAIN_VALUE	0xffff

#define BBR_TIMER_FUDGE		1500	/* 1.5ms, in microseconds */

/*
 * BW twiddle secret codes.
 */
#define BBR_RED_BW_CONGSIG		0	/* We enter recovery and set using b/w */
#define BBR_RED_BW_RATECAL		1	/* We are calculating the loss rate */
#define BBR_RED_BW_USELRBW		2	/* We are dropping the lower b/w with cDR */
#define BBR_RED_BW_SETHIGHLOSS		3	/* Highloss value was set at exit from
						 * probe-rtt */
#define BBR_RED_BW_PE_CLREARLY		4	/* We have decided to clear the reduction
						 * early */
#define BBR_RED_BW_PE_CLAFDEL		5	/* We are clearing it on schedule,
						 * delayed */
#define BBR_RED_BW_REC_ENDCLL		6	/* Recovery exit: save high if needed
						 * and clear to start measuring */
#define BBR_RED_BW_PE_NOEARLY_OUT	7	/* Set pkt epoch judged that we do not
						 * get out of jail early */

/* Selectors used when calculating a rate */
#define BBR_CALC_BW	1
#define BBR_CALC_LOSS	2

/* NOTE(review): presumably identify how an RTT sample was obtained -- confirm */
#define BBR_RTT_BY_TIMESTAMP	0
#define BBR_RTT_BY_EXACTMATCH	1
#define BBR_RTT_BY_EARLIER_RET	2
#define BBR_RTT_BY_THIS_RETRAN	3
#define BBR_RTT_BY_SOME_RETRAN	4
#define BBR_RTT_BY_TSMATCHING	5

/* Markers to track where we enter persists from */
#define BBR_PERSISTS_FROM_1	1
#define BBR_PERSISTS_FROM_2	2
#define BBR_PERSISTS_FROM_3	3
#define BBR_PERSISTS_FROM_4	4
#define BBR_PERSISTS_FROM_5	5

/* Magic cookies used to ask for the RTT */
#define BBR_RTT_PROP	0
#define BBR_RTT_RACK	1
#define BBR_RTT_PKTRTT	2
#define BBR_SRTT	3

#define BBR_SACKED	0
#define BBR_CUM_ACKED	1

/* Threshold in useconds where we consider we need a higher min cwnd */
#define BBR_HIGH_SPEED		1000
#define BBR_HIGHSPEED_NUM_MSS	12

/*
 * The maximum number of times we are willing to reduce b/w in RTX's.
 * Setting this has a multiplicative effect: the original note says that
 * when reducing by 20%, a value of 3 means the b/w estimate has been
 * reduced by > 60% before we stop.
 * NOTE(review): compounding 20% three times is ~49%, summing is 60% --
 * confirm which the code actually does.
 */
#define MAX_REDUCE_RXT	3
/*
 * The rate sample structure assists in single sack/ack rate and rtt
 * calculation.  In the future it will be expanded in BBR to do
 * forward rate sample b/w estimation.
 */
#define BBR_RS_RTT_EMPTY	0x00000001	/* Nothing yet stored in RTT's */
#define BBR_RS_BW_EMPTY		0x00000002	/* Nothing yet stored in cDR */
#define BBR_RS_RTT_VALID	0x00000004	/* We have at least one valid RTT */
#define BBR_RS_BW_VAILD		0x00000008	/* We have a valid cDR (the "VAILD"
						 * spelling is the existing name) */
#define BBR_RS_EMPTY		(BBR_RS_RTT_EMPTY | BBR_RS_BW_EMPTY)

struct bbr_rtt_sample {
	uint32_t rs_flags;	/* NOTE(review): presumably BBR_RS_* bits -- confirm */
	uint32_t rs_rtt_lowest;
	uint32_t rs_rtt_lowest_sendtime;
	uint32_t rs_rtt_low_seq_start;

	uint32_t rs_rtt_highest;
	uint32_t rs_rtt_cnt;

	uint64_t rs_rtt_tot;
	uint32_t cur_rtt;
	uint32_t cur_rtt_bytecnt;

	uint32_t cur_rtt_rsmcnt;
	uint32_t rc_crtt_set:1;
	uint32_t avail_bits:31;	/* Remaining bits of this 32-bit word */
	uint64_t rs_cDR;
};

/*
 * RTT shrink reasons.
 */
#define BBR_RTTS_INIT			0
#define BBR_RTTS_NEWRTT			1
#define BBR_RTTS_RTTPROBE		2
#define BBR_RTTS_WASIDLE		3
#define BBR_RTTS_PERSIST		4
#define BBR_RTTS_REACHTAR		5
#define BBR_RTTS_ENTERPROBE		6
#define BBR_RTTS_SHRINK_PG		7
#define BBR_RTTS_SHRINK_PG_FINAL	8
#define BBR_RTTS_NEW_TARGET		9
#define BBR_RTTS_LEAVE_DRAIN		10
#define BBR_RTTS_RESETS_VALUES		11

#define BBR_NUM_RATES	5

/*
 * Rate flags.
 */
#define BBR_RT_FLAG_FREE	0x00	/* Is on the free list */
#define BBR_RT_FLAG_INUSE	0x01	/* Has been allocated */
#define BBR_RT_FLAG_READY	0x02	/* Ready to initiate a measurement */
#define BBR_RT_FLAG_CAPPED_PRE	0x04	/* Ready to cap if we send the next
					 * segment */
#define BBR_RT_FLAG_CAPPED	0x08	/* Measurement is capped */
#define BBR_RT_FLAG_PASTFA	0x10	/* Past the first ack */
#define BBR_RT_FLAG_LIMITED	0x20	/* Saw application/cwnd or rwnd
					 * limited period */
#define BBR_RT_SEEN_A_ACK	0x40	/* An ack has been saved */
#define BBR_RT_PREV_RTT_SET	0x80	/* There was a RTT set in */
#define BBR_RT_PREV_SEND_TIME	0x100	/* There was a RTT send time set that
					 * can be used, no snd_limits */
#define BBR_RT_SET_GRADIENT	0x200
#define BBR_RT_TS_VALID		0x400

/*
 * One BBR log record.
 *
 * The "UU" and "xx" tags on individual fields are the original author's
 * markers and are kept as-is; this header does not say what they mean.
 */
struct bbr_log {
	union {
		struct bbr_sendmap *rsm;	/* For alloc/free */
		uint64_t sb_acc;		/* For out/ack or t-o */
	};
	struct tcpcb *tp;
	uint32_t t_flags;
	uint32_t th_seq;
	uint32_t th_ack;
	uint32_t snd_una;
	uint32_t snd_nxt;
	uint32_t snd_max;
	uint32_t snd_cwnd;
	uint32_t snd_wnd;
	uint32_t rc_lost;
	uint32_t target_cwnd;	/* UU */
	uint32_t inflight;	/* UU */
	uint32_t applimited;	/* UU */
	/* Things for BBR */
	uint32_t delivered;	/* UU */
	uint64_t cur_del_rate;	/* UU */
	uint64_t delRate;	/* UU */
	uint64_t rttProp;	/* UU */
	uint64_t lt_bw;		/* UU */
	uint32_t timeStamp;
	uint32_t time;
	uint32_t slot;		/* UU */
	uint32_t delayed_by;
	uint32_t exp_del;
	uint32_t pkts_out;
	uint32_t new_win;
	uint32_t hptsi_gain;	/* UU */
	uint32_t cwnd_gain;	/* UU */
	uint32_t epoch;		/* UU */
	uint32_t lt_epoch;	/* UU */
	/* Sack fun */
	uint32_t blk_start[4];	/* xx */
	uint32_t blk_end[4];
	/*
	 * NOTE(review): the original attaches "Timeout T3=1, TLP=2, RACK=3"
	 * to len; it reads as though it describes type -- confirm.
	 */
	uint32_t len;		/* Timeout T3=1, TLP=2, RACK=3 */
	uint8_t type;
	uint8_t n_sackblks;
	uint8_t applied;	/* UU */
	uint8_t inhpts;		/* UU */
	uint8_t __spare;	/* UU */
	uint8_t use_lt_bw;	/* UU */
};

/*
 * Header followed by a variable number of log records.
 *
 * entries is a C99 flexible array member (it was a GNU zero-length array,
 * entries[0]).  The layout is unchanged: sizeof() of this structure
 * still covers only the two counters, and the records start immediately
 * after them.
 */
struct bbr_log_sysctl_out {
	uint32_t bbr_log_at;
	uint32_t bbr_log_max;
	struct bbr_log entries[];
};

/*
 * Magic numbers used when logging timeout events
 * (only meaningful if the logging is enabled).
 */
#define BBR_TO_FRM_TMR		1
#define BBR_TO_FRM_TLP		2
#define BBR_TO_FRM_RACK		3
#define BBR_TO_FRM_KEEP		4
#define BBR_TO_FRM_PERSIST	5
#define BBR_TO_FRM_DELACK	6

#define BBR_SEES_STRETCH_ACK		1
#define BBR_SEES_COMPRESSED_ACKS	2

/*
 * As we get each SACK we wade through the
 * rc_map and mark off what is acked.
 * We also increment rc_sacked as well.
 *
 * We also pay attention to missing entries
 * based on the time and possibly mark them
 * for retransmit. If we do and we are not already
 * in recovery we enter recovery. In doing
 * so we clear prr_delivered/holes_rxt and prr_sent_dur_rec.
 * We also set up rc_next/rc_snd_nxt/rc_send_end so
 * we will know where to send from. When not in
 * recovery rc_next will be NULL and rc_snd_nxt should
 * equal snd_max.
 *
 * Whenever we retransmit from recovery we increment
 * rc_holes_rxt as we retransmit a block and mark it as retransmitted
 * with the time it was sent. During non-recovery sending we
 * add to our map and note down the time of any send, expanding
 * the rc_map at the tail and moving rc_snd_nxt up with snd_max.
 *
 * In recovery during SACK/ACK processing if a chunk has
 * been retransmitted and it is now acked, we decrement rc_holes_rxt.
 * When we retransmit from the scoreboard we use
 * rc_next and rc_snd_nxt/rc_send_end to help us
 * find what needs to be retransmitted.
 *
 * To calculate pipe we simply take (snd_max - snd_una) + rc_holes_rxt.
 * This gets us the effect of RFC6675 pipe, counting twice for
 * bytes retransmitted.
 */

#define TT_BBR_FR_TMR	0x2001

#define BBR_SCALE	8
#define BBR_UNIT	(1 << BBR_SCALE)	/* 1 shifted left by BBR_SCALE, i.e. 256 */

/* How many pkt-rtts do we keep the delivery rate for */
#define BBR_NUM_RTTS_FOR_DEL_LIMIT	8
/* How many pkt-rtts do we keep the delivery rate for (google) */
#define BBR_NUM_RTTS_FOR_GOOG_DEL_LIMIT	10

#define BBR_SECONDS_NO_RTT	10	/* 10 seconds with no RTT shrinkage */
#define BBR_PROBERTT_MAX	200	/* 200ms */
#define BBR_PROBERTT_NUM_MSS	4
#define BBR_STARTUP_EPOCHS	3
#define USECS_IN_MSEC		1000

/*
 * Time conversions.  The argument is fully parenthesized so that an
 * expression argument such as (a + b) is converted as a whole instead
 * of having only its last operand divided.
 *
 * USECS_IN_SECOND and MS_IN_USEC must be in scope wherever these
 * macros are expanded.
 */
#define BBR_TIME_TO_SECONDS(a)	((a) / USECS_IN_SECOND)
#define BBR_TIME_TO_MILLI(a)	((a) / MS_IN_USEC)

/* BBR keeps time in usec's so we divide by 1000 and round up */
#define BBR_TS_TO_MS(t)		(((t) + 999) / MS_IN_USEC)

/*
 * Locking for the rack control block.
 * a) Locked by INP_WLOCK
 * b) Locked by the hpts-mutex
 */
#define BBR_STATE_STARTUP	0x01
#define BBR_STATE_DRAIN		0x02
#define BBR_STATE_PROBE_BW	0x03
#define BBR_STATE_PROBE_RTT	0x04
#define BBR_STATE_IDLE_EXIT	0x05

/*
 * Substates used while STATE == PROBE_BW.
 */
#define BBR_SUB_GAIN	0	/* Substate 0: we are at 5/4 BBR_UNIT */
#define BBR_SUB_DRAIN	1	/* Substate 1: we are at 3/4 BBR_UNIT */
#define BBR_SUB_LEVEL1	2	/* First substate at BBR_UNIT */
#define BBR_SUB_LEVEL2	3	/* 2nd substate at BBR_UNIT */
#define BBR_SUB_LEVEL3	4	/* 3rd substate at BBR_UNIT */
#define BBR_SUB_LEVEL4	5	/* 4th substate at BBR_UNIT */
#define BBR_SUB_LEVEL5	6	/* 5th substate at BBR_UNIT */
#define BBR_SUB_LEVEL6	7	/* Last substate at BBR_UNIT */
#define BBR_SUBSTATE_COUNT	8

/* Single remaining reduce log */
#define BBR_REDUCE_AT_FR	5

#define BBR_BIG_LOG_SIZE	300000

/*
 * BBR statistics counters.
 *
 * Every member is a uint64_t, and BBR_STAT_ADD() turns a member's
 * offset into an array index (offset / sizeof(uint64_t)), so the
 * bracketed number beside each member is its slot in that array.
 */
struct bbr_stats {
	uint64_t bbr_badfr;			/* [ 0] */
	uint64_t bbr_badfr_bytes;		/* [ 1] */
	uint64_t bbr_saw_oerr;			/* [ 2] */
	uint64_t bbr_saw_emsgsiz;		/* [ 3] */
	uint64_t bbr_reorder_seen;		/* [ 4] */
	uint64_t bbr_tlp_tot;			/* [ 5] */
	uint64_t bbr_tlp_newdata;		/* [ 6] */
	uint64_t bbr_offset_recovery;		/* [ 7] */
	uint64_t bbr_tlp_retran_fail;		/* [ 8] */
	uint64_t bbr_to_tot;			/* [ 9] */
	uint64_t bbr_to_arm_rack;		/* [10] */
	uint64_t bbr_enter_probertt;		/* [11] */
	uint64_t bbr_tlp_set;			/* [12] */
	uint64_t bbr_resends_set;		/* [13] */
	uint64_t bbr_force_output;		/* [14] */
	uint64_t bbr_to_arm_tlp;		/* [15] */
	uint64_t bbr_paced_segments;		/* [16] */
	uint64_t bbr_saw_enobuf;		/* [17] */
	uint64_t bbr_to_alloc_failed;		/* [18] */
	uint64_t bbr_to_alloc_emerg;		/* [19] */
	uint64_t bbr_sack_proc_all;		/* [20] */
	uint64_t bbr_sack_proc_short;		/* [21] */
	uint64_t bbr_sack_proc_restart;		/* [22] */
	uint64_t bbr_to_alloc;			/* [23] */
	uint64_t bbr_offset_drop;		/* [24] */
	uint64_t bbr_runt_sacks;		/* [25] */
	uint64_t bbr_sack_passed;		/* [26] */
	uint64_t bbr_rlock_left_ret0;		/* [27] */
	uint64_t bbr_rlock_left_ret1;		/* [28] */
	uint64_t bbr_dynamic_rwnd;		/* [29] */
	uint64_t bbr_static_rwnd;		/* [30] */
	uint64_t bbr_sack_blocks;		/* [31] */
	uint64_t bbr_sack_blocks_skip;		/* [32] */
	uint64_t bbr_sack_search_both;		/* [33] */
	uint64_t bbr_sack_search_fwd;		/* [34] */
	uint64_t bbr_sack_search_back;		/* [35] */
	uint64_t bbr_plain_acks;		/* [36] */
	uint64_t bbr_acks_with_sacks;		/* [37] */
	uint64_t bbr_progress_drops;		/* [38] */
	uint64_t bbr_early;			/* [39] */
	uint64_t bbr_reneges_seen;		/* [40] */
	uint64_t bbr_persist_reneg;		/* [41] */
	uint64_t bbr_dropped_af_data;		/* [42] */
	uint64_t bbr_failed_mbuf_aloc;		/* [43] */
	uint64_t bbr_cwnd_limited;		/* [44] */
	uint64_t bbr_rwnd_limited;		/* [45] */
	uint64_t bbr_app_limited;		/* [46] */
	uint64_t bbr_force_timer_start;		/* [47] */
	uint64_t bbr_hpts_min_time;		/* [48] */
	uint64_t bbr_meets_tso_thresh;		/* [49] */
	uint64_t bbr_miss_tso_rwnd;		/* [50] */
	uint64_t bbr_miss_tso_cwnd;		/* [51] */
	uint64_t bbr_miss_tso_app;		/* [52] */
	uint64_t bbr_miss_retran;		/* [53] */
	uint64_t bbr_miss_tlp;			/* [54] */
	uint64_t bbr_miss_unknown;		/* [55] */
	uint64_t bbr_hdwr_rl_add_ok;		/* [56] */
	uint64_t bbr_hdwr_rl_add_fail;		/* [57] */
	uint64_t bbr_hdwr_rl_mod_ok;		/* [58] */
	uint64_t bbr_hdwr_rl_mod_fail;		/* [59] */
	uint64_t bbr_collapsed_win;		/* [60] */
	uint64_t bbr_alloc_limited;		/* [61] */
	uint64_t bbr_alloc_limited_conns;	/* [62] */
	uint64_t bbr_split_limited;		/* [63] */
};

/*
 * struct bbr_opts_stats is a simple way to see how many options
 * are being changed in the stack.
 *
 * Every member is a uint64_t; BBR_OPTS_ADD() derives an array index
 * from the member's offset, so the order of members matters.
 */
struct bbr_opts_stats {
	uint64_t tcp_bbr_pace_per_sec;
	uint64_t tcp_bbr_pace_del_tar;
	uint64_t tcp_bbr_pace_seg_max;
	uint64_t tcp_bbr_pace_seg_min;
	uint64_t tcp_bbr_pace_cross;
	uint64_t tcp_bbr_drain_inc_extra;
	uint64_t tcp_bbr_unlimited;
	uint64_t tcp_bbr_iwintso;
	uint64_t tcp_bbr_rec_over_hpts;
	uint64_t tcp_bbr_recforce;
	uint64_t tcp_bbr_startup_pg;
	uint64_t tcp_bbr_drain_pg;
	uint64_t tcp_bbr_rwnd_is_app;
	uint64_t tcp_bbr_probe_rtt_int;
	uint64_t tcp_bbr_one_retran;
	uint64_t tcp_bbr_startup_loss_exit;
	uint64_t tcp_bbr_use_lowgain;
	uint64_t tcp_bbr_lowgain_thresh;
	uint64_t tcp_bbr_lowgain_half;
	uint64_t tcp_bbr_lowgain_fd;
	uint64_t tcp_bbr_usedel_rate;
	uint64_t tcp_bbr_min_rto;
	uint64_t tcp_bbr_max_rto;
	uint64_t tcp_rack_pace_max_seg;
	uint64_t tcp_rack_min_to;
	uint64_t tcp_rack_reord_thresh;
	uint64_t tcp_rack_reord_fade;
	uint64_t tcp_rack_tlp_thresh;
	uint64_t tcp_rack_pkt_delay;
	uint64_t tcp_bbr_startup_exit_epoch;
	uint64_t tcp_bbr_ack_comp_alg;
	uint64_t tcp_rack_cheat;
	uint64_t tcp_iwnd_tso;
	uint64_t tcp_utter_max_tso;
	uint64_t tcp_hdwr_pacing;
	uint64_t tcp_extra_state;
	uint64_t tcp_floor_min_tso;

	/* New */
	uint64_t tcp_bbr_algorithm;
	uint64_t tcp_bbr_tslimits;
	uint64_t tcp_bbr_probertt_len;
	uint64_t tcp_bbr_probertt_gain;
	uint64_t tcp_bbr_topaceout;
	uint64_t tcp_use_rackcheat;
	uint64_t tcp_delack;
	uint64_t tcp_maxpeak;
	uint64_t tcp_retran_wtso;
	uint64_t tcp_data_ac;
	uint64_t tcp_ts_raises;
	uint64_t tcp_pacing_oh_tmr;
	uint64_t tcp_pacing_oh;
	uint64_t tcp_policer_det;
};

#ifdef _KERNEL
/*
 * Counter arrays backing struct bbr_stats and struct bbr_opts_stats.
 * There is one counter per uint64_t member; the *_ADD macros map a
 * member name to its array slot via offsetof() / sizeof(uint64_t).
 */
#define BBR_STAT_SIZE	(sizeof(struct bbr_stats) / sizeof(uint64_t))
extern counter_u64_t bbr_stat_arry[BBR_STAT_SIZE];
#define BBR_STAT_ADD(name, amm) \
	counter_u64_add(bbr_stat_arry[(offsetof(struct bbr_stats, name) / sizeof(uint64_t))], (amm))
#define BBR_STAT_INC(name)	BBR_STAT_ADD(name, 1)

/*
 * Sized from struct bbr_opts_stats, the structure BBR_OPTS_ADD()
 * indexes with.  This was previously computed from struct bbr_stats,
 * which only worked because that structure happens to be the larger
 * of the two.
 */
#define BBR_OPTS_SIZE	(sizeof(struct bbr_opts_stats) / sizeof(uint64_t))
extern counter_u64_t bbr_opts_arry[BBR_OPTS_SIZE];
#define BBR_OPTS_ADD(name, amm) \
	counter_u64_add(bbr_opts_arry[(offsetof(struct bbr_opts_stats, name) / sizeof(uint64_t))], (amm))
#define BBR_OPTS_INC(name)	BBR_OPTS_ADD(name, 1)
#endif

/*
 * Counts of loss-rate and bandwidth-rate entries.
 * NOTE(review): presumably array dimensions used elsewhere in the BBR
 * stack; the users are not visible in this part of the header.
 */
#define BBR_NUM_LOSS_RATES 3
#define BBR_NUM_BW_RATES 3

/*
 * Recovery RTT classes, numbered 1..4 from low to extreme RTT.
 * NOTE(review): the thresholds that select a class live outside this
 * part of the header.
 */
#define BBR_RECOVERY_LOWRTT 1
#define BBR_RECOVERY_MEDRTT 2
#define BBR_RECOVERY_HIGHRTT 3
#define BBR_RECOVERY_EXTREMERTT 4
52035c7bb34SRandall Stewart 
52135c7bb34SRandall Stewart struct bbr_control {
52235c7bb34SRandall Stewart 	/*******************************/
52335c7bb34SRandall Stewart 	/* Cache line 2 from bbr start */
52435c7bb34SRandall Stewart 	/*******************************/
52535c7bb34SRandall Stewart 	struct bbr_head rc_map;	/* List of all segments Lock(a) */
52635c7bb34SRandall Stewart 	struct bbr_head rc_tmap;	/* List in transmit order Lock(a) */
52735c7bb34SRandall Stewart 	struct bbr_sendmap *rc_resend;	/* something we have been asked to
52835c7bb34SRandall Stewart 					 * resend */
52935c7bb34SRandall Stewart 	uint32_t rc_last_delay_val;	/* How much we expect to delay Lock(a) */
53035c7bb34SRandall Stewart 	uint32_t rc_bbr_hptsi_gain:16,	/* Current hptsi gain Lock(a) */
53135c7bb34SRandall Stewart 	         rc_hpts_flags:16;	/* flags on whats on the pacer wheel */
53235c7bb34SRandall Stewart 
53335c7bb34SRandall Stewart 	uint32_t rc_delivered;	/* BRR delivered amount Lock(a) */
53435c7bb34SRandall Stewart 	uint32_t rc_hptsi_agg_delay;	/* How much time are we behind */
53535c7bb34SRandall Stewart 
53635c7bb34SRandall Stewart 	uint32_t rc_flight_at_input;
53735c7bb34SRandall Stewart 	uint32_t rc_lost_bytes;		/* Total bytes currently marked lost */
53835c7bb34SRandall Stewart 	/*******************************/
53935c7bb34SRandall Stewart 	/* Cache line 3 from bbr start */
54035c7bb34SRandall Stewart 	/*******************************/
54135c7bb34SRandall Stewart 	struct time_filter rc_delrate;
54235c7bb34SRandall Stewart 	/*******************************/
54335c7bb34SRandall Stewart 	/* Cache line 4 from bbr start */
54435c7bb34SRandall Stewart 	/*******************************/
54535c7bb34SRandall Stewart 	struct bbr_head rc_free;	/* List of Free map entries Lock(a) */
54635c7bb34SRandall Stewart 	struct bbr_sendmap *rc_tlp_send;	/* something we have been
54735c7bb34SRandall Stewart 						 * asked to resend */
54835c7bb34SRandall Stewart 	uint32_t rc_del_time;
54935c7bb34SRandall Stewart 	uint32_t rc_target_at_state;	/* Target for a state */
55035c7bb34SRandall Stewart 
55135c7bb34SRandall Stewart 	uint16_t rc_free_cnt;	/* Number of free entries on the rc_free list
55235c7bb34SRandall Stewart 				 * Lock(a) */
55335c7bb34SRandall Stewart 	uint16_t rc_startup_pg;
55435c7bb34SRandall Stewart 
55535c7bb34SRandall Stewart 	uint32_t cur_rtt;	/* Last RTT from ack */
55635c7bb34SRandall Stewart 
55735c7bb34SRandall Stewart 	uint32_t rc_went_idle_time;	/* Used for persits to see if its
55835c7bb34SRandall Stewart 					 * probe-rtt qualified */
55935c7bb34SRandall Stewart 	uint32_t rc_pace_max_segs:17,	/* How much in any single TSO we send Lock(a) */
56035c7bb34SRandall Stewart 		 rc_pace_min_segs:15;	/* The minimum single segment size before we enter persists */
56135c7bb34SRandall Stewart 
56235c7bb34SRandall Stewart 	uint32_t rc_rtt_shrinks;	/* Time of last rtt shrinkage Lock(a) */
56335c7bb34SRandall Stewart 	uint32_t r_app_limited_until;
56435c7bb34SRandall Stewart 	uint32_t rc_timer_exp;	/* If a timer ticks of expiry */
56535c7bb34SRandall Stewart 	uint32_t rc_rcv_epoch_start;	/* Start time of the Epoch Lock(a) */
56635c7bb34SRandall Stewart 
56735c7bb34SRandall Stewart 	/*******************************/
56835c7bb34SRandall Stewart 	/* Cache line 5 from bbr start */
56935c7bb34SRandall Stewart 	/*******************************/
57035c7bb34SRandall Stewart 
57135c7bb34SRandall Stewart 	uint32_t rc_lost_at_pktepoch;	/* what the lost value was at the last
57235c7bb34SRandall Stewart 					 * pkt-epoch */
57335c7bb34SRandall Stewart 	uint32_t r_measurement_count;	/* count of measurement applied lock(a) */
57435c7bb34SRandall Stewart 
57535c7bb34SRandall Stewart 	uint32_t rc_last_tlp_seq;	/* Last tlp sequence Lock(a) */
57635c7bb34SRandall Stewart 	uint16_t rc_reorder_shift;	/* Socket option value Lock(a) */
57735c7bb34SRandall Stewart 	uint16_t rc_pkt_delay;	/* Socket option value Lock(a) */
57835c7bb34SRandall Stewart 
57935c7bb34SRandall Stewart 	struct bbr_sendmap *rc_sacklast;	/* sack remembered place
58035c7bb34SRandall Stewart 						 * Lock(a) */
58135c7bb34SRandall Stewart 	struct bbr_sendmap *rc_next;	/* remembered place where we next
58235c7bb34SRandall Stewart 					 * retransmit at Lock(a) */
58335c7bb34SRandall Stewart 
58435c7bb34SRandall Stewart 	uint32_t rc_sacked;	/* Tot sacked on scoreboard Lock(a) */
58535c7bb34SRandall Stewart 	uint32_t rc_holes_rxt;	/* Tot retraned from scoreboard Lock(a) */
58635c7bb34SRandall Stewart 
58735c7bb34SRandall Stewart 	uint32_t rc_reorder_ts;	/* Last time we saw reordering Lock(a) */
58835c7bb34SRandall Stewart 	uint32_t rc_init_rwnd;	/* Initial rwnd when we transitioned */
58935c7bb34SRandall Stewart 				/*- ---
5905aa0576bSEd Maste 				 * used only initial and close
59135c7bb34SRandall Stewart 				 */
59235c7bb34SRandall Stewart 	uint32_t rc_high_rwnd;	/* Highest rwnd seen */
59335c7bb34SRandall Stewart 	uint32_t rc_lowest_rtt;	/* Smallest RTT we have seen */
59435c7bb34SRandall Stewart 
59535c7bb34SRandall Stewart 	uint32_t rc_last_rtt;	/* Last valid measured RTT that ack'd data */
59635c7bb34SRandall Stewart 	uint32_t bbr_cross_over;
59735c7bb34SRandall Stewart 
59835c7bb34SRandall Stewart 	/*******************************/
59935c7bb34SRandall Stewart 	/* Cache line 6 from bbr start */
60035c7bb34SRandall Stewart 	/*******************************/
60135c7bb34SRandall Stewart 	struct sack_filter bbr_sf;
60235c7bb34SRandall Stewart 
60335c7bb34SRandall Stewart 	/*******************************/
60435c7bb34SRandall Stewart 	/* Cache line 7 from bbr start */
60535c7bb34SRandall Stewart 	/*******************************/
60635c7bb34SRandall Stewart 	struct time_filter_small rc_rttprop;
60735c7bb34SRandall Stewart 	uint32_t last_inbound_ts;	/* Peers last timestamp */
60835c7bb34SRandall Stewart 
60935c7bb34SRandall Stewart 	uint32_t rc_inc_tcp_oh: 1,
61035c7bb34SRandall Stewart 		 rc_inc_ip_oh: 1,
61135c7bb34SRandall Stewart 		 rc_inc_enet_oh:1,
61235c7bb34SRandall Stewart 		 rc_incr_tmrs:1,
61335c7bb34SRandall Stewart 		 restrict_growth:28;
61435c7bb34SRandall Stewart 	uint32_t rc_lt_epoch_use;	/* When we started lt-bw use Lock(a) */
61535c7bb34SRandall Stewart 
61635c7bb34SRandall Stewart 	uint32_t rc_recovery_start;	/* Time we start recovery Lock(a) */
61735c7bb34SRandall Stewart 	uint32_t rc_lt_del;	/* Delivered at lt bw sampling start Lock(a) */
61835c7bb34SRandall Stewart 
61935c7bb34SRandall Stewart 	uint64_t rc_bbr_cur_del_rate;	/* Current measured delivery rate
62035c7bb34SRandall Stewart 					 * Lock(a) */
62135c7bb34SRandall Stewart 
62235c7bb34SRandall Stewart 	/*******************************/
62335c7bb34SRandall Stewart 	/* Cache line 8 from bbr start */
62435c7bb34SRandall Stewart 	/*******************************/
62535c7bb34SRandall Stewart 	uint32_t rc_cwnd_on_ent;	/* On entry to recovery the cwnd
62635c7bb34SRandall Stewart 					 * Lock(a) */
62735c7bb34SRandall Stewart 	uint32_t rc_agg_early;	/* aggregate amount early */
62835c7bb34SRandall Stewart 
62935c7bb34SRandall Stewart 	uint32_t rc_rcvtime;	/* When we last received data Lock(a) */
63035c7bb34SRandall Stewart 	uint32_t rc_pkt_epoch_del;	/* seq num that we need for RTT epoch */
63135c7bb34SRandall Stewart 
63235c7bb34SRandall Stewart 	uint32_t rc_pkt_epoch;	/* Epoch based on packet RTTs */
63335c7bb34SRandall Stewart 	uint32_t rc_pkt_epoch_time;	/* Time we started the pkt epoch */
63435c7bb34SRandall Stewart 
63535c7bb34SRandall Stewart 	uint32_t rc_pkt_epoch_rtt;	/* RTT using the packet epoch */
63635c7bb34SRandall Stewart 	uint32_t rc_rtt_epoch;	/* Current RTT epoch, it ticks every rttProp
63735c7bb34SRandall Stewart 				 * Lock(a) */
63835c7bb34SRandall Stewart 	uint32_t lowest_rtt;
63935c7bb34SRandall Stewart 	uint32_t bbr_smallest_srtt_this_state;
64035c7bb34SRandall Stewart 
64135c7bb34SRandall Stewart 	uint32_t rc_lt_epoch;	/* LT epoch start of bw_sampling */
64235c7bb34SRandall Stewart 	uint32_t rc_lost_at_startup;
64335c7bb34SRandall Stewart 
64435c7bb34SRandall Stewart 	uint32_t rc_bbr_state_atflight;
64535c7bb34SRandall Stewart 	uint32_t rc_bbr_last_startup_epoch;	/* Last startup epoch where we
64635c7bb34SRandall Stewart 						 * increased 20% */
64735c7bb34SRandall Stewart 	uint32_t rc_bbr_enters_probertt;	/* Timestamp we entered
64835c7bb34SRandall Stewart 						 * probertt Lock(a) */
64935c7bb34SRandall Stewart 	uint32_t rc_lt_time;	/* Time of lt sampling start Lock(a) */
65035c7bb34SRandall Stewart 
65135c7bb34SRandall Stewart 	/*******************************/
65235c7bb34SRandall Stewart 	/* Cache line 9 from bbr start */
65335c7bb34SRandall Stewart 	/*******************************/
65435c7bb34SRandall Stewart 	uint64_t rc_lt_bw;	/* LT bw calculated Lock(a) */
65535c7bb34SRandall Stewart 	uint64_t rc_bbr_lastbtlbw;	/* For startup, what was last btlbw I
65635c7bb34SRandall Stewart 					 * saw to check the 20% gain Lock(a) */
65735c7bb34SRandall Stewart 
65835c7bb34SRandall Stewart 	uint32_t rc_bbr_cwnd_gain;	/* Current cwnd gain Lock(a) */
65935c7bb34SRandall Stewart 	uint32_t rc_pkt_epoch_loss_rate;	/* pkt-epoch loss rate */
66035c7bb34SRandall Stewart 
66135c7bb34SRandall Stewart 	uint32_t rc_saved_cwnd;	/* Saved cwnd during Probe-rtt drain Lock(a) */
66235c7bb34SRandall Stewart 	uint32_t substate_pe;
66335c7bb34SRandall Stewart 
66435c7bb34SRandall Stewart 	uint32_t rc_lost;	/* Number of bytes lost Lock(a) */
66535c7bb34SRandall Stewart 	uint32_t rc_exta_time_gd; /* How much extra time we got in d/g */
66635c7bb34SRandall Stewart 
66735c7bb34SRandall Stewart 	uint32_t rc_lt_lost;	/* Number of lt bytes lost at sampling start
66835c7bb34SRandall Stewart 				 * Lock(a) */
66935c7bb34SRandall Stewart 	uint32_t rc_bbr_state_time;
67035c7bb34SRandall Stewart 
67135c7bb34SRandall Stewart 	uint32_t rc_min_to;	/* Socket option value Lock(a) */
67235c7bb34SRandall Stewart 	uint32_t rc_initial_hptsi_bw;	/* Our initial startup bw Lock(a) */
67335c7bb34SRandall Stewart 
67435c7bb34SRandall Stewart 	uint32_t bbr_lost_at_state;	/* Temp counter debug lost value as we
67535c7bb34SRandall Stewart 					 * enter a state */
67635c7bb34SRandall Stewart 	/*******************************/
67735c7bb34SRandall Stewart 	/* Cache line 10 from bbr start */
67835c7bb34SRandall Stewart 	/*******************************/
67935c7bb34SRandall Stewart 	uint32_t rc_level_state_extra;
68035c7bb34SRandall Stewart 	uint32_t rc_red_cwnd_pe;
68135c7bb34SRandall Stewart 	const struct tcp_hwrate_limit_table *crte;
68235c7bb34SRandall Stewart 	uint64_t red_bw;
68335c7bb34SRandall Stewart 
68435c7bb34SRandall Stewart 	uint32_t rc_probertt_int;
68535c7bb34SRandall Stewart 	uint32_t rc_probertt_srttchktim;	/* Time we last did a srtt
68635c7bb34SRandall Stewart 						 * check  */
68735c7bb34SRandall Stewart 	uint32_t gain_epoch;	/* Epoch we should be out of gain */
68835c7bb34SRandall Stewart 	uint32_t rc_min_rto_ms;
68935c7bb34SRandall Stewart 
69035c7bb34SRandall Stewart 	uint32_t rc_reorder_fade;	/* Socket option value Lock(a) */
69135c7bb34SRandall Stewart 	uint32_t last_startup_measure;
69235c7bb34SRandall Stewart 
69335c7bb34SRandall Stewart 	int32_t bbr_hptsi_per_second;
69435c7bb34SRandall Stewart 	int32_t bbr_hptsi_segments_delay_tar;
69535c7bb34SRandall Stewart 
69635c7bb34SRandall Stewart 	int32_t bbr_hptsi_segments_max;
69735c7bb34SRandall Stewart 	uint32_t bbr_rttprobe_gain_val;
69835c7bb34SRandall Stewart 	/*******************************/
69935c7bb34SRandall Stewart 	/* Cache line 11 from bbr start */
70035c7bb34SRandall Stewart 	/*******************************/
70135c7bb34SRandall Stewart 	uint32_t cur_rtt_send_time;	/* Time we sent our rtt measured packet */
70235c7bb34SRandall Stewart 	uint32_t bbr_peer_tsratio;	/* Our calculated ts ratio to multply */
70335c7bb34SRandall Stewart 	uint32_t bbr_ts_check_tstmp;	/* When we filled it the TS that came on the ack */
70435c7bb34SRandall Stewart 	uint32_t bbr_ts_check_our_cts;	/* When we filled it the cts of the send */
70535c7bb34SRandall Stewart 	uint32_t rc_tlp_rxt_last_time;
70635c7bb34SRandall Stewart 	uint32_t bbr_smallest_srtt_state2;
70735c7bb34SRandall Stewart 	uint32_t bbr_hdwr_cnt_noset_snt;	/* count of hw pacing sends during delay */
70835c7bb34SRandall Stewart 	uint32_t startup_last_srtt;
70935c7bb34SRandall Stewart 	uint32_t rc_ack_hdwr_delay;
71035c7bb34SRandall Stewart 	uint32_t highest_hdwr_delay;		/* Largest delay we have seen from hardware */
71135c7bb34SRandall Stewart 	uint32_t non_gain_extra;
71235c7bb34SRandall Stewart 	uint32_t recovery_lr;			/* The sum of the loss rate from the pe's during recovery */
71335c7bb34SRandall Stewart 	uint32_t last_in_probertt;
71435c7bb34SRandall Stewart 	uint32_t flightsize_at_drain;		/* In draining what was the last marked flight size */
71535c7bb34SRandall Stewart 	uint32_t rc_pe_of_prtt;			/* PE we went into probe-rtt */
71635c7bb34SRandall Stewart 	uint32_t ts_in;				/* ts that went with the last rtt */
71735c7bb34SRandall Stewart 
71835c7bb34SRandall Stewart 	uint16_t rc_tlp_seg_send_cnt;	/* Number of times we have TLP sent
71935c7bb34SRandall Stewart 					 * rc_last_tlp_seq Lock(a) */
72035c7bb34SRandall Stewart 	uint16_t rc_drain_pg;
72135c7bb34SRandall Stewart 	uint32_t rc_num_maps_alloced;		/* num send map entries allocated */
72235c7bb34SRandall Stewart 	uint32_t rc_num_split_allocs;		/* num split map entries allocated */
72335c7bb34SRandall Stewart 	uint16_t rc_num_small_maps_alloced;	/* Number of sack blocks
72435c7bb34SRandall Stewart 						 * allocated */
72535c7bb34SRandall Stewart 	uint16_t bbr_hptsi_bytes_min;
72635c7bb34SRandall Stewart 
72735c7bb34SRandall Stewart 	uint16_t bbr_hptsi_segments_floor;
72835c7bb34SRandall Stewart 	uint16_t bbr_utter_max;
72935c7bb34SRandall Stewart 	uint16_t bbr_google_discount;
73035c7bb34SRandall Stewart 
73135c7bb34SRandall Stewart };
73235c7bb34SRandall Stewart 
73335c7bb34SRandall Stewart struct socket;
73435c7bb34SRandall Stewart struct tcp_bbr {
73535c7bb34SRandall Stewart 	/* First cache line 0x00 */
73635c7bb34SRandall Stewart 	int32_t(*r_substate) (struct mbuf *, struct tcphdr *,
73735c7bb34SRandall Stewart 	    struct socket *, struct tcpcb *, struct tcpopt *,
738963fb2adSRandall Stewart 	    int32_t, int32_t, uint32_t, int32_t, int32_t, uint8_t);	/* Lock(a) */
73935c7bb34SRandall Stewart 	struct tcpcb *rc_tp;	/* The tcpcb Lock(a) */
74035c7bb34SRandall Stewart 	struct inpcb *rc_inp;	/* The inpcb Lock(a) */
74135c7bb34SRandall Stewart 	struct timeval rc_tv;
74235c7bb34SRandall Stewart 	uint32_t rc_pacer_started;  /* Time we started the pacer */
74335c7bb34SRandall Stewart 	uint16_t no_pacing_until:8, /* No pacing until N packet epochs */
74435c7bb34SRandall Stewart 		 ts_can_raise:1,/* TS b/w calculations can raise the bw higher */
74535c7bb34SRandall Stewart 		 skip_gain:1,	/* Skip the gain cycle (hardware pacing) */
74635c7bb34SRandall Stewart 		 gain_is_limited:1,	/* With hardware pacing we are limiting gain */
74735c7bb34SRandall Stewart 		 output_error_seen:1,
74835c7bb34SRandall Stewart 		 oerror_cnt:4,
74935c7bb34SRandall Stewart 		hw_pacing_set:1;	/* long enough has passed for us to start pacing */
75035c7bb34SRandall Stewart 	uint16_t xxx_r_ack_count;	/* During recovery count of ack's received
75135c7bb34SRandall Stewart 				 * that added data since output */
75235c7bb34SRandall Stewart 	uint16_t bbr_segs_rcvd;	/* In Segment count since we sent a ack */
75335c7bb34SRandall Stewart 
75435c7bb34SRandall Stewart 	uint8_t bbr_timer_src:4,	/* Used for debugging Lock(a) */
75535c7bb34SRandall Stewart 		bbr_use_rack_cheat:1,   /* Use the rack cheat */
75635c7bb34SRandall Stewart 		bbr_init_win_cheat:1,	/* Send full IW for TSO */
75735c7bb34SRandall Stewart 		bbr_attempt_hdwr_pace:1,/* Try to do hardware pacing */
75835c7bb34SRandall Stewart 		bbr_hdrw_pacing:1;	/* Hardware pacing is available */
75935c7bb34SRandall Stewart 	uint8_t bbr_hdw_pace_ena:1,	/* Does the connection allow hardware pacing to be attempted */
76035c7bb34SRandall Stewart 		bbr_prev_in_rec:1,	/* We were previously in recovery */
76135c7bb34SRandall Stewart 		pkt_conservation:1,
76235c7bb34SRandall Stewart 		use_policer_detection:1,
76335c7bb34SRandall Stewart 		xxx_bbr_hdw_pace_idx:4;	/* If hardware pacing is on, index to slot in pace tbl */
76435c7bb34SRandall Stewart 	uint16_t r_wanted_output:1,
76535c7bb34SRandall Stewart 		 rtt_valid:1,
76635c7bb34SRandall Stewart 		 rc_timer_first:1,
76735c7bb34SRandall Stewart 		 rc_output_starts_timer:1,
76835c7bb34SRandall Stewart 		 rc_resends_use_tso:1,
76935c7bb34SRandall Stewart 		 rc_all_timers_stopped:1,
77035c7bb34SRandall Stewart 		 rc_loss_exit:1,
77135c7bb34SRandall Stewart 		 rc_ack_was_delayed:1,
77235c7bb34SRandall Stewart 		 rc_lt_is_sampling:1,
77335c7bb34SRandall Stewart 		 rc_filled_pipe:1,
77435c7bb34SRandall Stewart 		 rc_tlp_new_data:1,
77535c7bb34SRandall Stewart 		 rc_hit_state_1:1,
77635c7bb34SRandall Stewart 		 rc_ts_valid:1,
77735c7bb34SRandall Stewart 		 rc_prtt_set_ts:1,
77835c7bb34SRandall Stewart 		 rc_is_pkt_epoch_now:1,
77935c7bb34SRandall Stewart 		 rc_has_collapsed:1;
78035c7bb34SRandall Stewart 
78135c7bb34SRandall Stewart 	uint8_t r_state:4,	/* Current bbr state Lock(a) */
78235c7bb34SRandall Stewart 	        r_agg_early_set:1,	/* Did we get called early */
78335c7bb34SRandall Stewart 		r_init_rtt:1,
78435c7bb34SRandall Stewart 		r_use_policer:1,	/* For google mode only */
78535c7bb34SRandall Stewart 		r_recovery_bw:1;
78635c7bb34SRandall Stewart 	uint8_t r_timer_override:1,	/* pacer override Lock(a)  0/1 */
78735c7bb34SRandall Stewart 	        rc_in_persist:1,
78835c7bb34SRandall Stewart 		rc_lt_use_bw:1,
78935c7bb34SRandall Stewart 		rc_allow_data_af_clo:1,
79035c7bb34SRandall Stewart 		rc_tlp_rtx_out:1,	/* A TLP is in flight  */
79135c7bb34SRandall Stewart 	        rc_tlp_in_progress:1,	/* a TLP timer is running needed? */
79235c7bb34SRandall Stewart 	        rc_use_idle_restart:1;   /* Do we restart fast after idle (persist or applim) */
79335c7bb34SRandall Stewart 	uint8_t rc_bbr_state:3,	/* What is the major BBR state */
79435c7bb34SRandall Stewart 	        rc_bbr_substate:3,	/* For probeBW state */
79535c7bb34SRandall Stewart 	        r_is_v6:1,
79635c7bb34SRandall Stewart 		rc_past_init_win:1;
79735c7bb34SRandall Stewart 	uint8_t rc_last_options;
79835c7bb34SRandall Stewart 	uint8_t rc_tlp_threshold;	/* Socket option value Lock(a) */
79935c7bb34SRandall Stewart 	uint8_t rc_max_rto_sec;
80035c7bb34SRandall Stewart 	uint8_t rc_cwnd_limited:1,	/* We are cwnd limited */
80135c7bb34SRandall Stewart 		rc_tmr_stopped:7;	/* What timers have been stopped  */
80235c7bb34SRandall Stewart 	uint8_t rc_use_google:1,
80335c7bb34SRandall Stewart 		rc_use_ts_limit:1,
80435c7bb34SRandall Stewart 		rc_ts_data_set:1,	/* We have filled a set point to determine */
80535c7bb34SRandall Stewart 		rc_ts_clock_set:1, 	/* We have determined the ts type */
80635c7bb34SRandall Stewart 		rc_ts_cant_be_used:1,	/* We determined we can't use ts values */
80735c7bb34SRandall Stewart 		rc_ack_is_cumack:1,
80835c7bb34SRandall Stewart 		rc_no_pacing:1,
80935c7bb34SRandall Stewart 		alloc_limit_reported:1;
81035c7bb34SRandall Stewart 	uint8_t rc_init_win;
81135c7bb34SRandall Stewart 	/* Cache line 2 0x40 */
81235c7bb34SRandall Stewart 	struct bbr_control r_ctl;
81335c7bb34SRandall Stewart #ifdef _KERNEL
81435c7bb34SRandall Stewart }       __aligned(CACHE_LINE_SIZE);
81535c7bb34SRandall Stewart #else
81635c7bb34SRandall Stewart };
81735c7bb34SRandall Stewart #endif
81835c7bb34SRandall Stewart 
81935c7bb34SRandall Stewart #endif
820